1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "idctdsp.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "h261.h"
44 #include "h263.h"
45 #include "mjpegenc_common.h"
46 #include "mathops.h"
47 #include "mpegutils.h"
48 #include "mjpegenc.h"
49 #include "msmpeg4.h"
50 #include "pixblockdsp.h"
51 #include "qpeldsp.h"
52 #include "faandct.h"
53 #include "thread.h"
54 #include "aandcttab.h"
55 #include "flv.h"
56 #include "mpeg4video.h"
57 #include "internal.h"
58 #include "bytestream.h"
59 #include <limits.h>
60
61 static int encode_picture(MpegEncContext *s, int picture_number);
62 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
63 static int sse_mb(MpegEncContext *s);
64 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
65 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
66
67 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
68 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
69
70 const AVOption ff_mpv_generic_options[] = {
71     FF_MPV_COMMON_OPTS
72     { NULL },
73 };
74
75 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
76                        uint16_t (*qmat16)[2][64],
77                        const uint16_t *quant_matrix,
78                        int bias, int qmin, int qmax, int intra)
79 {
80     FDCTDSPContext *fdsp = &s->fdsp;
81     int qscale;
82     int shift = 0;
83
84     for (qscale = qmin; qscale <= qmax; qscale++) {
85         int i;
86         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
87             fdsp->fdct == ff_jpeg_fdct_islow_10 ||
88             fdsp->fdct == ff_faandct) {
89             for (i = 0; i < 64; i++) {
90                 const int j = s->idsp.idct_permutation[i];
91                 /* 16 <= qscale * quant_matrix[i] <= 7905
92                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
93                  *             19952 <=              x  <= 249205026
94                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
95                  *           3444240 >= (1 << 36) / (x) >= 275 */
96
97                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
98                                         (qscale * quant_matrix[j]));
99             }
100         } else if (fdsp->fdct == ff_fdct_ifast) {
101             for (i = 0; i < 64; i++) {
102                 const int j = s->idsp.idct_permutation[i];
103                 /* 16 <= qscale * quant_matrix[i] <= 7905
104                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
105                  *             19952 <=              x  <= 249205026
106                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
107                  *           3444240 >= (1 << 36) / (x) >= 275 */
108
109                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
110                                         (ff_aanscales[i] * qscale *
111                                          quant_matrix[j]));
112             }
113         } else {
114             for (i = 0; i < 64; i++) {
115                 const int j = s->idsp.idct_permutation[i];
116                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
117                  * Assume x = qscale * quant_matrix[i]
118                  * So             16 <=              x  <= 7905
119                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
120                  * so          32768 >= (1 << 19) / (x) >= 67 */
121                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
122                                         (qscale * quant_matrix[j]));
123                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
124                 //                    (qscale * quant_matrix[i]);
125                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
126                                        (qscale * quant_matrix[j]);
127
128                 if (qmat16[qscale][0][i] == 0 ||
129                     qmat16[qscale][0][i] == 128 * 256)
130                     qmat16[qscale][0][i] = 128 * 256 - 1;
131                 qmat16[qscale][1][i] =
132                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
133                                 qmat16[qscale][0][i]);
134             }
135         }
136
137         for (i = intra; i < 64; i++) {
138             int64_t max = 8191;
139             if (fdsp->fdct == ff_fdct_ifast) {
140                 max = (8191LL * ff_aanscales[i]) >> 14;
141             }
142             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
143                 shift++;
144             }
145         }
146     }
147     if (shift) {
148         av_log(NULL, AV_LOG_INFO,
149                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
150                QMAT_SHIFT - shift);
151     }
152 }
153
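/**
 * Derive qscale from the current lambda (roughly lambda / FF_QP2LAMBDA, with
 * rounding), clip it to the configured qmin/qmax range and refresh lambda2.
 */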
154 static inline void update_qscale(MpegEncContext *s)
155 {
156     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
157                 (FF_LAMBDA_SHIFT + 7);
158     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
159
160     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
161                  FF_LAMBDA_SHIFT;
162 }
163
164 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
165 {
166     int i;
167
168     if (matrix) {
169         put_bits(pb, 1, 1);
170         for (i = 0; i < 64; i++) {
171             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
172         }
173     } else
174         put_bits(pb, 1, 0);
175 }
176
177 /**
178  * init s->current_picture.qscale_table from s->lambda_table
179  */
180 void ff_init_qscale_tab(MpegEncContext *s)
181 {
182     int8_t * const qscale_table = s->current_picture.qscale_table;
183     int i;
184
185     for (i = 0; i < s->mb_num; i++) {
186         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
187         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
188         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
189                                                   s->avctx->qmax);
190     }
191 }
192
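/**
 * Copy the per-frame fields listed below (picture type, f/b codes, lambda, ...)
 * from src into dst, so that a duplicated context matches the main one after
 * motion estimation.
 */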
193 static void update_duplicate_context_after_me(MpegEncContext *dst,
194                                               MpegEncContext *src)
195 {
196 #define COPY(a) dst->a= src->a
197     COPY(pict_type);
198     COPY(current_picture);
199     COPY(f_code);
200     COPY(b_code);
201     COPY(qscale);
202     COPY(lambda);
203     COPY(lambda2);
204     COPY(picture_in_gop_number);
205     COPY(gop_picture_number);
206     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
207     COPY(progressive_frame);    // FIXME don't set in encode_header
208     COPY(partitioned_frame);    // FIXME don't set in encode_header
209 #undef COPY
210 }
211
212 /**
213  * Set the given MpegEncContext to defaults for encoding.
214  * The changed fields will not depend upon the prior state of the MpegEncContext.
215  */
216 static void MPV_encode_defaults(MpegEncContext *s)
217 {
218     int i;
219     ff_MPV_common_defaults(s);
220
221     for (i = -16; i < 16; i++) {
222         default_fcode_tab[i + MAX_MV] = 1;
223     }
224     s->me.mv_penalty = default_mv_penalty;
225     s->fcode_tab     = default_fcode_tab;
226
227     s->input_picture_number  = 0;
228     s->picture_in_gop_number = 0;
229 }
230
231 /* init video encoder */
232 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
233 {
234     MpegEncContext *s = avctx->priv_data;
235     int i, ret, format_supported;
236
237     MPV_encode_defaults(s);
238
239     switch (avctx->codec_id) {
240     case AV_CODEC_ID_MPEG2VIDEO:
241         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
242             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
243             av_log(avctx, AV_LOG_ERROR,
244                    "only YUV420 and YUV422 are supported\n");
245             return -1;
246         }
247         break;
248     case AV_CODEC_ID_MJPEG:
249         format_supported = 0;
250         /* JPEG color space */
251         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
252             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
253             (avctx->color_range == AVCOL_RANGE_JPEG &&
254              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
255               avctx->pix_fmt == AV_PIX_FMT_YUV422P)))
256             format_supported = 1;
257         /* MPEG color space */
258         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
259                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
260                   avctx->pix_fmt == AV_PIX_FMT_YUV422P))
261             format_supported = 1;
262
263         if (!format_supported) {
264             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
265             return -1;
266         }
267         break;
268     default:
269         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
270             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
271             return -1;
272         }
273     }
274
275     switch (avctx->pix_fmt) {
276     case AV_PIX_FMT_YUVJ422P:
277     case AV_PIX_FMT_YUV422P:
278         s->chroma_format = CHROMA_422;
279         break;
280     case AV_PIX_FMT_YUVJ420P:
281     case AV_PIX_FMT_YUV420P:
282     default:
283         s->chroma_format = CHROMA_420;
284         break;
285     }
286
287     s->bit_rate = avctx->bit_rate;
288     s->width    = avctx->width;
289     s->height   = avctx->height;
290     if (avctx->gop_size > 600 &&
291         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
292         av_log(avctx, AV_LOG_ERROR,
293                "Warning keyframe interval too large! reducing it ...\n");
294         avctx->gop_size = 600;
295     }
296     s->gop_size     = avctx->gop_size;
297     s->avctx        = avctx;
298     s->flags        = avctx->flags;
299     s->flags2       = avctx->flags2;
300     if (avctx->max_b_frames > MAX_B_FRAMES) {
301         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
302                "is %d.\n", MAX_B_FRAMES);
303     }
304     s->max_b_frames = avctx->max_b_frames;
305     s->codec_id     = avctx->codec->id;
306     s->strict_std_compliance = avctx->strict_std_compliance;
307     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
308     s->mpeg_quant         = avctx->mpeg_quant;
309     s->rtp_mode           = !!avctx->rtp_payload_size;
310     s->intra_dc_precision = avctx->intra_dc_precision;
311     s->user_specified_pts = AV_NOPTS_VALUE;
312
313     if (s->gop_size <= 1) {
314         s->intra_only = 1;
315         s->gop_size   = 12;
316     } else {
317         s->intra_only = 0;
318     }
319
320     s->me_method = avctx->me_method;
321
322     /* Fixed QSCALE */
323     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
324
325     s->adaptive_quant = (s->avctx->lumi_masking ||
326                          s->avctx->dark_masking ||
327                          s->avctx->temporal_cplx_masking ||
328                          s->avctx->spatial_cplx_masking  ||
329                          s->avctx->p_masking      ||
330                          s->avctx->border_masking ||
331                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
332                         !s->fixed_qscale;
333
334     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
335
336     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
337         av_log(avctx, AV_LOG_ERROR,
338                "a vbv buffer size is needed, "
339                "for encoding with a maximum bitrate\n");
340         return -1;
341     }
342
343     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
344         av_log(avctx, AV_LOG_INFO,
345                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
346     }
347
348     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
349         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
350         return -1;
351     }
352
353     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
354         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
355         return -1;
356     }
357
358     if (avctx->rc_max_rate &&
359         avctx->rc_max_rate == avctx->bit_rate &&
360         avctx->rc_max_rate != avctx->rc_min_rate) {
361         av_log(avctx, AV_LOG_INFO,
362                "impossible bitrate constraints, this will fail\n");
363     }
364
365     if (avctx->rc_buffer_size &&
366         avctx->bit_rate * (int64_t)avctx->time_base.num >
367             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
368         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
369         return -1;
370     }
371
372     if (!s->fixed_qscale &&
373         avctx->bit_rate * av_q2d(avctx->time_base) >
374             avctx->bit_rate_tolerance) {
375         av_log(avctx, AV_LOG_ERROR,
376                "bitrate tolerance too small for bitrate\n");
377         return -1;
378     }
379
380     if (s->avctx->rc_max_rate &&
381         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
382         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
383          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
384         90000LL * (avctx->rc_buffer_size - 1) >
385             s->avctx->rc_max_rate * 0xFFFFLL) {
386         av_log(avctx, AV_LOG_INFO,
387                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
388                "specified vbv buffer is too large for the given bitrate!\n");
389     }
390
391     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
392         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
393         s->codec_id != AV_CODEC_ID_FLV1) {
394         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
395         return -1;
396     }
397
398     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
399         av_log(avctx, AV_LOG_ERROR,
400                "OBMC is only supported with simple mb decision\n");
401         return -1;
402     }
403
404     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
405         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
406         return -1;
407     }
408
409     if (s->max_b_frames                    &&
410         s->codec_id != AV_CODEC_ID_MPEG4      &&
411         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
412         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
413         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
414         return -1;
415     }
416
417     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
418          s->codec_id == AV_CODEC_ID_H263  ||
419          s->codec_id == AV_CODEC_ID_H263P) &&
420         (avctx->sample_aspect_ratio.num > 255 ||
421          avctx->sample_aspect_ratio.den > 255)) {
422         av_log(avctx, AV_LOG_ERROR,
423                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
424                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
425         return -1;
426     }
427
428     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
429         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
430         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
431         return -1;
432     }
433
434     // FIXME mpeg2 uses that too
435     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
436         av_log(avctx, AV_LOG_ERROR,
437                "mpeg2 style quantization not supported by codec\n");
438         return -1;
439     }
440
441     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
442         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
443         return -1;
444     }
445
446     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
447         s->avctx->mb_decision != FF_MB_DECISION_RD) {
448         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
449         return -1;
450     }
451
452     if (s->avctx->scenechange_threshold < 1000000000 &&
453         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
454         av_log(avctx, AV_LOG_ERROR,
455                "closed gop with scene change detection are not supported yet, "
456                "set threshold to 1000000000\n");
457         return -1;
458     }
459
460     if (s->flags & CODEC_FLAG_LOW_DELAY) {
461         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
462             av_log(avctx, AV_LOG_ERROR,
463                   "low delay forcing is only available for mpeg2\n");
464             return -1;
465         }
466         if (s->max_b_frames != 0) {
467             av_log(avctx, AV_LOG_ERROR,
468                    "b frames cannot be used with low delay\n");
469             return -1;
470         }
471     }
472
473     if (s->q_scale_type == 1) {
474         if (avctx->qmax > 12) {
475             av_log(avctx, AV_LOG_ERROR,
476                    "non linear quant only supports qmax <= 12 currently\n");
477             return -1;
478         }
479     }
480
481     if (s->avctx->thread_count > 1         &&
482         s->codec_id != AV_CODEC_ID_MPEG4      &&
483         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
484         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
485         (s->codec_id != AV_CODEC_ID_H263P)) {
486         av_log(avctx, AV_LOG_ERROR,
487                "multi threaded encoding not supported by codec\n");
488         return -1;
489     }
490
491     if (s->avctx->thread_count < 1) {
492         av_log(avctx, AV_LOG_ERROR,
493                "automatic thread number detection not supported by codec,"
494                "automatic thread number detection not supported by codec, "
495         return -1;
496     }
497
498     if (s->avctx->thread_count > 1)
499         s->rtp_mode = 1;
500
501     if (!avctx->time_base.den || !avctx->time_base.num) {
502         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
503         return -1;
504     }
505
506     i = (INT_MAX / 2 + 128) >> 8;
507     if (avctx->mb_threshold >= i) {
508         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
509                i - 1);
510         return -1;
511     }
512
513     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
514         av_log(avctx, AV_LOG_INFO,
515                "notice: b_frame_strategy only affects the first pass\n");
516         avctx->b_frame_strategy = 0;
517     }
518
519     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
520     if (i > 1) {
521         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
522         avctx->time_base.den /= i;
523         avctx->time_base.num /= i;
524         //return -1;
525     }
526
527     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
528         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
529         // (a + x * 3 / 8) / x
530         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
531         s->inter_quant_bias = 0;
532     } else {
533         s->intra_quant_bias = 0;
534         // (a - x / 4) / x
535         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
536     }
537
538     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
539         s->intra_quant_bias = avctx->intra_quant_bias;
540     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
541         s->inter_quant_bias = avctx->inter_quant_bias;
542
543     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
544         s->avctx->time_base.den > (1 << 16) - 1) {
545         av_log(avctx, AV_LOG_ERROR,
546                "timebase %d/%d not supported by MPEG 4 standard, "
547                "the maximum admitted value for the timebase denominator "
548                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
549                (1 << 16) - 1);
550         return -1;
551     }
552     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
553
554     switch (avctx->codec->id) {
555     case AV_CODEC_ID_MPEG1VIDEO:
556         s->out_format = FMT_MPEG1;
557         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
558         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
559         break;
560     case AV_CODEC_ID_MPEG2VIDEO:
561         s->out_format = FMT_MPEG1;
562         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
563         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
564         s->rtp_mode   = 1;
565         break;
566     case AV_CODEC_ID_MJPEG:
567         s->out_format = FMT_MJPEG;
568         s->intra_only = 1; /* force intra only for jpeg */
569         if (!CONFIG_MJPEG_ENCODER ||
570             ff_mjpeg_encode_init(s) < 0)
571             return -1;
572         avctx->delay = 0;
573         s->low_delay = 1;
574         break;
575     case AV_CODEC_ID_H261:
576         if (!CONFIG_H261_ENCODER)
577             return -1;
578         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
579             av_log(avctx, AV_LOG_ERROR,
580                    "The specified picture size of %dx%d is not valid for the "
581                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
582                     s->width, s->height);
583             return -1;
584         }
585         s->out_format = FMT_H261;
586         avctx->delay  = 0;
587         s->low_delay  = 1;
588         break;
589     case AV_CODEC_ID_H263:
590         if (!CONFIG_H263_ENCODER)
591             return -1;
592         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
593                              s->width, s->height) == 8) {
594             av_log(avctx, AV_LOG_INFO,
595                    "The specified picture size of %dx%d is not valid for "
596                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
597                    "352x288, 704x576, and 1408x1152. "
598                    "Try H.263+.\n", s->width, s->height);
599             return -1;
600         }
601         s->out_format = FMT_H263;
602         avctx->delay  = 0;
603         s->low_delay  = 1;
604         break;
605     case AV_CODEC_ID_H263P:
606         s->out_format = FMT_H263;
607         s->h263_plus  = 1;
608         /* Fx */
609         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
610         s->modified_quant  = s->h263_aic;
611         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
612         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
613
614         /* /Fx */
615         /* These are just to be sure */
616         avctx->delay = 0;
617         s->low_delay = 1;
618         break;
619     case AV_CODEC_ID_FLV1:
620         s->out_format      = FMT_H263;
621         s->h263_flv        = 2; /* format = 1; 11-bit codes */
622         s->unrestricted_mv = 1;
623         s->rtp_mode  = 0; /* don't allow GOB */
624         avctx->delay = 0;
625         s->low_delay = 1;
626         break;
627     case AV_CODEC_ID_RV10:
628         s->out_format = FMT_H263;
629         avctx->delay  = 0;
630         s->low_delay  = 1;
631         break;
632     case AV_CODEC_ID_RV20:
633         s->out_format      = FMT_H263;
634         avctx->delay       = 0;
635         s->low_delay       = 1;
636         s->modified_quant  = 1;
637         s->h263_aic        = 1;
638         s->h263_plus       = 1;
639         s->loop_filter     = 1;
640         s->unrestricted_mv = 0;
641         break;
642     case AV_CODEC_ID_MPEG4:
643         s->out_format      = FMT_H263;
644         s->h263_pred       = 1;
645         s->unrestricted_mv = 1;
646         s->low_delay       = s->max_b_frames ? 0 : 1;
647         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
648         break;
649     case AV_CODEC_ID_MSMPEG4V2:
650         s->out_format      = FMT_H263;
651         s->h263_pred       = 1;
652         s->unrestricted_mv = 1;
653         s->msmpeg4_version = 2;
654         avctx->delay       = 0;
655         s->low_delay       = 1;
656         break;
657     case AV_CODEC_ID_MSMPEG4V3:
658         s->out_format        = FMT_H263;
659         s->h263_pred         = 1;
660         s->unrestricted_mv   = 1;
661         s->msmpeg4_version   = 3;
662         s->flipflop_rounding = 1;
663         avctx->delay         = 0;
664         s->low_delay         = 1;
665         break;
666     case AV_CODEC_ID_WMV1:
667         s->out_format        = FMT_H263;
668         s->h263_pred         = 1;
669         s->unrestricted_mv   = 1;
670         s->msmpeg4_version   = 4;
671         s->flipflop_rounding = 1;
672         avctx->delay         = 0;
673         s->low_delay         = 1;
674         break;
675     case AV_CODEC_ID_WMV2:
676         s->out_format        = FMT_H263;
677         s->h263_pred         = 1;
678         s->unrestricted_mv   = 1;
679         s->msmpeg4_version   = 5;
680         s->flipflop_rounding = 1;
681         avctx->delay         = 0;
682         s->low_delay         = 1;
683         break;
684     default:
685         return -1;
686     }
687
688     avctx->has_b_frames = !s->low_delay;
689
690     s->encoding = 1;
691
692     s->progressive_frame    =
693     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
694                                                 CODEC_FLAG_INTERLACED_ME) ||
695                                 s->alternate_scan);
696
697     /* init */
698     if (ff_MPV_common_init(s) < 0)
699         return -1;
700
701     if (ARCH_X86)
702         ff_MPV_encode_init_x86(s);
703
704     ff_fdctdsp_init(&s->fdsp, avctx);
705     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
706     ff_pixblockdsp_init(&s->pdsp, avctx);
707     ff_qpeldsp_init(&s->qdsp);
708
709     s->avctx->coded_frame = s->current_picture.f;
710
711     if (s->msmpeg4_version) {
712         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
713                           2 * 2 * (MAX_LEVEL + 1) *
714                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
715     }
716     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
717
718     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
719     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
720     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
721     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
722     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
723                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
724     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
725                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
726
727     if (s->avctx->noise_reduction) {
728         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
729                           2 * 64 * sizeof(uint16_t), fail);
730     }
731
732     if (CONFIG_H263_ENCODER)
733         ff_h263dsp_init(&s->h263dsp);
734     if (!s->dct_quantize)
735         s->dct_quantize = ff_dct_quantize_c;
736     if (!s->denoise_dct)
737         s->denoise_dct  = denoise_dct_c;
738     s->fast_dct_quantize = s->dct_quantize;
739     if (avctx->trellis)
740         s->dct_quantize  = dct_quantize_trellis_c;
741
742     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
743         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
744
745     s->quant_precision = 5;
746
747     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
748     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
749
750     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
751         ff_h261_encode_init(s);
752     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
753         ff_h263_encode_init(s);
754     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
755         ff_msmpeg4_encode_init(s);
756     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
757         && s->out_format == FMT_MPEG1)
758         ff_mpeg1_encode_init(s);
759
760     /* init q matrix */
761     for (i = 0; i < 64; i++) {
762         int j = s->idsp.idct_permutation[i];
763         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
764             s->mpeg_quant) {
765             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
766             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
767         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
768             s->intra_matrix[j] =
769             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
770         } else {
771             /* mpeg1/2 */
772             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
773             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
774         }
775         if (s->avctx->intra_matrix)
776             s->intra_matrix[j] = s->avctx->intra_matrix[i];
777         if (s->avctx->inter_matrix)
778             s->inter_matrix[j] = s->avctx->inter_matrix[i];
779     }
780
781     /* precompute matrix */
782     /* for mjpeg, we do include qscale in the matrix */
783     if (s->out_format != FMT_MJPEG) {
784         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
785                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
786                           31, 1);
787         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
788                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
789                           31, 0);
790     }
791
792     if (ff_rate_control_init(s) < 0)
793         return -1;
794
795 #if FF_API_ERROR_RATE
796     FF_DISABLE_DEPRECATION_WARNINGS
797     if (avctx->error_rate)
798         s->error_rate = avctx->error_rate;
799     FF_ENABLE_DEPRECATION_WARNINGS;
800 #endif
801
802 #if FF_API_NORMALIZE_AQP
803     FF_DISABLE_DEPRECATION_WARNINGS
804     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
805         s->mpv_flags |= FF_MPV_FLAG_NAQ;
806     FF_ENABLE_DEPRECATION_WARNINGS;
807 #endif
808
809 #if FF_API_MV0
810     FF_DISABLE_DEPRECATION_WARNINGS
811     if (avctx->flags & CODEC_FLAG_MV0)
812         s->mpv_flags |= FF_MPV_FLAG_MV0;
813     FF_ENABLE_DEPRECATION_WARNINGS
814 #endif
815
816     if (avctx->b_frame_strategy == 2) {
817         for (i = 0; i < s->max_b_frames + 2; i++) {
818             s->tmp_frames[i] = av_frame_alloc();
819             if (!s->tmp_frames[i])
820                 return AVERROR(ENOMEM);
821
822             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
823             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
824             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
825
826             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
827             if (ret < 0)
828                 return ret;
829         }
830     }
831
832     return 0;
833 fail:
834     ff_MPV_encode_end(avctx);
835     return AVERROR_UNKNOWN;
836 }
837
838 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
839 {
840     MpegEncContext *s = avctx->priv_data;
841     int i;
842
843     ff_rate_control_uninit(s);
844
845     ff_MPV_common_end(s);
846     if (CONFIG_MJPEG_ENCODER &&
847         s->out_format == FMT_MJPEG)
848         ff_mjpeg_encode_close(s);
849
850     av_freep(&avctx->extradata);
851
852     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
853         av_frame_free(&s->tmp_frames[i]);
854
855     ff_free_picture_tables(&s->new_picture);
856     ff_mpeg_unref_picture(s, &s->new_picture);
857
858     av_freep(&s->avctx->stats_out);
859     av_freep(&s->ac_stats);
860
861     av_freep(&s->q_intra_matrix);
862     av_freep(&s->q_inter_matrix);
863     av_freep(&s->q_intra_matrix16);
864     av_freep(&s->q_inter_matrix16);
865     av_freep(&s->input_picture);
866     av_freep(&s->reordered_input_picture);
867     av_freep(&s->dct_offset);
868
869     return 0;
870 }
871
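/**
 * Return the sum of absolute differences between every pixel of a 16x16 block
 * at src and the constant reference value ref.
 */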
872 static int get_sae(uint8_t *src, int ref, int stride)
873 {
874     int x,y;
875     int acc = 0;
876
877     for (y = 0; y < 16; y++) {
878         for (x = 0; x < 16; x++) {
879             acc += FFABS(src[x + y * stride] - ref);
880         }
881     }
882
883     return acc;
884 }
885
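/**
 * Count the 16x16 blocks of src for which coding against their own mean
 * (SAE plus a margin) looks cheaper than coding against the reference frame
 * (SAD), i.e. blocks that are likely better coded as intra.
 */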
886 static int get_intra_count(MpegEncContext *s, uint8_t *src,
887                            uint8_t *ref, int stride)
888 {
889     int x, y, w, h;
890     int acc = 0;
891
892     w = s->width  & ~15;
893     h = s->height & ~15;
894
895     for (y = 0; y < h; y += 16) {
896         for (x = 0; x < w; x += 16) {
897             int offset = x + y * stride;
898             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
899                                      16);
900             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
901             int sae  = get_sae(src + offset, mean, stride);
902
903             acc += sae + 500 < sad;
904         }
905     }
906     return acc;
907 }
908
909
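/**
 * Validate the timestamps of pic_arg, copy or directly reference its data into
 * an internal Picture and append it to the input_picture queue (a NULL pic_arg
 * simply shifts the queue).
 */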
910 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
911 {
912     Picture *pic = NULL;
913     int64_t pts;
914     int i, display_picture_number = 0, ret;
915     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
916                                                  (s->low_delay ? 0 : 1);
917     int direct = 1;
918
919     if (pic_arg) {
920         pts = pic_arg->pts;
921         display_picture_number = s->input_picture_number++;
922
923         if (pts != AV_NOPTS_VALUE) {
924             if (s->user_specified_pts != AV_NOPTS_VALUE) {
925                 int64_t time = pts;
926                 int64_t last = s->user_specified_pts;
927
928                 if (time <= last) {
929                     av_log(s->avctx, AV_LOG_ERROR,
930                            "Error, Invalid timestamp=%"PRId64", "
931                            "last=%"PRId64"\n", pts, s->user_specified_pts);
932                     return -1;
933                 }
934
935                 if (!s->low_delay && display_picture_number == 1)
936                     s->dts_delta = time - last;
937             }
938             s->user_specified_pts = pts;
939         } else {
940             if (s->user_specified_pts != AV_NOPTS_VALUE) {
941                 s->user_specified_pts =
942                 pts = s->user_specified_pts + 1;
943                 av_log(s->avctx, AV_LOG_INFO,
944                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
945                        pts);
946             } else {
947                 pts = display_picture_number;
948             }
949         }
950     }
951
952     if (pic_arg) {
953         if (!pic_arg->buf[0])
954             direct = 0;
955         if (pic_arg->linesize[0] != s->linesize)
956             direct = 0;
957         if (pic_arg->linesize[1] != s->uvlinesize)
958             direct = 0;
959         if (pic_arg->linesize[2] != s->uvlinesize)
960             direct = 0;
961
962         av_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
963                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
964
965         if (direct) {
966             i = ff_find_unused_picture(s, 1);
967             if (i < 0)
968                 return i;
969
970             pic = &s->picture[i];
971             pic->reference = 3;
972
973             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
974                 return ret;
975             if (ff_alloc_picture(s, pic, 1) < 0) {
976                 return -1;
977             }
978         } else {
979             i = ff_find_unused_picture(s, 0);
980             if (i < 0)
981                 return i;
982
983             pic = &s->picture[i];
984             pic->reference = 3;
985
986             if (ff_alloc_picture(s, pic, 0) < 0) {
987                 return -1;
988             }
989
990             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
991                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
992                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
993                 // empty
994             } else {
995                 int h_chroma_shift, v_chroma_shift;
996                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
997                                                  &h_chroma_shift,
998                                                  &v_chroma_shift);
999
1000                 for (i = 0; i < 3; i++) {
1001                     int src_stride = pic_arg->linesize[i];
1002                     int dst_stride = i ? s->uvlinesize : s->linesize;
1003                     int h_shift = i ? h_chroma_shift : 0;
1004                     int v_shift = i ? v_chroma_shift : 0;
1005                     int w = s->width  >> h_shift;
1006                     int h = s->height >> v_shift;
1007                     uint8_t *src = pic_arg->data[i];
1008                     uint8_t *dst = pic->f->data[i];
1009
1010                     if (!s->avctx->rc_buffer_size)
1011                         dst += INPLACE_OFFSET;
1012
1013                     if (src_stride == dst_stride)
1014                         memcpy(dst, src, src_stride * h);
1015                     else {
1016                         while (h--) {
1017                             memcpy(dst, src, w);
1018                             dst += dst_stride;
1019                             src += src_stride;
1020                         }
1021                     }
1022                 }
1023             }
1024         }
1025         ret = av_frame_copy_props(pic->f, pic_arg);
1026         if (ret < 0)
1027             return ret;
1028
1029         pic->f->display_picture_number = display_picture_number;
1030         pic->f->pts = pts; // we set this here to avoid modifying pic_arg
1031     }
1032
1033     /* shift buffer entries */
1034     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1035         s->input_picture[i - 1] = s->input_picture[i];
1036
1037     s->input_picture[encoding_delay] = (Picture*) pic;
1038
1039     return 0;
1040 }
1041
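/**
 * Compare p against ref with frame_skip_cmp, accumulate the per-block scores
 * according to frame_skip_exp and return 1 if the frame is similar enough to
 * the reference to be skipped.
 */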
1042 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1043 {
1044     int x, y, plane;
1045     int score = 0;
1046     int64_t score64 = 0;
1047
1048     for (plane = 0; plane < 3; plane++) {
1049         const int stride = p->f->linesize[plane];
1050         const int bw = plane ? 1 : 2;
1051         for (y = 0; y < s->mb_height * bw; y++) {
1052             for (x = 0; x < s->mb_width * bw; x++) {
1053                 int off = p->shared ? 0 : 16;
1054                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1055                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1056                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1057
1058                 switch (s->avctx->frame_skip_exp) {
1059                 case 0: score    =  FFMAX(score, v);          break;
1060                 case 1: score   += FFABS(v);                  break;
1061                 case 2: score   += v * v;                     break;
1062                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1063                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1064                 }
1065             }
1066         }
1067     }
1068
1069     if (score)
1070         score64 = score;
1071
1072     if (score64 < s->avctx->frame_skip_threshold)
1073         return 1;
1074     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1075         return 1;
1076     return 0;
1077 }
1078
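/**
 * Encode one frame with the given codec context and return the size of the
 * resulting packet in bytes, or a negative error code.
 */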
1079 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1080 {
1081     AVPacket pkt = { 0 };
1082     int ret, got_output;
1083
1084     av_init_packet(&pkt);
1085     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1086     if (ret < 0)
1087         return ret;
1088
1089     ret = pkt.size;
1090     av_free_packet(&pkt);
1091     return ret;
1092 }
1093
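/**
 * Estimate the best number of B-frames before the next P-frame by encoding
 * brd_scale-downscaled copies of the queued input pictures with every
 * candidate B-frame count and picking the one with the lowest RD cost.
 */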
1094 static int estimate_best_b_count(MpegEncContext *s)
1095 {
1096     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1097     AVCodecContext *c = avcodec_alloc_context3(NULL);
1098     const int scale = s->avctx->brd_scale;
1099     int i, j, out_size, p_lambda, b_lambda, lambda2;
1100     int64_t best_rd  = INT64_MAX;
1101     int best_b_count = -1;
1102
1103     assert(scale >= 0 && scale <= 3);
1104
1105     //emms_c();
1106     //s->next_picture_ptr->quality;
1107     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1108     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1109     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1110     if (!b_lambda) // FIXME we should do this somewhere else
1111         b_lambda = p_lambda;
1112     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1113                FF_LAMBDA_SHIFT;
1114
1115     c->width        = s->width  >> scale;
1116     c->height       = s->height >> scale;
1117     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1118     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1119     c->mb_decision  = s->avctx->mb_decision;
1120     c->me_cmp       = s->avctx->me_cmp;
1121     c->mb_cmp       = s->avctx->mb_cmp;
1122     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1123     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1124     c->time_base    = s->avctx->time_base;
1125     c->max_b_frames = s->max_b_frames;
1126
1127     if (avcodec_open2(c, codec, NULL) < 0)
1128         return -1;
1129
1130     for (i = 0; i < s->max_b_frames + 2; i++) {
1131         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1132                                                 s->next_picture_ptr;
1133
1134         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1135             pre_input = *pre_input_ptr;
1136
1137             if (!pre_input.shared && i) {
1138                 pre_input.f->data[0] += INPLACE_OFFSET;
1139                 pre_input.f->data[1] += INPLACE_OFFSET;
1140                 pre_input.f->data[2] += INPLACE_OFFSET;
1141             }
1142
1143             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1144                                        s->tmp_frames[i]->linesize[0],
1145                                        pre_input.f->data[0],
1146                                        pre_input.f->linesize[0],
1147                                        c->width, c->height);
1148             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1149                                        s->tmp_frames[i]->linesize[1],
1150                                        pre_input.f->data[1],
1151                                        pre_input.f->linesize[1],
1152                                        c->width >> 1, c->height >> 1);
1153             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1154                                        s->tmp_frames[i]->linesize[2],
1155                                        pre_input.f->data[2],
1156                                        pre_input.f->linesize[2],
1157                                        c->width >> 1, c->height >> 1);
1158         }
1159     }
1160
1161     for (j = 0; j < s->max_b_frames + 1; j++) {
1162         int64_t rd = 0;
1163
1164         if (!s->input_picture[j])
1165             break;
1166
1167         c->error[0] = c->error[1] = c->error[2] = 0;
1168
1169         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1170         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1171
1172         out_size = encode_frame(c, s->tmp_frames[0]);
1173
1174         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1175
1176         for (i = 0; i < s->max_b_frames + 1; i++) {
1177             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1178
1179             s->tmp_frames[i + 1]->pict_type = is_p ?
1180                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1181             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1182
1183             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1184
1185             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1186         }
1187
1188         /* get the delayed frames */
1189         while (out_size) {
1190             out_size = encode_frame(c, NULL);
1191             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1192         }
1193
1194         rd += c->error[0] + c->error[1] + c->error[2];
1195
1196         if (rd < best_rd) {
1197             best_rd = rd;
1198             best_b_count = j;
1199         }
1200     }
1201
1202     avcodec_close(c);
1203     av_freep(&c);
1204
1205     return best_b_count;
1206 }
1207
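/**
 * Pick the next picture to encode from the input queue, decide its coding type
 * and the number of B-frames preceding the next reference frame, and set up
 * new_picture / current_picture_ptr accordingly.
 */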
1208 static int select_input_picture(MpegEncContext *s)
1209 {
1210     int i, ret;
1211
1212     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1213         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1214     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1215
1216     /* set next picture type & ordering */
1217     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1218         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1219             s->next_picture_ptr == NULL || s->intra_only) {
1220             s->reordered_input_picture[0] = s->input_picture[0];
1221             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1222             s->reordered_input_picture[0]->f->coded_picture_number =
1223                 s->coded_picture_number++;
1224         } else {
1225             int b_frames;
1226
1227             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1228                 if (s->picture_in_gop_number < s->gop_size &&
1229                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1230                     // FIXME check that the gop check above is +-1 correct
1231                     av_frame_unref(s->input_picture[0]->f);
1232
1233                     emms_c();
1234                     ff_vbv_update(s, 0);
1235
1236                     goto no_output_pic;
1237                 }
1238             }
1239
1240             if (s->flags & CODEC_FLAG_PASS2) {
1241                 for (i = 0; i < s->max_b_frames + 1; i++) {
1242                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1243
1244                     if (pict_num >= s->rc_context.num_entries)
1245                         break;
1246                     if (!s->input_picture[i]) {
1247                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1248                         break;
1249                     }
1250
1251                     s->input_picture[i]->f->pict_type =
1252                         s->rc_context.entry[pict_num].new_pict_type;
1253                 }
1254             }
1255
1256             if (s->avctx->b_frame_strategy == 0) {
1257                 b_frames = s->max_b_frames;
1258                 while (b_frames && !s->input_picture[b_frames])
1259                     b_frames--;
1260             } else if (s->avctx->b_frame_strategy == 1) {
1261                 for (i = 1; i < s->max_b_frames + 1; i++) {
1262                     if (s->input_picture[i] &&
1263                         s->input_picture[i]->b_frame_score == 0) {
1264                         s->input_picture[i]->b_frame_score =
1265                             get_intra_count(s,
1266                                             s->input_picture[i    ]->f->data[0],
1267                                             s->input_picture[i - 1]->f->data[0],
1268                                             s->linesize) + 1;
1269                     }
1270                 }
1271                 for (i = 0; i < s->max_b_frames + 1; i++) {
1272                     if (s->input_picture[i] == NULL ||
1273                         s->input_picture[i]->b_frame_score - 1 >
1274                             s->mb_num / s->avctx->b_sensitivity)
1275                         break;
1276                 }
1277
1278                 b_frames = FFMAX(0, i - 1);
1279
1280                 /* reset scores */
1281                 for (i = 0; i < b_frames + 1; i++) {
1282                     s->input_picture[i]->b_frame_score = 0;
1283                 }
1284             } else if (s->avctx->b_frame_strategy == 2) {
1285                 b_frames = estimate_best_b_count(s);
1286             } else {
1287                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1288                 b_frames = 0;
1289             }
1290
1291             emms_c();
1292
1293             for (i = b_frames - 1; i >= 0; i--) {
1294                 int type = s->input_picture[i]->f->pict_type;
1295                 if (type && type != AV_PICTURE_TYPE_B)
1296                     b_frames = i;
1297             }
1298             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1299                 b_frames == s->max_b_frames) {
1300                 av_log(s->avctx, AV_LOG_ERROR,
1301                        "warning, too many b frames in a row\n");
1302             }
1303
1304             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1305                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1306                     s->gop_size > s->picture_in_gop_number) {
1307                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1308                 } else {
1309                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1310                         b_frames = 0;
1311                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1312                 }
1313             }
1314
1315             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1316                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1317                 b_frames--;
1318
1319             s->reordered_input_picture[0] = s->input_picture[b_frames];
1320             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1321                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1322             s->reordered_input_picture[0]->f->coded_picture_number =
1323                 s->coded_picture_number++;
1324             for (i = 0; i < b_frames; i++) {
1325                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1326                 s->reordered_input_picture[i + 1]->f->pict_type =
1327                     AV_PICTURE_TYPE_B;
1328                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1329                     s->coded_picture_number++;
1330             }
1331         }
1332     }
1333 no_output_pic:
1334     if (s->reordered_input_picture[0]) {
1335         s->reordered_input_picture[0]->reference =
1336            s->reordered_input_picture[0]->f->pict_type !=
1337                AV_PICTURE_TYPE_B ? 3 : 0;
1338
1339         ff_mpeg_unref_picture(s, &s->new_picture);
1340         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1341             return ret;
1342
1343         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1344             // input is a shared pix, so we can't modify it -> alloc a new
1345             // one & ensure that the shared one is reusable
1346
1347             Picture *pic;
1348             int i = ff_find_unused_picture(s, 0);
1349             if (i < 0)
1350                 return i;
1351             pic = &s->picture[i];
1352
1353             pic->reference = s->reordered_input_picture[0]->reference;
1354             if (ff_alloc_picture(s, pic, 0) < 0) {
1355                 return -1;
1356             }
1357
1358             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1359             if (ret < 0)
1360                 return ret;
1361
1362             /* mark us unused / free shared pic */
1363             av_frame_unref(s->reordered_input_picture[0]->f);
1364             s->reordered_input_picture[0]->shared = 0;
1365
1366             s->current_picture_ptr = pic;
1367         } else {
1368             // input is not a shared pix -> reuse buffer for current_pix
1369             s->current_picture_ptr = s->reordered_input_picture[0];
1370             for (i = 0; i < 4; i++) {
1371                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1372             }
1373         }
1374         ff_mpeg_unref_picture(s, &s->current_picture);
1375         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1376                                        s->current_picture_ptr)) < 0)
1377             return ret;
1378
1379         s->picture_number = s->new_picture.f->display_picture_number;
1380     } else {
1381         ff_mpeg_unref_picture(s, &s->new_picture);
1382     }
1383     return 0;
1384 }
1385
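/**
 * Finish encoding the current frame: pad the edges of reference pictures,
 * update the per-picture-type lambda statistics and release non-reference
 * frames.
 */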
1386 static void frame_end(MpegEncContext *s)
1387 {
1388     int i;
1389
1390     if (s->unrestricted_mv &&
1391         s->current_picture.reference &&
1392         !s->intra_only) {
1393         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1394         int hshift = desc->log2_chroma_w;
1395         int vshift = desc->log2_chroma_h;
1396         s->mpvencdsp.draw_edges(s->current_picture.f->data[0], s->linesize,
1397                                 s->h_edge_pos, s->v_edge_pos,
1398                                 EDGE_WIDTH, EDGE_WIDTH,
1399                                 EDGE_TOP | EDGE_BOTTOM);
1400         s->mpvencdsp.draw_edges(s->current_picture.f->data[1], s->uvlinesize,
1401                                 s->h_edge_pos >> hshift,
1402                                 s->v_edge_pos >> vshift,
1403                                 EDGE_WIDTH >> hshift,
1404                                 EDGE_WIDTH >> vshift,
1405                                 EDGE_TOP | EDGE_BOTTOM);
1406         s->mpvencdsp.draw_edges(s->current_picture.f->data[2], s->uvlinesize,
1407                                 s->h_edge_pos >> hshift,
1408                                 s->v_edge_pos >> vshift,
1409                                 EDGE_WIDTH >> hshift,
1410                                 EDGE_WIDTH >> vshift,
1411                                 EDGE_TOP | EDGE_BOTTOM);
1412     }
1413
1414     emms_c();
1415
1416     s->last_pict_type                 = s->pict_type;
1417     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1418     if (s->pict_type!= AV_PICTURE_TYPE_B)
1419         s->last_non_b_pict_type = s->pict_type;
1420
1421     if (s->encoding) {
1422         /* release non-reference frames */
1423         for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1424             if (!s->picture[i].reference)
1425                 ff_mpeg_unref_picture(s, &s->picture[i]);
1426         }
1427     }
1428
1429     s->avctx->coded_frame = s->current_picture_ptr->f;
1430
1431 }
1432
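/**
 * Rescale the accumulated DCT noise-reduction statistics and recompute
 * dct_offset from the error sums.
 */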
1433 static void update_noise_reduction(MpegEncContext *s)
1434 {
1435     int intra, i;
1436
1437     for (intra = 0; intra < 2; intra++) {
1438         if (s->dct_count[intra] > (1 << 16)) {
1439             for (i = 0; i < 64; i++) {
1440                 s->dct_error_sum[intra][i] >>= 1;
1441             }
1442             s->dct_count[intra] >>= 1;
1443         }
1444
1445         for (i = 0; i < 64; i++) {
1446             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1447                                        s->dct_count[intra] +
1448                                        s->dct_error_sum[intra][i] / 2) /
1449                                       (s->dct_error_sum[intra][i] + 1);
1450         }
1451     }
1452 }
1453
1454 static int frame_start(MpegEncContext *s)
1455 {
1456     int ret;
1457
1458     /* mark & release old frames */
1459     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1460         s->last_picture_ptr != s->next_picture_ptr &&
1461         s->last_picture_ptr->f->buf[0]) {
1462         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1463     }
1464
1465     s->current_picture_ptr->f->pict_type = s->pict_type;
1466     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1467
1468     ff_mpeg_unref_picture(s, &s->current_picture);
1469     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1470                                    s->current_picture_ptr)) < 0)
1471         return ret;
1472
1473     if (s->pict_type != AV_PICTURE_TYPE_B) {
1474         s->last_picture_ptr = s->next_picture_ptr;
1475         if (!s->droppable)
1476             s->next_picture_ptr = s->current_picture_ptr;
1477     }
1478
1479     if (s->last_picture_ptr) {
1480         ff_mpeg_unref_picture(s, &s->last_picture);
1481         if (s->last_picture_ptr->f->buf[0] &&
1482             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1483                                        s->last_picture_ptr)) < 0)
1484             return ret;
1485     }
1486     if (s->next_picture_ptr) {
1487         ff_mpeg_unref_picture(s, &s->next_picture);
1488         if (s->next_picture_ptr->f->buf[0] &&
1489             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1490                                        s->next_picture_ptr)) < 0)
1491             return ret;
1492     }
1493
1494     if (s->picture_structure != PICT_FRAME) {
1495         int i;
1496         for (i = 0; i < 4; i++) {
1497             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1498                 s->current_picture.f->data[i] +=
1499                     s->current_picture.f->linesize[i];
1500             }
1501             s->current_picture.f->linesize[i] *= 2;
1502             s->last_picture.f->linesize[i]    *= 2;
1503             s->next_picture.f->linesize[i]    *= 2;
1504         }
1505     }
1506
1507     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1508         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1509         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1510     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1511         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1512         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1513     } else {
1514         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1515         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1516     }
1517
1518     if (s->dct_error_sum) {
1519         assert(s->avctx->noise_reduction && s->encoding);
1520         update_noise_reduction(s);
1521     }
1522
1523     return 0;
1524 }
1525
1526 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1527                           const AVFrame *pic_arg, int *got_packet)
1528 {
1529     MpegEncContext *s = avctx->priv_data;
1530     int i, stuffing_count, ret;
1531     int context_count = s->slice_context_count;
1532
1533     s->picture_in_gop_number++;
1534
1535     if (load_input_picture(s, pic_arg) < 0)
1536         return -1;
1537
1538     if (select_input_picture(s) < 0) {
1539         return -1;
1540     }
1541
1542     /* output? */
1543     if (s->new_picture.f->data[0]) {
1544         if (!pkt->data &&
1545             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1546             return ret;
1547         if (s->mb_info) {
1548             s->mb_info_ptr = av_packet_new_side_data(pkt,
1549                                  AV_PKT_DATA_H263_MB_INFO,
1550                                  s->mb_width*s->mb_height*12);
1551             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1552         }
1553
1554         for (i = 0; i < context_count; i++) {
1555             int start_y = s->thread_context[i]->start_mb_y;
1556             int   end_y = s->thread_context[i]->  end_mb_y;
1557             int h       = s->mb_height;
1558             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1559             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1560
1561             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1562         }
1563
1564         s->pict_type = s->new_picture.f->pict_type;
1565         //emms_c();
1566         ret = frame_start(s);
1567         if (ret < 0)
1568             return ret;
1569 vbv_retry:
1570         if (encode_picture(s, s->picture_number) < 0)
1571             return -1;
1572
1573         avctx->header_bits = s->header_bits;
1574         avctx->mv_bits     = s->mv_bits;
1575         avctx->misc_bits   = s->misc_bits;
1576         avctx->i_tex_bits  = s->i_tex_bits;
1577         avctx->p_tex_bits  = s->p_tex_bits;
1578         avctx->i_count     = s->i_count;
1579         // FIXME f/b_count in avctx
1580         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1581         avctx->skip_count  = s->skip_count;
1582
1583         frame_end(s);
1584
1585         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1586             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1587
1588         if (avctx->rc_buffer_size) {
1589             RateControlContext *rcc = &s->rc_context;
1590             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1591
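            /* If the coded frame overflows the usable part of the VBV buffer,
             * raise lambda (and the per-MB lambda table when adaptive
             * quantization is enabled), undo the per-frame state changed by
             * encode_picture(), reset the slice bit writers and re-encode the
             * frame (see the vbv_retry label above). */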
1592             if (put_bits_count(&s->pb) > max_size &&
1593                 s->lambda < s->avctx->lmax) {
1594                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1595                                        (s->qscale + 1) / s->qscale);
1596                 if (s->adaptive_quant) {
1597                     int i;
1598                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1599                         s->lambda_table[i] =
1600                             FFMAX(s->lambda_table[i] + 1,
1601                                   s->lambda_table[i] * (s->qscale + 1) /
1602                                   s->qscale);
1603                 }
1604                 s->mb_skipped = 0;        // done in frame_start()
1605                 // the following state was changed in encode_picture(), so undo it before retrying
1606                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1607                     if (s->flipflop_rounding          ||
1608                         s->codec_id == AV_CODEC_ID_H263P ||
1609                         s->codec_id == AV_CODEC_ID_MPEG4)
1610                         s->no_rounding ^= 1;
1611                 }
1612                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1613                     s->time_base       = s->last_time_base;
1614                     s->last_non_b_time = s->time - s->pp_time;
1615                 }
1616                 for (i = 0; i < context_count; i++) {
1617                     PutBitContext *pb = &s->thread_context[i]->pb;
1618                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1619                 }
1620                 goto vbv_retry;
1621             }
1622
1623             assert(s->avctx->rc_max_rate);
1624         }
1625
1626         if (s->flags & CODEC_FLAG_PASS1)
1627             ff_write_pass1_stats(s);
1628
1629         for (i = 0; i < 4; i++) {
1630             s->current_picture_ptr->f->error[i] = s->current_picture.f->error[i];
1631             avctx->error[i] += s->current_picture_ptr->f->error[i];
1632         }
1633
1634         if (s->flags & CODEC_FLAG_PASS1)
1635             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1636                    avctx->i_tex_bits + avctx->p_tex_bits ==
1637                        put_bits_count(&s->pb));
1638         flush_put_bits(&s->pb);
1639         s->frame_bits  = put_bits_count(&s->pb);
1640
1641         stuffing_count = ff_vbv_update(s, s->frame_bits);
1642         if (stuffing_count) {
1643             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1644                     stuffing_count + 50) {
1645                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1646                 return -1;
1647             }
1648
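            /* Append stuffing so the frame reaches the size required by the
             * rate-control/VBV model: MPEG-1/2 allow plain zero bytes, while
             * for MPEG-4 the sequence 0x000001C3 is written first, followed by
             * 0xFF padding bytes; other codecs cannot be padded this way, so a
             * VBV buffer overflow is reported instead. */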
1649             switch (s->codec_id) {
1650             case AV_CODEC_ID_MPEG1VIDEO:
1651             case AV_CODEC_ID_MPEG2VIDEO:
1652                 while (stuffing_count--) {
1653                     put_bits(&s->pb, 8, 0);
1654                 }
1655             break;
1656             case AV_CODEC_ID_MPEG4:
1657                 put_bits(&s->pb, 16, 0);
1658                 put_bits(&s->pb, 16, 0x1C3);
1659                 stuffing_count -= 4;
1660                 while (stuffing_count--) {
1661                     put_bits(&s->pb, 8, 0xFF);
1662                 }
1663             break;
1664             default:
1665                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1666             }
1667             flush_put_bits(&s->pb);
1668             s->frame_bits  = put_bits_count(&s->pb);
1669         }
1670
1671         /* update mpeg1/2 vbv_delay for CBR */
1672         if (s->avctx->rc_max_rate                          &&
1673             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1674             s->out_format == FMT_MPEG1                     &&
1675             90000LL * (avctx->rc_buffer_size - 1) <=
1676                 s->avctx->rc_max_rate * 0xFFFFLL) {
1677             int vbv_delay, min_delay;
1678             double inbits  = s->avctx->rc_max_rate *
1679                              av_q2d(s->avctx->time_base);
1680             int    minbits = s->frame_bits - 8 *
1681                              (s->vbv_delay_ptr - s->pb.buf - 1);
1682             double bits    = s->rc_context.buffer_index + minbits - inbits;
1683
1684             if (bits < 0)
1685                 av_log(s->avctx, AV_LOG_ERROR,
1686                        "Internal error, negative bits\n");
1687
1688             assert(s->repeat_first_field == 0);
1689
1690             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1691             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1692                         s->avctx->rc_max_rate;
1693
1694             vbv_delay = FFMAX(vbv_delay, min_delay);
1695
1696             assert(vbv_delay < 0xFFFF);
1697
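            /* Patch the 16-bit vbv_delay field (in 90 kHz ticks) into the
             * picture header that has already been written: the top 3 bits go
             * into the low bits of vbv_delay_ptr[0], the middle 8 bits into
             * vbv_delay_ptr[1], and the low 5 bits into the high bits of
             * vbv_delay_ptr[2].  avctx->vbv_delay is reported in 27 MHz units,
             * hence the factor of 300 below. */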
1698             s->vbv_delay_ptr[0] &= 0xF8;
1699             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1700             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1701             s->vbv_delay_ptr[2] &= 0x07;
1702             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1703             avctx->vbv_delay     = vbv_delay * 300;
1704         }
1705         s->total_bits     += s->frame_bits;
1706         avctx->frame_bits  = s->frame_bits;
1707
1708         pkt->pts = s->current_picture.f->pts;
1709         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1710             if (!s->current_picture.f->coded_picture_number)
1711                 pkt->dts = pkt->pts - s->dts_delta;
1712             else
1713                 pkt->dts = s->reordered_pts;
1714             s->reordered_pts = pkt->pts;
1715         } else
1716             pkt->dts = pkt->pts;
1717         if (s->current_picture.f->key_frame)
1718             pkt->flags |= AV_PKT_FLAG_KEY;
1719         if (s->mb_info)
1720             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1721     } else {
1722         s->frame_bits = 0;
1723     }
1724     assert((s->frame_bits & 7) == 0);
1725
1726     pkt->size = s->frame_bits / 8;
1727     *got_packet = !!pkt->size;
1728     return 0;
1729 }
1730
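/* Heuristic single-coefficient elimination: if a block contains only a few
 * isolated +-1 coefficients, it can be cheaper overall to drop it entirely.
 * Each +-1 coefficient adds tab[run] to a score, where run is the number of
 * zeros preceding it, so tightly clustered coefficients score higher; any
 * coefficient with |level| > 1 keeps the block.  If the total score stays
 * below the threshold, all considered coefficients are zeroed and
 * block_last_index is updated.  A negative threshold means the DC coefficient
 * takes part in the elimination as well. */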
1731 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1732                                                 int n, int threshold)
1733 {
1734     static const char tab[64] = {
1735         3, 2, 2, 1, 1, 1, 1, 1,
1736         1, 1, 1, 1, 1, 1, 1, 1,
1737         1, 1, 1, 1, 1, 1, 1, 1,
1738         0, 0, 0, 0, 0, 0, 0, 0,
1739         0, 0, 0, 0, 0, 0, 0, 0,
1740         0, 0, 0, 0, 0, 0, 0, 0,
1741         0, 0, 0, 0, 0, 0, 0, 0,
1742         0, 0, 0, 0, 0, 0, 0, 0
1743     };
1744     int score = 0;
1745     int run = 0;
1746     int i;
1747     int16_t *block = s->block[n];
1748     const int last_index = s->block_last_index[n];
1749     int skip_dc;
1750
1751     if (threshold < 0) {
1752         skip_dc = 0;
1753         threshold = -threshold;
1754     } else
1755         skip_dc = 1;
1756
1757     /* Are all the coefficients we could set to zero already zero? */
1758     if (last_index <= skip_dc - 1)
1759         return;
1760
1761     for (i = 0; i <= last_index; i++) {
1762         const int j = s->intra_scantable.permutated[i];
1763         const int level = FFABS(block[j]);
1764         if (level == 1) {
1765             if (skip_dc && i == 0)
1766                 continue;
1767             score += tab[run];
1768             run = 0;
1769         } else if (level > 1) {
1770             return;
1771         } else {
1772             run++;
1773         }
1774     }
1775     if (score >= threshold)
1776         return;
1777     for (i = skip_dc; i <= last_index; i++) {
1778         const int j = s->intra_scantable.permutated[i];
1779         block[j] = 0;
1780     }
1781     if (block[0])
1782         s->block_last_index[n] = 0;
1783     else
1784         s->block_last_index[n] = -1;
1785 }
1786
1787 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1788                                int last_index)
1789 {
1790     int i;
1791     const int maxlevel = s->max_qcoeff;
1792     const int minlevel = s->min_qcoeff;
1793     int overflow = 0;
1794
1795     if (s->mb_intra) {
1796         i = 1; // skip clipping of intra dc
1797     } else
1798         i = 0;
1799
1800     for (; i <= last_index; i++) {
1801         const int j = s->intra_scantable.permutated[i];
1802         int level = block[j];
1803
1804         if (level > maxlevel) {
1805             level = maxlevel;
1806             overflow++;
1807         } else if (level < minlevel) {
1808             level = minlevel;
1809             overflow++;
1810         }
1811
1812         block[j] = level;
1813     }
1814
1815     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1816         av_log(s->avctx, AV_LOG_INFO,
1817                "warning, clipping %d dct coefficients to %d..%d\n",
1818                overflow, minlevel, maxlevel);
1819 }
1820
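/* Per-pixel weights for quantizer noise shaping: each weight is 36 times the
 * standard deviation of the pixel's 3x3 neighbourhood (clipped at the block
 * border), computed from the running sum and sum of squares, so textured
 * areas receive larger weights than flat ones. */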
1821 static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
1822 {
1823     int x, y;
1824     // FIXME optimize
1825     for (y = 0; y < 8; y++) {
1826         for (x = 0; x < 8; x++) {
1827             int x2, y2;
1828             int sum = 0;
1829             int sqr = 0;
1830             int count = 0;
1831
1832             for (y2 = FFMAX(y - 1, 0); y2 < FFMIN(8, y + 2); y2++) {
1833                 for (x2= FFMAX(x - 1, 0); x2 < FFMIN(8, x + 2); x2++) {
1834                     int v = ptr[x2 + y2 * stride];
1835                     sum += v;
1836                     sqr += v * v;
1837                     count++;
1838                 }
1839             }
1840             weight[x + 8 * y]= (36 * ff_sqrt(count * sqr - sum * sum)) / count;
1841         }
1842     }
1843 }
1844
1845 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1846                                                 int motion_x, int motion_y,
1847                                                 int mb_block_height,
1848                                                 int mb_block_count)
1849 {
1850     int16_t weight[8][64];
1851     int16_t orig[8][64];
1852     const int mb_x = s->mb_x;
1853     const int mb_y = s->mb_y;
1854     int i;
1855     int skip_dct[8];
1856     int dct_offset = s->linesize * 8; // default for progressive frames
1857     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1858     ptrdiff_t wrap_y, wrap_c;
1859
1860     for (i = 0; i < mb_block_count; i++)
1861         skip_dct[i] = s->skipdct;
1862
1863     if (s->adaptive_quant) {
1864         const int last_qp = s->qscale;
1865         const int mb_xy = mb_x + mb_y * s->mb_stride;
1866
1867         s->lambda = s->lambda_table[mb_xy];
1868         update_qscale(s);
1869
1870         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1871             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1872             s->dquant = s->qscale - last_qp;
1873
1874             if (s->out_format == FMT_H263) {
1875                 s->dquant = av_clip(s->dquant, -2, 2);
1876
1877                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1878                     if (!s->mb_intra) {
1879                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1880                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1881                                 s->dquant = 0;
1882                         }
1883                         if (s->mv_type == MV_TYPE_8X8)
1884                             s->dquant = 0;
1885                     }
1886                 }
1887             }
1888         }
1889         ff_set_qscale(s, last_qp + s->dquant);
1890     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1891         ff_set_qscale(s, s->qscale + s->dquant);
1892
1893     wrap_y = s->linesize;
1894     wrap_c = s->uvlinesize;
1895     ptr_y  = s->new_picture.f->data[0] +
1896              (mb_y * 16 * wrap_y)              + mb_x * 16;
1897     ptr_cb = s->new_picture.f->data[1] +
1898              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1899     ptr_cr = s->new_picture.f->data[2] +
1900              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1901
1902     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1903         uint8_t *ebuf = s->edge_emu_buffer + 32;
1904         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
1905                                  wrap_y, wrap_y,
1906                                  16, 16, mb_x * 16, mb_y * 16,
1907                                  s->width, s->height);
1908         ptr_y = ebuf;
1909         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
1910                                  wrap_c, wrap_c,
1911                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1912                                  s->width >> 1, s->height >> 1);
1913         ptr_cb = ebuf + 18 * wrap_y;
1914         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
1915                                  wrap_c, wrap_c,
1916                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1917                                  s->width >> 1, s->height >> 1);
1918         ptr_cr = ebuf + 18 * wrap_y + 8;
1919     }
1920
1921     if (s->mb_intra) {
1922         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1923             int progressive_score, interlaced_score;
1924
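            /* Choose between frame (progressive) and field (interlaced) DCT:
             * compare the ildct_cmp cost of the two 8-line halves in frame
             * order against the cost with the stride doubled (field order).
             * The -400 bias favours frame DCT; when field order wins,
             * dct_offset becomes a single line and the luma stride is doubled
             * (the chroma stride too for 4:2:2). */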
1925             s->interlaced_dct = 0;
1926             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1927                                                     NULL, wrap_y, 8) +
1928                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1929                                                     NULL, wrap_y, 8) - 400;
1930
1931             if (progressive_score > 0) {
1932                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1933                                                        NULL, wrap_y * 2, 8) +
1934                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1935                                                        NULL, wrap_y * 2, 8);
1936                 if (progressive_score > interlaced_score) {
1937                     s->interlaced_dct = 1;
1938
1939                     dct_offset = wrap_y;
1940                     wrap_y <<= 1;
1941                     if (s->chroma_format == CHROMA_422)
1942                         wrap_c <<= 1;
1943                 }
1944             }
1945         }
1946
1947         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
1948         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
1949         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
1950         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
1951
1952         if (s->flags & CODEC_FLAG_GRAY) {
1953             skip_dct[4] = 1;
1954             skip_dct[5] = 1;
1955         } else {
1956             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1957             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1958             if (!s->chroma_y_shift) { /* 422 */
1959                 s->pdsp.get_pixels(s->block[6],
1960                                    ptr_cb + (dct_offset >> 1), wrap_c);
1961                 s->pdsp.get_pixels(s->block[7],
1962                                    ptr_cr + (dct_offset >> 1), wrap_c);
1963             }
1964         }
1965     } else {
1966         op_pixels_func (*op_pix)[4];
1967         qpel_mc_func (*op_qpix)[16];
1968         uint8_t *dest_y, *dest_cb, *dest_cr;
1969
1970         dest_y  = s->dest[0];
1971         dest_cb = s->dest[1];
1972         dest_cr = s->dest[2];
1973
1974         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1975             op_pix  = s->hdsp.put_pixels_tab;
1976             op_qpix = s->qdsp.put_qpel_pixels_tab;
1977         } else {
1978             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
1979             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
1980         }
1981
1982         if (s->mv_dir & MV_DIR_FORWARD) {
1983             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
1984                           s->last_picture.f->data,
1985                           op_pix, op_qpix);
1986             op_pix  = s->hdsp.avg_pixels_tab;
1987             op_qpix = s->qdsp.avg_qpel_pixels_tab;
1988         }
1989         if (s->mv_dir & MV_DIR_BACKWARD) {
1990             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
1991                           s->next_picture.f->data,
1992                           op_pix, op_qpix);
1993         }
1994
1995         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1996             int progressive_score, interlaced_score;
1997
1998             s->interlaced_dct = 0;
1999             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
2000                                                     ptr_y,              wrap_y,
2001                                                     8) +
2002                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
2003                                                     ptr_y + wrap_y * 8, wrap_y,
2004                                                     8) - 400;
2005
2006             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2007                 progressive_score -= 400;
2008
2009             if (progressive_score > 0) {
2010                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
2011                                                        ptr_y,
2012                                                        wrap_y * 2, 8) +
2013                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
2014                                                        ptr_y + wrap_y,
2015                                                        wrap_y * 2, 8);
2016
2017                 if (progressive_score > interlaced_score) {
2018                     s->interlaced_dct = 1;
2019
2020                     dct_offset = wrap_y;
2021                     wrap_y <<= 1;
2022                     if (s->chroma_format == CHROMA_422)
2023                         wrap_c <<= 1;
2024                 }
2025             }
2026         }
2027
2028         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2029         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2030         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2031                             dest_y + dct_offset, wrap_y);
2032         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2033                             dest_y + dct_offset + 8, wrap_y);
2034
2035         if (s->flags & CODEC_FLAG_GRAY) {
2036             skip_dct[4] = 1;
2037             skip_dct[5] = 1;
2038         } else {
2039             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2040             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2041             if (!s->chroma_y_shift) { /* 422 */
2042                 s->pdsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
2043                                     dest_cb + (dct_offset >> 1), wrap_c);
2044                 s->pdsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
2045                                     dest_cr + (dct_offset >> 1), wrap_c);
2046             }
2047         }
2048         /* pre quantization */
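        /* Skip the DCT for blocks that barely differ from their prediction:
         * this is only attempted when the macroblock's motion-compensated
         * variance is below 2*qscale^2, and an individual block is skipped
         * when its SAD against the prediction is below 20*qscale.  Skipped
         * blocks simply get block_last_index = -1 further down. */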
2049         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2050                 2 * s->qscale * s->qscale) {
2051             // FIXME optimize
2052             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
2053                               wrap_y, 8) < 20 * s->qscale)
2054                 skip_dct[0] = 1;
2055             if (s->dsp.sad[1](NULL, ptr_y + 8,
2056                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2057                 skip_dct[1] = 1;
2058             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
2059                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
2060                 skip_dct[2] = 1;
2061             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
2062                               dest_y + dct_offset + 8,
2063                               wrap_y, 8) < 20 * s->qscale)
2064                 skip_dct[3] = 1;
2065             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
2066                               wrap_c, 8) < 20 * s->qscale)
2067                 skip_dct[4] = 1;
2068             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
2069                               wrap_c, 8) < 20 * s->qscale)
2070                 skip_dct[5] = 1;
2071             if (!s->chroma_y_shift) { /* 422 */
2072                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2073                                   dest_cb + (dct_offset >> 1),
2074                                   wrap_c, 8) < 20 * s->qscale)
2075                     skip_dct[6] = 1;
2076                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2077                                   dest_cr + (dct_offset >> 1),
2078                                   wrap_c, 8) < 20 * s->qscale)
2079                     skip_dct[7] = 1;
2080             }
2081         }
2082     }
2083
2084     if (s->quantizer_noise_shaping) {
2085         if (!skip_dct[0])
2086             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2087         if (!skip_dct[1])
2088             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2089         if (!skip_dct[2])
2090             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2091         if (!skip_dct[3])
2092             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2093         if (!skip_dct[4])
2094             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2095         if (!skip_dct[5])
2096             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2097         if (!s->chroma_y_shift) { /* 422 */
2098             if (!skip_dct[6])
2099                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2100                                   wrap_c);
2101             if (!skip_dct[7])
2102                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2103                                   wrap_c);
2104         }
2105         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2106     }
2107
2108     /* DCT & quantize */
2109     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
2110     {
2111         for (i = 0; i < mb_block_count; i++) {
2112             if (!skip_dct[i]) {
2113                 int overflow;
2114                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2115                 // FIXME: we could decide to change the quantizer instead of
2116                 // clipping
2117                 // JS: I don't think that would be a good idea; it could lower
2118                 //     quality instead of improving it. Only INTRADC clipping
2119                 //     deserves a change of quantizer.
2120                 if (overflow)
2121                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2122             } else
2123                 s->block_last_index[i] = -1;
2124         }
2125         if (s->quantizer_noise_shaping) {
2126             for (i = 0; i < mb_block_count; i++) {
2127                 if (!skip_dct[i]) {
2128                     s->block_last_index[i] =
2129                         dct_quantize_refine(s, s->block[i], weight[i],
2130                                             orig[i], i, s->qscale);
2131                 }
2132             }
2133         }
2134
2135         if (s->luma_elim_threshold && !s->mb_intra)
2136             for (i = 0; i < 4; i++)
2137                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2138         if (s->chroma_elim_threshold && !s->mb_intra)
2139             for (i = 4; i < mb_block_count; i++)
2140                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2141
2142         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2143             for (i = 0; i < mb_block_count; i++) {
2144                 if (s->block_last_index[i] == -1)
2145                     s->coded_score[i] = INT_MAX / 256;
2146             }
2147         }
2148     }
2149
2150     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2151         s->block_last_index[4] =
2152         s->block_last_index[5] = 0;
2153         s->block[4][0] =
2154         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2155     }
2156
2157     // FIXME: non-C (SIMD) quantize code returns an incorrect block_last_index, recompute it here
2158     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2159         for (i = 0; i < mb_block_count; i++) {
2160             int j;
2161             if (s->block_last_index[i] > 0) {
2162                 for (j = 63; j > 0; j--) {
2163                     if (s->block[i][s->intra_scantable.permutated[j]])
2164                         break;
2165                 }
2166                 s->block_last_index[i] = j;
2167             }
2168         }
2169     }
2170
2171     /* huffman encode */
2172     switch(s->codec_id){ //FIXME: a function pointer could be slightly faster
2173     case AV_CODEC_ID_MPEG1VIDEO:
2174     case AV_CODEC_ID_MPEG2VIDEO:
2175         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2176             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2177         break;
2178     case AV_CODEC_ID_MPEG4:
2179         if (CONFIG_MPEG4_ENCODER)
2180             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2181         break;
2182     case AV_CODEC_ID_MSMPEG4V2:
2183     case AV_CODEC_ID_MSMPEG4V3:
2184     case AV_CODEC_ID_WMV1:
2185         if (CONFIG_MSMPEG4_ENCODER)
2186             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2187         break;
2188     case AV_CODEC_ID_WMV2:
2189         if (CONFIG_WMV2_ENCODER)
2190             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2191         break;
2192     case AV_CODEC_ID_H261:
2193         if (CONFIG_H261_ENCODER)
2194             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2195         break;
2196     case AV_CODEC_ID_H263:
2197     case AV_CODEC_ID_H263P:
2198     case AV_CODEC_ID_FLV1:
2199     case AV_CODEC_ID_RV10:
2200     case AV_CODEC_ID_RV20:
2201         if (CONFIG_H263_ENCODER)
2202             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2203         break;
2204     case AV_CODEC_ID_MJPEG:
2205         if (CONFIG_MJPEG_ENCODER)
2206             ff_mjpeg_encode_mb(s, s->block);
2207         break;
2208     default:
2209         assert(0);
2210     }
2211 }
2212
2213 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2214 {
2215     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2216     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2217 }
2218
2219 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2220     int i;
2221
2222     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2223
2224     /* mpeg1 */
2225     d->mb_skip_run= s->mb_skip_run;
2226     for(i=0; i<3; i++)
2227         d->last_dc[i] = s->last_dc[i];
2228
2229     /* statistics */
2230     d->mv_bits= s->mv_bits;
2231     d->i_tex_bits= s->i_tex_bits;
2232     d->p_tex_bits= s->p_tex_bits;
2233     d->i_count= s->i_count;
2234     d->f_count= s->f_count;
2235     d->b_count= s->b_count;
2236     d->skip_count= s->skip_count;
2237     d->misc_bits= s->misc_bits;
2238     d->last_bits= 0;
2239
2240     d->mb_skipped= 0;
2241     d->qscale= s->qscale;
2242     d->dquant= s->dquant;
2243
2244     d->esc3_level_length= s->esc3_level_length;
2245 }
2246
2247 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2248     int i;
2249
2250     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2251     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2252
2253     /* mpeg1 */
2254     d->mb_skip_run= s->mb_skip_run;
2255     for(i=0; i<3; i++)
2256         d->last_dc[i] = s->last_dc[i];
2257
2258     /* statistics */
2259     d->mv_bits= s->mv_bits;
2260     d->i_tex_bits= s->i_tex_bits;
2261     d->p_tex_bits= s->p_tex_bits;
2262     d->i_count= s->i_count;
2263     d->f_count= s->f_count;
2264     d->b_count= s->b_count;
2265     d->skip_count= s->skip_count;
2266     d->misc_bits= s->misc_bits;
2267
2268     d->mb_intra= s->mb_intra;
2269     d->mb_skipped= s->mb_skipped;
2270     d->mv_type= s->mv_type;
2271     d->mv_dir= s->mv_dir;
2272     d->pb= s->pb;
2273     if(s->data_partitioning){
2274         d->pb2= s->pb2;
2275         d->tex_pb= s->tex_pb;
2276     }
2277     d->block= s->block;
2278     for(i=0; i<8; i++)
2279         d->block_last_index[i]= s->block_last_index[i];
2280     d->interlaced_dct= s->interlaced_dct;
2281     d->qscale= s->qscale;
2282
2283     d->esc3_level_length= s->esc3_level_length;
2284 }
2285
2286 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2287                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2288                            int *dmin, int *next_block, int motion_x, int motion_y)
2289 {
2290     int score;
2291     uint8_t *dest_backup[3];
2292
2293     copy_context_before_encode(s, backup, type);
2294
2295     s->block= s->blocks[*next_block];
2296     s->pb= pb[*next_block];
2297     if(s->data_partitioning){
2298         s->pb2   = pb2   [*next_block];
2299         s->tex_pb= tex_pb[*next_block];
2300     }
2301
2302     if(*next_block){
2303         memcpy(dest_backup, s->dest, sizeof(s->dest));
2304         s->dest[0] = s->rd_scratchpad;
2305         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2306         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2307         assert(s->linesize >= 32); //FIXME
2308     }
2309
2310     encode_mb(s, motion_x, motion_y);
2311
2312     score= put_bits_count(&s->pb);
2313     if(s->data_partitioning){
2314         score+= put_bits_count(&s->pb2);
2315         score+= put_bits_count(&s->tex_pb);
2316     }
2317
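    /* The baseline cost of this candidate is its bit count; with full
     * rate-distortion macroblock decision the block is also reconstructed and
     * the cost becomes bits * lambda2 + (SSE << FF_LAMBDA_SHIFT). */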
2318     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2319         ff_MPV_decode_mb(s, s->block);
2320
2321         score *= s->lambda2;
2322         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2323     }
2324
2325     if(*next_block){
2326         memcpy(s->dest, dest_backup, sizeof(s->dest));
2327     }
2328
2329     if(score<*dmin){
2330         *dmin= score;
2331         *next_block^=1;
2332
2333         copy_context_after_encode(best, s, type);
2334     }
2335 }
2336
2337 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2338     uint32_t *sq = ff_square_tab + 256;
2339     int acc=0;
2340     int x,y;
2341
2342     if(w==16 && h==16)
2343         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2344     else if(w==8 && h==8)
2345         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2346
2347     for(y=0; y<h; y++){
2348         for(x=0; x<w; x++){
2349             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2350         }
2351     }
2352
2353     assert(acc>=0);
2354
2355     return acc;
2356 }
2357
2358 static int sse_mb(MpegEncContext *s){
2359     int w= 16;
2360     int h= 16;
2361
2362     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2363     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2364
2365     if(w==16 && h==16)
2366       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2367         return  s->dsp.nsse[0](s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2368                +s->dsp.nsse[1](s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2369                +s->dsp.nsse[1](s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2370       }else{
2371         return  s->dsp.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2372                +s->dsp.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2373                +s->dsp.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2374       }
2375     else
2376         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2377                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2378                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2379 }
2380
2381 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2382     MpegEncContext *s= *(void**)arg;
2383
2384
2385     s->me.pre_pass=1;
2386     s->me.dia_size= s->avctx->pre_dia_size;
2387     s->first_slice_line=1;
2388     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2389         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2390             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2391         }
2392         s->first_slice_line=0;
2393     }
2394
2395     s->me.pre_pass=0;
2396
2397     return 0;
2398 }
2399
2400 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2401     MpegEncContext *s= *(void**)arg;
2402
2403     s->me.dia_size= s->avctx->dia_size;
2404     s->first_slice_line=1;
2405     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2406         s->mb_x=0; //for block init below
2407         ff_init_block_index(s);
2408         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2409             s->block_index[0]+=2;
2410             s->block_index[1]+=2;
2411             s->block_index[2]+=2;
2412             s->block_index[3]+=2;
2413
2414             /* compute motion vector & mb_type and store in context */
2415             if(s->pict_type==AV_PICTURE_TYPE_B)
2416                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2417             else
2418                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2419         }
2420         s->first_slice_line=0;
2421     }
2422     return 0;
2423 }
2424
2425 static int mb_var_thread(AVCodecContext *c, void *arg){
2426     MpegEncContext *s= *(void**)arg;
2427     int mb_x, mb_y;
2428
2429     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2430         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2431             int xx = mb_x * 16;
2432             int yy = mb_y * 16;
2433             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2434             int varc;
2435             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2436
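            /* Estimate the spatial variance of the 16x16 luma block:
             * pix_norm1() returns the sum of squared pixels and pix_sum()
             * their plain sum, so (sum_sq - sum^2/256) / 256 is the variance;
             * the +500 and +128 terms are small bias and rounding constants. */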
2437             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2438                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2439
2440             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2441             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2442             s->me.mb_var_sum_temp    += varc;
2443         }
2444     }
2445     return 0;
2446 }
2447
2448 static void write_slice_end(MpegEncContext *s){
2449     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2450         if(s->partitioned_frame){
2451             ff_mpeg4_merge_partitions(s);
2452         }
2453
2454         ff_mpeg4_stuffing(&s->pb);
2455     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2456         ff_mjpeg_encode_stuffing(&s->pb);
2457     }
2458
2459     avpriv_align_put_bits(&s->pb);
2460     flush_put_bits(&s->pb);
2461
2462     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2463         s->misc_bits+= get_bits_diff(s);
2464 }
2465
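/* Fill in the most recently reserved 12-byte AV_PKT_DATA_H263_MB_INFO record:
 * the bit offset of the macroblock within the packet (32-bit LE), the
 * quantizer, the GOB number, the macroblock address inside the GOB (16-bit
 * LE), the predicted motion vector pair, and two reserved bytes for the
 * unimplemented 4MV case. */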
2466 static void write_mb_info(MpegEncContext *s)
2467 {
2468     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2469     int offset = put_bits_count(&s->pb);
2470     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2471     int gobn = s->mb_y / s->gob_index;
2472     int pred_x, pred_y;
2473     if (CONFIG_H263_ENCODER)
2474         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2475     bytestream_put_le32(&ptr, offset);
2476     bytestream_put_byte(&ptr, s->qscale);
2477     bytestream_put_byte(&ptr, gobn);
2478     bytestream_put_le16(&ptr, mba);
2479     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2480     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2481     /* 4MV not implemented */
2482     bytestream_put_byte(&ptr, 0); /* hmv2 */
2483     bytestream_put_byte(&ptr, 0); /* vmv2 */
2484 }
2485
2486 static void update_mb_info(MpegEncContext *s, int startcode)
2487 {
2488     if (!s->mb_info)
2489         return;
2490     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2491         s->mb_info_size += 12;
2492         s->prev_mb_info = s->last_mb_info;
2493     }
2494     if (startcode) {
2495         s->prev_mb_info = put_bits_count(&s->pb)/8;
2496         /* This might have incremented mb_info_size above, and we return without
2497          * actually writing any info into that slot yet. But in that case this
2498          * will be called again after the start code has been written, and the
2499          * MB info will be written then. */
2500         return;
2501     }
2502
2503     s->last_mb_info = put_bits_count(&s->pb)/8;
2504     if (!s->mb_info_size)
2505         s->mb_info_size += 12;
2506     write_mb_info(s);
2507 }
2508
2509 static int encode_thread(AVCodecContext *c, void *arg){
2510     MpegEncContext *s= *(void**)arg;
2511     int mb_x, mb_y, pdif = 0;
2512     int chr_h= 16>>s->chroma_y_shift;
2513     int i, j;
2514     MpegEncContext best_s, backup_s;
2515     uint8_t bit_buf[2][MAX_MB_BYTES];
2516     uint8_t bit_buf2[2][MAX_MB_BYTES];
2517     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2518     PutBitContext pb[2], pb2[2], tex_pb[2];
2519
2520     for(i=0; i<2; i++){
2521         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2522         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2523         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2524     }
2525
2526     s->last_bits= put_bits_count(&s->pb);
2527     s->mv_bits=0;
2528     s->misc_bits=0;
2529     s->i_tex_bits=0;
2530     s->p_tex_bits=0;
2531     s->i_count=0;
2532     s->f_count=0;
2533     s->b_count=0;
2534     s->skip_count=0;
2535
2536     for(i=0; i<3; i++){
2537         /* init last dc values */
2538         /* note: quant matrix value (8) is implied here */
2539         s->last_dc[i] = 128 << s->intra_dc_precision;
2540
2541         s->current_picture.f->error[i] = 0;
2542     }
2543     s->mb_skip_run = 0;
2544     memset(s->last_mv, 0, sizeof(s->last_mv));
2545
2546     s->last_mv_dir = 0;
2547
2548     switch(s->codec_id){
2549     case AV_CODEC_ID_H263:
2550     case AV_CODEC_ID_H263P:
2551     case AV_CODEC_ID_FLV1:
2552         if (CONFIG_H263_ENCODER)
2553             s->gob_index = ff_h263_get_gob_height(s);
2554         break;
2555     case AV_CODEC_ID_MPEG4:
2556         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2557             ff_mpeg4_init_partitions(s);
2558         break;
2559     }
2560
2561     s->resync_mb_x=0;
2562     s->resync_mb_y=0;
2563     s->first_slice_line = 1;
2564     s->ptr_lastgob = s->pb.buf;
2565     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2566         s->mb_x=0;
2567         s->mb_y= mb_y;
2568
2569         ff_set_qscale(s, s->qscale);
2570         ff_init_block_index(s);
2571
2572         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2573             int xy= mb_y*s->mb_stride + mb_x; // not const: H.261 may adjust this below
2574             int mb_type= s->mb_type[xy];
2575 //            int d;
2576             int dmin= INT_MAX;
2577             int dir;
2578
2579             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2580                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2581                 return -1;
2582             }
2583             if(s->data_partitioning){
2584                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2585                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2586                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2587                     return -1;
2588                 }
2589             }
2590
2591             s->mb_x = mb_x;
2592             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2593             ff_update_block_index(s);
2594
2595             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2596                 ff_h261_reorder_mb_index(s);
2597                 xy= s->mb_y*s->mb_stride + s->mb_x;
2598                 mb_type= s->mb_type[xy];
2599             }
2600
2601             /* write gob / video packet header  */
2602             if(s->rtp_mode){
2603                 int current_packet_size, is_gob_start;
2604
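                /* Decide whether to start a new GOB / video packet / slice
                 * here: normally once the current packet has reached
                 * rtp_payload_size, and always at the first macroblock of this
                 * slice context's first row.  MPEG-2 forces a slice at the
                 * start of every row, H.263 without slice-structured mode only
                 * allows one at a GOB boundary, and MPEG-1/2 never split
                 * inside a skip run. */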
2605                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2606
2607                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2608
2609                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2610
2611                 switch(s->codec_id){
2612                 case AV_CODEC_ID_H263:
2613                 case AV_CODEC_ID_H263P:
2614                     if(!s->h263_slice_structured)
2615                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2616                     break;
2617                 case AV_CODEC_ID_MPEG2VIDEO:
2618                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2619                 case AV_CODEC_ID_MPEG1VIDEO:
2620                     if(s->mb_skip_run) is_gob_start=0;
2621                     break;
2622                 }
2623
2624                 if(is_gob_start){
2625                     if(s->start_mb_y != mb_y || mb_x!=0){
2626                         write_slice_end(s);
2627
2628                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2629                             ff_mpeg4_init_partitions(s);
2630                         }
2631                     }
2632
2633                     assert((put_bits_count(&s->pb)&7) == 0);
2634                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2635
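                    /* Crude packet-loss simulation for error-resilience
                     * testing: roughly every (100 / error_rate)-th GOB is
                     * dropped by rewinding the put-bits pointer to the last
                     * GOB start, discarding the bits just written. */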
2636                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2637                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2638                         int d = 100 / s->error_rate;
2639                         if(r % d == 0){
2640                             current_packet_size=0;
2641                             s->pb.buf_ptr= s->ptr_lastgob;
2642                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2643                         }
2644                     }
2645
2646                     if (s->avctx->rtp_callback){
2647                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2648                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2649                     }
2650                     update_mb_info(s, 1);
2651
2652                     switch(s->codec_id){
2653                     case AV_CODEC_ID_MPEG4:
2654                         if (CONFIG_MPEG4_ENCODER) {
2655                             ff_mpeg4_encode_video_packet_header(s);
2656                             ff_mpeg4_clean_buffers(s);
2657                         }
2658                     break;
2659                     case AV_CODEC_ID_MPEG1VIDEO:
2660                     case AV_CODEC_ID_MPEG2VIDEO:
2661                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2662                             ff_mpeg1_encode_slice_header(s);
2663                             ff_mpeg1_clean_buffers(s);
2664                         }
2665                     break;
2666                     case AV_CODEC_ID_H263:
2667                     case AV_CODEC_ID_H263P:
2668                         if (CONFIG_H263_ENCODER)
2669                             ff_h263_encode_gob_header(s, mb_y);
2670                     break;
2671                     }
2672
2673                     if(s->flags&CODEC_FLAG_PASS1){
2674                         int bits= put_bits_count(&s->pb);
2675                         s->misc_bits+= bits - s->last_bits;
2676                         s->last_bits= bits;
2677                     }
2678
2679                     s->ptr_lastgob += current_packet_size;
2680                     s->first_slice_line=1;
2681                     s->resync_mb_x=mb_x;
2682                     s->resync_mb_y=mb_y;
2683                 }
2684             }
2685
2686             if(  (s->resync_mb_x   == s->mb_x)
2687                && s->resync_mb_y+1 == s->mb_y){
2688                 s->first_slice_line=0;
2689             }
2690
2691             s->mb_skipped=0;
2692             s->dquant=0; //only for QP_RD
2693
2694             update_mb_info(s, 0);
2695
2696             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2697                 int next_block=0;
2698                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2699
2700                 copy_context_before_encode(&backup_s, s, -1);
2701                 backup_s.pb= s->pb;
2702                 best_s.data_partitioning= s->data_partitioning;
2703                 best_s.partitioned_frame= s->partitioned_frame;
2704                 if(s->data_partitioning){
2705                     backup_s.pb2= s->pb2;
2706                     backup_s.tex_pb= s->tex_pb;
2707                 }
2708
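                /* Trial-encode every candidate prediction mode below with
                 * encode_mb_hq(), which writes each trial into one of two
                 * scratch bit buffers; dmin tracks the cheapest cost found so
                 * far and best_s the corresponding encoder state. */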
2709                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2710                     s->mv_dir = MV_DIR_FORWARD;
2711                     s->mv_type = MV_TYPE_16X16;
2712                     s->mb_intra= 0;
2713                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2714                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2715                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2716                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2717                 }
2718                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2719                     s->mv_dir = MV_DIR_FORWARD;
2720                     s->mv_type = MV_TYPE_FIELD;
2721                     s->mb_intra= 0;
2722                     for(i=0; i<2; i++){
2723                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2724                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2725                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2726                     }
2727                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2728                                  &dmin, &next_block, 0, 0);
2729                 }
2730                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2731                     s->mv_dir = MV_DIR_FORWARD;
2732                     s->mv_type = MV_TYPE_16X16;
2733                     s->mb_intra= 0;
2734                     s->mv[0][0][0] = 0;
2735                     s->mv[0][0][1] = 0;
2736                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2737                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2738                 }
2739                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2740                     s->mv_dir = MV_DIR_FORWARD;
2741                     s->mv_type = MV_TYPE_8X8;
2742                     s->mb_intra= 0;
2743                     for(i=0; i<4; i++){
2744                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2745                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2746                     }
2747                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2748                                  &dmin, &next_block, 0, 0);
2749                 }
2750                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2751                     s->mv_dir = MV_DIR_FORWARD;
2752                     s->mv_type = MV_TYPE_16X16;
2753                     s->mb_intra= 0;
2754                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2755                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2756                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2757                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2758                 }
2759                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2760                     s->mv_dir = MV_DIR_BACKWARD;
2761                     s->mv_type = MV_TYPE_16X16;
2762                     s->mb_intra= 0;
2763                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2764                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2765                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2766                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2767                 }
2768                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2769                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2770                     s->mv_type = MV_TYPE_16X16;
2771                     s->mb_intra= 0;
2772                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2773                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2774                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2775                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2776                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2777                                  &dmin, &next_block, 0, 0);
2778                 }
2779                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2780                     s->mv_dir = MV_DIR_FORWARD;
2781                     s->mv_type = MV_TYPE_FIELD;
2782                     s->mb_intra= 0;
2783                     for(i=0; i<2; i++){
2784                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2785                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2786                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2787                     }
2788                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2789                                  &dmin, &next_block, 0, 0);
2790                 }
2791                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2792                     s->mv_dir = MV_DIR_BACKWARD;
2793                     s->mv_type = MV_TYPE_FIELD;
2794                     s->mb_intra= 0;
2795                     for(i=0; i<2; i++){
2796                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2797                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2798                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2799                     }
2800                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2801                                  &dmin, &next_block, 0, 0);
2802                 }
2803                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2804                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2805                     s->mv_type = MV_TYPE_FIELD;
2806                     s->mb_intra= 0;
2807                     for(dir=0; dir<2; dir++){
2808                         for(i=0; i<2; i++){
2809                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2810                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2811                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2812                         }
2813                     }
2814                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2815                                  &dmin, &next_block, 0, 0);
2816                 }
2817                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2818                     s->mv_dir = 0;
2819                     s->mv_type = MV_TYPE_16X16;
2820                     s->mb_intra= 1;
2821                     s->mv[0][0][0] = 0;
2822                     s->mv[0][0][1] = 0;
2823                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2824                                  &dmin, &next_block, 0, 0);
2825                     if(s->h263_pred || s->h263_aic){
2826                         if(best_s.mb_intra)
2827                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2828                         else
2829                             ff_clean_intra_table_entries(s); //old mode?
2830                     }
2831                 }
2832
2833                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2834                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2835                         const int last_qp= backup_s.qscale;
2836                         int qpi, qp, dc[6];
2837                         int16_t ac[6][16];
2838                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2839                         static const int dquant_tab[4]={-1,1,-2,2};
2840
2841                         assert(backup_s.dquant == 0);
2842
2843                         //FIXME intra
2844                         s->mv_dir= best_s.mv_dir;
2845                         s->mv_type = MV_TYPE_16X16;
2846                         s->mb_intra= best_s.mb_intra;
2847                         s->mv[0][0][0] = best_s.mv[0][0][0];
2848                         s->mv[0][0][1] = best_s.mv[0][0][1];
2849                         s->mv[1][0][0] = best_s.mv[1][0][0];
2850                         s->mv[1][0][1] = best_s.mv[1][0][1];
2851
2852                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2853                         for(; qpi<4; qpi++){
2854                             int dquant= dquant_tab[qpi];
2855                             qp= last_qp + dquant;
2856                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2857                                 continue;
2858                             backup_s.dquant= dquant;
2859                             if(s->mb_intra && s->dc_val[0]){
2860                                 for(i=0; i<6; i++){
2861                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2862                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2863                                 }
2864                             }
2865
2866                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2867                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2868                             if(best_s.qscale != qp){
2869                                 if(s->mb_intra && s->dc_val[0]){
2870                                     for(i=0; i<6; i++){
2871                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2872                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2873                                     }
2874                                 }
2875                             }
2876                         }
2877                     }
2878                 }
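                /* QP_RD sketch: when the best mode uses a single 16x16 vector, the
                 * block above re-runs encode_mb_hq() with dquant in {-1,+1,-2,+2}
                 * around the previous qscale (B-frames only try +/-2) and keeps the
                 * quantizer with the lowest RD score; for intra MBs the DC/AC
                 * prediction state is saved in dc[]/ac[] and restored whenever a
                 * trial quantizer is rejected. */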
2879                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2880                     int mx= s->b_direct_mv_table[xy][0];
2881                     int my= s->b_direct_mv_table[xy][1];
2882
2883                     backup_s.dquant = 0;
2884                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2885                     s->mb_intra= 0;
2886                     ff_mpeg4_set_direct_mv(s, mx, my);
2887                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2888                                  &dmin, &next_block, mx, my);
2889                 }
2890                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2891                     backup_s.dquant = 0;
2892                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2893                     s->mb_intra= 0;
2894                     ff_mpeg4_set_direct_mv(s, 0, 0);
2895                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2896                                  &dmin, &next_block, 0, 0);
2897                 }
2898                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2899                     int coded=0;
2900                     for(i=0; i<6; i++)
2901                         coded |= s->block_last_index[i];
2902                     if(coded){
2903                         int mx,my;
2904                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2905                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2906                             mx=my=0; //FIXME find the one we actually used
2907                             ff_mpeg4_set_direct_mv(s, mx, my);
2908                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2909                             mx= s->mv[1][0][0];
2910                             my= s->mv[1][0][1];
2911                         }else{
2912                             mx= s->mv[0][0][0];
2913                             my= s->mv[0][0][1];
2914                         }
2915
2916                         s->mv_dir= best_s.mv_dir;
2917                         s->mv_type = best_s.mv_type;
2918                         s->mb_intra= 0;
2919 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2920                         s->mv[0][0][1] = best_s.mv[0][0][1];
2921                         s->mv[1][0][0] = best_s.mv[1][0][0];
2922                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2923                         backup_s.dquant= 0;
2924                         s->skipdct=1;
2925                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2926                                         &dmin, &next_block, mx, my);
2927                         s->skipdct=0;
2928                     }
2929                 }
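                /* SKIP_RD sketch: if the winning non-intra mode still has coded
                 * coefficients, the macroblock is trial-encoded once more with
                 * s->skipdct set, i.e. with the residual dropped, so that a pure
                 * motion-compensated version can win if its RD score is lower. */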
2930
2931                 s->current_picture.qscale_table[xy] = best_s.qscale;
2932
2933                 copy_context_after_encode(s, &best_s, -1);
2934
2935                 pb_bits_count= put_bits_count(&s->pb);
2936                 flush_put_bits(&s->pb);
2937                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2938                 s->pb= backup_s.pb;
2939
2940                 if(s->data_partitioning){
2941                     pb2_bits_count= put_bits_count(&s->pb2);
2942                     flush_put_bits(&s->pb2);
2943                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2944                     s->pb2= backup_s.pb2;
2945
2946                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2947                     flush_put_bits(&s->tex_pb);
2948                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2949                     s->tex_pb= backup_s.tex_pb;
2950                 }
2951                 s->last_bits= put_bits_count(&s->pb);
2952
2953                 if (CONFIG_H263_ENCODER &&
2954                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2955                     ff_h263_update_motion_val(s);
2956
2957                 if(next_block==0){ //FIXME 16 vs linesize16
2958                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2959                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2960                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2961                 }
2962
2963                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2964                     ff_MPV_decode_mb(s, s->block);
2965             } else {
2966                 int motion_x = 0, motion_y = 0;
2967                 s->mv_type=MV_TYPE_16X16;
2968                 // only one MB-Type possible
2969
2970                 switch(mb_type){
2971                 case CANDIDATE_MB_TYPE_INTRA:
2972                     s->mv_dir = 0;
2973                     s->mb_intra= 1;
2974                     motion_x= s->mv[0][0][0] = 0;
2975                     motion_y= s->mv[0][0][1] = 0;
2976                     break;
2977                 case CANDIDATE_MB_TYPE_INTER:
2978                     s->mv_dir = MV_DIR_FORWARD;
2979                     s->mb_intra= 0;
2980                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2981                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2982                     break;
2983                 case CANDIDATE_MB_TYPE_INTER_I:
2984                     s->mv_dir = MV_DIR_FORWARD;
2985                     s->mv_type = MV_TYPE_FIELD;
2986                     s->mb_intra= 0;
2987                     for(i=0; i<2; i++){
2988                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2989                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2990                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2991                     }
2992                     break;
2993                 case CANDIDATE_MB_TYPE_INTER4V:
2994                     s->mv_dir = MV_DIR_FORWARD;
2995                     s->mv_type = MV_TYPE_8X8;
2996                     s->mb_intra= 0;
2997                     for(i=0; i<4; i++){
2998                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2999                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3000                     }
3001                     break;
3002                 case CANDIDATE_MB_TYPE_DIRECT:
3003                     if (CONFIG_MPEG4_ENCODER) {
3004                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3005                         s->mb_intra= 0;
3006                         motion_x=s->b_direct_mv_table[xy][0];
3007                         motion_y=s->b_direct_mv_table[xy][1];
3008                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3009                     }
3010                     break;
3011                 case CANDIDATE_MB_TYPE_DIRECT0:
3012                     if (CONFIG_MPEG4_ENCODER) {
3013                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3014                         s->mb_intra= 0;
3015                         ff_mpeg4_set_direct_mv(s, 0, 0);
3016                     }
3017                     break;
3018                 case CANDIDATE_MB_TYPE_BIDIR:
3019                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3020                     s->mb_intra= 0;
3021                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3022                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3023                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3024                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3025                     break;
3026                 case CANDIDATE_MB_TYPE_BACKWARD:
3027                     s->mv_dir = MV_DIR_BACKWARD;
3028                     s->mb_intra= 0;
3029                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3030                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3031                     break;
3032                 case CANDIDATE_MB_TYPE_FORWARD:
3033                     s->mv_dir = MV_DIR_FORWARD;
3034                     s->mb_intra= 0;
3035                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3036                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3037                     break;
3038                 case CANDIDATE_MB_TYPE_FORWARD_I:
3039                     s->mv_dir = MV_DIR_FORWARD;
3040                     s->mv_type = MV_TYPE_FIELD;
3041                     s->mb_intra= 0;
3042                     for(i=0; i<2; i++){
3043                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3044                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3045                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3046                     }
3047                     break;
3048                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3049                     s->mv_dir = MV_DIR_BACKWARD;
3050                     s->mv_type = MV_TYPE_FIELD;
3051                     s->mb_intra= 0;
3052                     for(i=0; i<2; i++){
3053                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3054                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3055                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3056                     }
3057                     break;
3058                 case CANDIDATE_MB_TYPE_BIDIR_I:
3059                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3060                     s->mv_type = MV_TYPE_FIELD;
3061                     s->mb_intra= 0;
3062                     for(dir=0; dir<2; dir++){
3063                         for(i=0; i<2; i++){
3064                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3065                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3066                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3067                         }
3068                     }
3069                     break;
3070                 default:
3071                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3072                 }
3073
3074                 encode_mb(s, motion_x, motion_y);
3075
3076                 // RAL: Update last macroblock type
3077                 s->last_mv_dir = s->mv_dir;
3078
3079                 if (CONFIG_H263_ENCODER &&
3080                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3081                     ff_h263_update_motion_val(s);
3082
3083                 ff_MPV_decode_mb(s, s->block);
3084             }
3085
3086             /* clean the MV table in I-, P- and S-frames, since direct mode in B-frames reads it */
3087             if(s->mb_intra /* && I,P,S_TYPE */){
3088                 s->p_mv_table[xy][0]=0;
3089                 s->p_mv_table[xy][1]=0;
3090             }
3091
3092             if(s->flags&CODEC_FLAG_PSNR){
3093                 int w= 16;
3094                 int h= 16;
3095
3096                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3097                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3098
3099                 s->current_picture.f->error[0] += sse(
3100                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3101                     s->dest[0], w, h, s->linesize);
3102                 s->current_picture.f->error[1] += sse(
3103                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3104                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3105                 s->current_picture.f->error[2] += sse(
3106                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3107                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3108             }
3109             if(s->loop_filter){
3110                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3111                     ff_h263_loop_filter(s);
3112             }
3113             av_dlog(s->avctx, "MB %d %d bits\n",
3114                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3115         }
3116     }
3117
3118     //not pretty, but this must be written before flushing, so it has to be here
3119     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3120         ff_msmpeg4_encode_ext_header(s);
3121
3122     write_slice_end(s);
3123
3124     /* Send the last GOB if RTP */
3125     if (s->avctx->rtp_callback) {
3126         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3127         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3128         /* Call the RTP callback to send the last GOB */
3129         emms_c();
3130         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3131     }
3132
3133     return 0;
3134 }
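/* Note on encode_thread(): when several candidate macroblock types survive motion
 * estimation (rate-distortion or bit-count mb_decision), each candidate is
 * trial-encoded with encode_mb_hq() into alternating scratch bit buffers and the
 * one with the lowest score is kept; otherwise the single remaining candidate is
 * encoded directly with encode_mb(). */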
3135
3136 #define MERGE(field) dst->field += src->field; src->field=0
3137 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3138     MERGE(me.scene_change_score);
3139     MERGE(me.mc_mb_var_sum_temp);
3140     MERGE(me.mb_var_sum_temp);
3141 }
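/* Expansion sketch: MERGE(me.mb_var_sum_temp) becomes
 *     dst->me.mb_var_sum_temp += src->me.mb_var_sum_temp;
 *     src->me.mb_var_sum_temp  = 0;
 * i.e. per-slice-thread statistics are accumulated into the main context and the
 * per-thread copies are cleared for the next frame. */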
3142
3143 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3144     int i;
3145
3146     MERGE(dct_count[0]); //note: the other DCT variables are not part of the context
3147     MERGE(dct_count[1]);
3148     MERGE(mv_bits);
3149     MERGE(i_tex_bits);
3150     MERGE(p_tex_bits);
3151     MERGE(i_count);
3152     MERGE(f_count);
3153     MERGE(b_count);
3154     MERGE(skip_count);
3155     MERGE(misc_bits);
3156     MERGE(er.error_count);
3157     MERGE(padding_bug_score);
3158     MERGE(current_picture.f->error[0]);
3159     MERGE(current_picture.f->error[1]);
3160     MERGE(current_picture.f->error[2]);
3161
3162     if(dst->avctx->noise_reduction){
3163         for(i=0; i<64; i++){
3164             MERGE(dct_error_sum[0][i]);
3165             MERGE(dct_error_sum[1][i]);
3166         }
3167     }
3168
3169     assert(put_bits_count(&src->pb) % 8 ==0);
3170     assert(put_bits_count(&dst->pb) % 8 ==0);
3171     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3172     flush_put_bits(&dst->pb);
3173 }
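/* Each slice thread writes into its own PutBitContext; merge_context_after_encode()
 * appends those bits to the main bitstream with avpriv_copy_bits(), with the asserts
 * above documenting that both bit writers are expected to be byte-aligned here. */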
3174
3175 static int estimate_qp(MpegEncContext *s, int dry_run){
3176     if (s->next_lambda){
3177         s->current_picture_ptr->f->quality =
3178         s->current_picture.f->quality = s->next_lambda;
3179         if(!dry_run) s->next_lambda= 0;
3180     } else if (!s->fixed_qscale) {
3181         s->current_picture_ptr->f->quality =
3182         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3183         if (s->current_picture.f->quality < 0)
3184             return -1;
3185     }
3186
3187     if(s->adaptive_quant){
3188         switch(s->codec_id){
3189         case AV_CODEC_ID_MPEG4:
3190             if (CONFIG_MPEG4_ENCODER)
3191                 ff_clean_mpeg4_qscales(s);
3192             break;
3193         case AV_CODEC_ID_H263:
3194         case AV_CODEC_ID_H263P:
3195         case AV_CODEC_ID_FLV1:
3196             if (CONFIG_H263_ENCODER)
3197                 ff_clean_h263_qscales(s);
3198             break;
3199         default:
3200             ff_init_qscale_tab(s);
3201         }
3202
3203         s->lambda= s->lambda_table[0];
3204         //FIXME broken
3205     }else
3206         s->lambda = s->current_picture.f->quality;
3207     update_qscale(s);
3208     return 0;
3209 }
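/* Rough picture of the adaptive-quant path above: the per-macroblock values live in
 * lambda_table[]/qscale_table[], and the codec-specific ff_clean_*_qscales() calls
 * smooth them so that the MB-to-MB quantizer steps stay representable in the
 * H.263/MPEG-4 syntax; update_qscale() then derives the integer qscale from the
 * frame-level lambda. */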
3210
3211 /* must be called before writing the header */
3212 static void set_frame_distances(MpegEncContext * s){
3213     assert(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3214     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3215
3216     if(s->pict_type==AV_PICTURE_TYPE_B){
3217         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3218         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3219     }else{
3220         s->pp_time= s->time - s->last_non_b_time;
3221         s->last_non_b_time= s->time;
3222         assert(s->picture_number==0 || s->pp_time > 0);
3223     }
3224 }
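/* Worked example (times in s->time units): with non-B frames at t=0 and t=3 and a
 * B-frame at t=1, encoding the frame at t=3 sets pp_time = 3 - 0 = 3; when the
 * B-frame is encoded afterwards, last_non_b_time is still 3, so
 * pb_time = 3 - (3 - 1) = 1, the distance from the previous reference to this
 * B-frame, satisfying the assert 0 < pb_time < pp_time. */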
3225
3226 static int encode_picture(MpegEncContext *s, int picture_number)
3227 {
3228     int i, ret;
3229     int bits;
3230     int context_count = s->slice_context_count;
3231
3232     s->picture_number = picture_number;
3233
3234     /* Reset the average MB variance */
3235     s->me.mb_var_sum_temp    =
3236     s->me.mc_mb_var_sum_temp = 0;
3237
3238     /* we need to initialize some time vars before we can encode b-frames */
3239     // RAL: Condition added for MPEG1VIDEO
3240     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3241         set_frame_distances(s);
3242     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3243         ff_set_mpeg4_time(s);
3244
3245     s->me.scene_change_score=0;
3246
3247 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3248
3249     if(s->pict_type==AV_PICTURE_TYPE_I){
3250         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3251         else                        s->no_rounding=0;
3252     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3253         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3254             s->no_rounding ^= 1;
3255     }
3256
3257     if(s->flags & CODEC_FLAG_PASS2){
3258         if (estimate_qp(s,1) < 0)
3259             return -1;
3260         ff_get_2pass_fcode(s);
3261     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3262         if(s->pict_type==AV_PICTURE_TYPE_B)
3263             s->lambda= s->last_lambda_for[s->pict_type];
3264         else
3265             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3266         update_qscale(s);
3267     }
3268
3269     s->mb_intra=0; //for the rate distortion & bit compare functions
3270     for(i=1; i<context_count; i++){
3271         ret = ff_update_duplicate_context(s->thread_context[i], s);
3272         if (ret < 0)
3273             return ret;
3274     }
3275
3276     if(ff_init_me(s)<0)
3277         return -1;
3278
3279     /* Estimate motion for every MB */
3280     if(s->pict_type != AV_PICTURE_TYPE_I){
3281         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3282         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3283         if (s->pict_type != AV_PICTURE_TYPE_B) {
3284             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3285                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3286             }
3287         }
3288
3289         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3290     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3291         /* I-Frame */
3292         for(i=0; i<s->mb_stride*s->mb_height; i++)
3293             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3294
3295         if(!s->fixed_qscale){
3296             /* finding spatial complexity for I-frame rate control */
3297             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3298         }
3299     }
3300     for(i=1; i<context_count; i++){
3301         merge_context_after_me(s, s->thread_context[i]);
3302     }
3303     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3304     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3305     emms_c();
3306
3307     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3308         s->pict_type= AV_PICTURE_TYPE_I;
3309         for(i=0; i<s->mb_stride*s->mb_height; i++)
3310             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3311         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3312                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3313     }
3314
3315     if(!s->umvplus){
3316         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3317             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3318
3319             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3320                 int a,b;
3321                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3322                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3323                 s->f_code= FFMAX3(s->f_code, a, b);
3324             }
3325
3326             ff_fix_long_p_mvs(s);
3327             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3328             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3329                 int j;
3330                 for(i=0; i<2; i++){
3331                     for(j=0; j<2; j++)
3332                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3333                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3334                 }
3335             }
3336         }
3337
3338         if(s->pict_type==AV_PICTURE_TYPE_B){
3339             int a, b;
3340
3341             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3342             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3343             s->f_code = FFMAX(a, b);
3344
3345             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3346             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3347             s->b_code = FFMAX(a, b);
3348
3349             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3350             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3351             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3352             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3353             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3354                 int dir, j;
3355                 for(dir=0; dir<2; dir++){
3356                     for(i=0; i<2; i++){
3357                         for(j=0; j<2; j++){
3358                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3359                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3360                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3361                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3362                         }
3363                     }
3364                 }
3365             }
3366         }
3367     }
3368
3369     if (estimate_qp(s, 0) < 0)
3370         return -1;
3371
3372     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3373         s->qscale= 3; //reduce clipping problems
3374
3375     if (s->out_format == FMT_MJPEG) {
3376         /* for mjpeg, we do include qscale in the matrix */
3377         for(i=1;i<64;i++){
3378             int j = s->idsp.idct_permutation[i];
3379
3380             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3381         }
3382         s->y_dc_scale_table=
3383         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3384         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3385         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3386                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3387         s->qscale= 8;
3388     }
3389
3390     //FIXME var duplication
3391     s->current_picture_ptr->f->key_frame =
3392     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3393     s->current_picture_ptr->f->pict_type =
3394     s->current_picture.f->pict_type = s->pict_type;
3395
3396     if (s->current_picture.f->key_frame)
3397         s->picture_in_gop_number=0;
3398
3399     s->last_bits= put_bits_count(&s->pb);
3400     switch(s->out_format) {
3401     case FMT_MJPEG:
3402         if (CONFIG_MJPEG_ENCODER)
3403             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3404                                            s->intra_matrix);
3405         break;
3406     case FMT_H261:
3407         if (CONFIG_H261_ENCODER)
3408             ff_h261_encode_picture_header(s, picture_number);
3409         break;
3410     case FMT_H263:
3411         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3412             ff_wmv2_encode_picture_header(s, picture_number);
3413         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3414             ff_msmpeg4_encode_picture_header(s, picture_number);
3415         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3416             ff_mpeg4_encode_picture_header(s, picture_number);
3417         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3418             ff_rv10_encode_picture_header(s, picture_number);
3419         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3420             ff_rv20_encode_picture_header(s, picture_number);
3421         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3422             ff_flv_encode_picture_header(s, picture_number);
3423         else if (CONFIG_H263_ENCODER)
3424             ff_h263_encode_picture_header(s, picture_number);
3425         break;
3426     case FMT_MPEG1:
3427         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3428             ff_mpeg1_encode_picture_header(s, picture_number);
3429         break;
3430     default:
3431         assert(0);
3432     }
3433     bits= put_bits_count(&s->pb);
3434     s->header_bits= bits - s->last_bits;
3435
3436     for(i=1; i<context_count; i++){
3437         update_duplicate_context_after_me(s->thread_context[i], s);
3438     }
3439     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3440     for(i=1; i<context_count; i++){
3441         merge_context_after_encode(s, s->thread_context[i]);
3442     }
3443     emms_c();
3444     return 0;
3445 }
3446
3447 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3448     const int intra= s->mb_intra;
3449     int i;
3450
3451     s->dct_count[intra]++;
3452
3453     for(i=0; i<64; i++){
3454         int level= block[i];
3455
3456         if(level){
3457             if(level>0){
3458                 s->dct_error_sum[intra][i] += level;
3459                 level -= s->dct_offset[intra][i];
3460                 if(level<0) level=0;
3461             }else{
3462                 s->dct_error_sum[intra][i] -= level;
3463                 level += s->dct_offset[intra][i];
3464                 if(level>0) level=0;
3465             }
3466             block[i]= level;
3467         }
3468     }
3469 }
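/* Example with dct_offset[intra][i] == 3 (the offsets are derived elsewhere from
 * the dct_error_sum[]/dct_count[] statistics accumulated here): a coefficient of 5
 * is shrunk to 2 and one of -2 is zeroed, i.e. every coefficient is pulled towards
 * zero by the estimated noise amplitude and dropped if it is smaller than that. */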
3470
3471 static int dct_quantize_trellis_c(MpegEncContext *s,
3472                                   int16_t *block, int n,
3473                                   int qscale, int *overflow){
3474     const int *qmat;
3475     const uint8_t *scantable= s->intra_scantable.scantable;
3476     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3477     int max=0;
3478     unsigned int threshold1, threshold2;
3479     int bias=0;
3480     int run_tab[65];
3481     int level_tab[65];
3482     int score_tab[65];
3483     int survivor[65];
3484     int survivor_count;
3485     int last_run=0;
3486     int last_level=0;
3487     int last_score= 0;
3488     int last_i;
3489     int coeff[2][64];
3490     int coeff_count[64];
3491     int qmul, qadd, start_i, last_non_zero, i, dc;
3492     const int esc_length= s->ac_esc_length;
3493     uint8_t * length;
3494     uint8_t * last_length;
3495     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3496
3497     s->fdsp.fdct(block);
3498
3499     if(s->dct_error_sum)
3500         s->denoise_dct(s, block);
3501     qmul= qscale*16;
3502     qadd= ((qscale-1)|1)*8;
3503
3504     if (s->mb_intra) {
3505         int q;
3506         if (!s->h263_aic) {
3507             if (n < 4)
3508                 q = s->y_dc_scale;
3509             else
3510                 q = s->c_dc_scale;
3511             q = q << 3;
3512         } else{
3513             /* For AIC we skip quant/dequant of INTRADC */
3514             q = 1 << 3;
3515             qadd=0;
3516         }
3517
3518         /* note: block[0] is assumed to be positive */
3519         block[0] = (block[0] + (q >> 1)) / q;
3520         start_i = 1;
3521         last_non_zero = 0;
3522         qmat = s->q_intra_matrix[qscale];
3523         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3524             bias= 1<<(QMAT_SHIFT-1);
3525         length     = s->intra_ac_vlc_length;
3526         last_length= s->intra_ac_vlc_last_length;
3527     } else {
3528         start_i = 0;
3529         last_non_zero = -1;
3530         qmat = s->q_inter_matrix[qscale];
3531         length     = s->inter_ac_vlc_length;
3532         last_length= s->inter_ac_vlc_last_length;
3533     }
3534     last_i= start_i;
3535
3536     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3537     threshold2= (threshold1<<1);
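    /* The unsigned compare below is a branch-free test for FFABS(level) > threshold1:
     * levels in [-threshold1, threshold1] map to [0, 2*threshold1] after adding
     * threshold1, while anything outside that range (negative values wrap around)
     * exceeds threshold2. */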
3538
3539     for(i=63; i>=start_i; i--) {
3540         const int j = scantable[i];
3541         int level = block[j] * qmat[j];
3542
3543         if(((unsigned)(level+threshold1))>threshold2){
3544             last_non_zero = i;
3545             break;
3546         }
3547     }
3548
3549     for(i=start_i; i<=last_non_zero; i++) {
3550         const int j = scantable[i];
3551         int level = block[j] * qmat[j];
3552
3553 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3554 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3555         if(((unsigned)(level+threshold1))>threshold2){
3556             if(level>0){
3557                 level= (bias + level)>>QMAT_SHIFT;
3558                 coeff[0][i]= level;
3559                 coeff[1][i]= level-1;
3560 //                coeff[2][k]= level-2;
3561             }else{
3562                 level= (bias - level)>>QMAT_SHIFT;
3563                 coeff[0][i]= -level;
3564                 coeff[1][i]= -level+1;
3565 //                coeff[2][k]= -level+2;
3566             }
3567             coeff_count[i]= FFMIN(level, 2);
3568             assert(coeff_count[i]);
3569             max |=level;
3570         }else{
3571             coeff[0][i]= (level>>31)|1;
3572             coeff_count[i]= 1;
3573         }
3574     }
3575
3576     *overflow= s->max_qcoeff < max; //overflow might have happened
3577
3578     if(last_non_zero < start_i){
3579         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3580         return last_non_zero;
3581     }
3582
3583     score_tab[start_i]= 0;
3584     survivor[0]= start_i;
3585     survivor_count= 1;
3586
3587     for(i=start_i; i<=last_non_zero; i++){
3588         int level_index, j, zero_distortion;
3589         int dct_coeff= FFABS(block[ scantable[i] ]);
3590         int best_score=256*256*256*120;
3591
3592         if (s->fdsp.fdct == ff_fdct_ifast)
3593             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3594         zero_distortion= dct_coeff*dct_coeff;
3595
3596         for(level_index=0; level_index < coeff_count[i]; level_index++){
3597             int distortion;
3598             int level= coeff[level_index][i];
3599             const int alevel= FFABS(level);
3600             int unquant_coeff;
3601
3602             assert(level);
3603
3604             if(s->out_format == FMT_H263){
3605                 unquant_coeff= alevel*qmul + qadd;
3606             }else{ //MPEG1
3607                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3608                 if(s->mb_intra){
3609                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3610                         unquant_coeff =   (unquant_coeff - 1) | 1;
3611                 }else{
3612                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3613                         unquant_coeff =   (unquant_coeff - 1) | 1;
3614                 }
3615                 unquant_coeff<<= 3;
3616             }
3617
3618             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3619             level+=64;
3620             if((level&(~127)) == 0){
3621                 for(j=survivor_count-1; j>=0; j--){
3622                     int run= i - survivor[j];
3623                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3624                     score += score_tab[i-run];
3625
3626                     if(score < best_score){
3627                         best_score= score;
3628                         run_tab[i+1]= run;
3629                         level_tab[i+1]= level-64;
3630                     }
3631                 }
3632
3633                 if(s->out_format == FMT_H263){
3634                     for(j=survivor_count-1; j>=0; j--){
3635                         int run= i - survivor[j];
3636                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3637                         score += score_tab[i-run];
3638                         if(score < last_score){
3639                             last_score= score;
3640                             last_run= run;
3641                             last_level= level-64;
3642                             last_i= i+1;
3643                         }
3644                     }
3645                 }
3646             }else{
3647                 distortion += esc_length*lambda;
3648                 for(j=survivor_count-1; j>=0; j--){
3649                     int run= i - survivor[j];
3650                     int score= distortion + score_tab[i-run];
3651
3652                     if(score < best_score){
3653                         best_score= score;
3654                         run_tab[i+1]= run;
3655                         level_tab[i+1]= level-64;
3656                     }
3657                 }
3658
3659                 if(s->out_format == FMT_H263){
3660                     for(j=survivor_count-1; j>=0; j--){
3661                         int run= i - survivor[j];
3662                         int score= distortion + score_tab[i-run];
3663                         if(score < last_score){
3664                             last_score= score;
3665                             last_run= run;
3666                             last_level= level-64;
3667                             last_i= i+1;
3668                         }
3669                     }
3670                 }
3671             }
3672         }
3673
3674         score_tab[i+1]= best_score;
3675
3676         //Note: MPEG-4 has a VLC code that is 1 bit shorter than another one with a shorter run and the same level
3677         if(last_non_zero <= 27){
3678             for(; survivor_count; survivor_count--){
3679                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3680                     break;
3681             }
3682         }else{
3683             for(; survivor_count; survivor_count--){
3684                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3685                     break;
3686             }
3687         }
3688
3689         survivor[ survivor_count++ ]= i+1;
3690     }
3691
3692     if(s->out_format != FMT_H263){
3693         last_score= 256*256*256*120;
3694         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3695             int score= score_tab[i];
3696             if(i) score += lambda*2; //FIXME could this be more exact?
3697
3698             if(score < last_score){
3699                 last_score= score;
3700                 last_i= i;
3701                 last_level= level_tab[i];
3702                 last_run= run_tab[i];
3703             }
3704         }
3705     }
3706
3707     s->coded_score[n] = last_score;
3708
3709     dc= FFABS(block[0]);
3710     last_non_zero= last_i - 1;
3711     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3712
3713     if(last_non_zero < start_i)
3714         return last_non_zero;
3715
3716     if(last_non_zero == 0 && start_i == 0){
3717         int best_level= 0;
3718         int best_score= dc * dc;
3719
3720         for(i=0; i<coeff_count[0]; i++){
3721             int level= coeff[i][0];
3722             int alevel= FFABS(level);
3723             int unquant_coeff, score, distortion;
3724
3725             if(s->out_format == FMT_H263){
3726                     unquant_coeff= (alevel*qmul + qadd)>>3;
3727             }else{ //MPEG1
3728                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3729                     unquant_coeff =   (unquant_coeff - 1) | 1;
3730             }
3731             unquant_coeff = (unquant_coeff + 4) >> 3;
3732             unquant_coeff<<= 3 + 3;
3733
3734             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3735             level+=64;
3736             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3737             else                    score= distortion + esc_length*lambda;
3738
3739             if(score < best_score){
3740                 best_score= score;
3741                 best_level= level - 64;
3742             }
3743         }
3744         block[0]= best_level;
3745         s->coded_score[n] = best_score - dc*dc;
3746         if(best_level == 0) return -1;
3747         else                return last_non_zero;
3748     }
3749
3750     i= last_i;
3751     assert(last_level);
3752
3753     block[ perm_scantable[last_non_zero] ]= last_level;
3754     i -= last_run + 1;
3755
3756     for(; i>start_i; i -= run_tab[i] + 1){
3757         block[ perm_scantable[i-1] ]= level_tab[i];
3758     }
3759
3760     return last_non_zero;
3761 }
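/* Summary of the trellis quantizer above: each scan position keeps at most two
 * candidate levels (rounding up or down), and score_tab[]/survivor[] perform a
 * Viterbi-style search over zero runs where the cost of a step is the VLC length
 * weighted by lambda plus the distortion of the dequantized coefficient relative
 * to zeroing it; run_tab[]/level_tab[] hold the back-pointers that are finally
 * walked to rebuild block[]. */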
3762
3763 //#define REFINE_STATS 1
3764 static int16_t basis[64][64];
3765
3766 static void build_basis(uint8_t *perm){
3767     int i, j, x, y;
3768     emms_c();
3769     for(i=0; i<8; i++){
3770         for(j=0; j<8; j++){
3771             for(y=0; y<8; y++){
3772                 for(x=0; x<8; x++){
3773                     double s= 0.25*(1<<BASIS_SHIFT);
3774                     int index= 8*i + j;
3775                     int perm_index= perm[index];
3776                     if(i==0) s*= sqrt(0.5);
3777                     if(j==0) s*= sqrt(0.5);
3778                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3779                 }
3780             }
3781         }
3782     }
3783 }
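/* basis[k][] holds the k-th two-dimensional 8x8 DCT basis function sampled at all
 * 64 pixel positions, scaled by 2^BASIS_SHIFT and with the usual 1/sqrt(2)
 * normalization for the i == 0 / j == 0 rows and columns; dct_quantize_refine()
 * combines it with try_8x8basis()/add_8x8basis() to measure and apply the effect
 * of changing a single coefficient directly on the spatial-domain residual. */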
3784
3785 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3786                         int16_t *block, int16_t *weight, int16_t *orig,
3787                         int n, int qscale){
3788     int16_t rem[64];
3789     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3790     const uint8_t *scantable= s->intra_scantable.scantable;
3791     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3792 //    unsigned int threshold1, threshold2;
3793 //    int bias=0;
3794     int run_tab[65];
3795     int prev_run=0;
3796     int prev_level=0;
3797     int qmul, qadd, start_i, last_non_zero, i, dc;
3798     uint8_t * length;
3799     uint8_t * last_length;
3800     int lambda;
3801     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3802 #ifdef REFINE_STATS
3803 static int count=0;
3804 static int after_last=0;
3805 static int to_zero=0;
3806 static int from_zero=0;
3807 static int raise=0;
3808 static int lower=0;
3809 static int messed_sign=0;
3810 #endif
3811
3812     if(basis[0][0] == 0)
3813         build_basis(s->idsp.idct_permutation);
3814
3815     qmul= qscale*2;
3816     qadd= (qscale-1)|1;
3817     if (s->mb_intra) {
3818         if (!s->h263_aic) {
3819             if (n < 4)
3820                 q = s->y_dc_scale;
3821             else
3822                 q = s->c_dc_scale;
3823         } else{
3824             /* For AIC we skip quant/dequant of INTRADC */
3825             q = 1;
3826             qadd=0;
3827         }
3828         q <<= RECON_SHIFT-3;
3829         /* note: block[0] is assumed to be positive */
3830         dc= block[0]*q;
3831 //        block[0] = (block[0] + (q >> 1)) / q;
3832         start_i = 1;
3833 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3834 //            bias= 1<<(QMAT_SHIFT-1);
3835         length     = s->intra_ac_vlc_length;
3836         last_length= s->intra_ac_vlc_last_length;
3837     } else {
3838         dc= 0;
3839         start_i = 0;
3840         length     = s->inter_ac_vlc_length;
3841         last_length= s->inter_ac_vlc_last_length;
3842     }
3843     last_non_zero = s->block_last_index[n];
3844
3845 #ifdef REFINE_STATS
3846 {START_TIMER
3847 #endif
3848     dc += (1<<(RECON_SHIFT-1));
3849     for(i=0; i<64; i++){
3850         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME use orig directly instead of copying to rem[]
3851     }
3852 #ifdef REFINE_STATS
3853 STOP_TIMER("memset rem[]")}
3854 #endif
3855     sum=0;
3856     for(i=0; i<64; i++){
3857         int one= 36;
3858         int qns=4;
3859         int w;
3860
3861         w= FFABS(weight[i]) + qns*one;
3862         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3863
3864         weight[i] = w;
3865 //        w=weight[i] = (63*qns + (w/2)) / w;
3866
3867         assert(w>0);
3868         assert(w<(1<<6));
3869         sum += w*w;
3870     }
3871     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3872 #ifdef REFINE_STATS
3873 {START_TIMER
3874 #endif
3875     run=0;
3876     rle_index=0;
3877     for(i=start_i; i<=last_non_zero; i++){
3878         int j= perm_scantable[i];
3879         const int level= block[j];
3880         int coeff;
3881
3882         if(level){
3883             if(level<0) coeff= qmul*level - qadd;
3884             else        coeff= qmul*level + qadd;
3885             run_tab[rle_index++]=run;
3886             run=0;
3887
3888             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
3889         }else{
3890             run++;
3891         }
3892     }
3893 #ifdef REFINE_STATS
3894 if(last_non_zero>0){
3895 STOP_TIMER("init rem[]")
3896 }
3897 }
3898
3899 {START_TIMER
3900 #endif
3901     for(;;){
3902         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
3903         int best_coeff=0;
3904         int best_change=0;
3905         int run2, best_unquant_change=0, analyze_gradient;
3906 #ifdef REFINE_STATS
3907 {START_TIMER
3908 #endif
3909         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3910
3911         if(analyze_gradient){
3912 #ifdef REFINE_STATS
3913 {START_TIMER
3914 #endif
3915             for(i=0; i<64; i++){
3916                 int w= weight[i];
3917
3918                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3919             }
3920 #ifdef REFINE_STATS
3921 STOP_TIMER("rem*w*w")}
3922 {START_TIMER
3923 #endif
3924             s->fdsp.fdct(d1);
3925 #ifdef REFINE_STATS
3926 STOP_TIMER("dct")}
3927 #endif
3928         }
3929
3930         if(start_i){
3931             const int level= block[0];
3932             int change, old_coeff;
3933
3934             assert(s->mb_intra);
3935
3936             old_coeff= q*level;
3937
3938             for(change=-1; change<=1; change+=2){
3939                 int new_level= level + change;
3940                 int score, new_coeff;
3941
3942                 new_coeff= q*new_level;
3943                 if(new_coeff >= 2048 || new_coeff < 0)
3944                     continue;
3945
3946                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
3947                                                   new_coeff - old_coeff);
3948                 if(score<best_score){
3949                     best_score= score;
3950                     best_coeff= 0;
3951                     best_change= change;
3952                     best_unquant_change= new_coeff - old_coeff;
3953                 }
3954             }
3955         }
3956
3957         run=0;
3958         rle_index=0;
3959         run2= run_tab[rle_index++];
3960         prev_level=0;
3961         prev_run=0;
3962
3963         for(i=start_i; i<64; i++){
3964             int j= perm_scantable[i];
3965             const int level= block[j];
3966             int change, old_coeff;
3967
3968             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3969                 break;
3970
3971             if(level){
3972                 if(level<0) old_coeff= qmul*level - qadd;
3973                 else        old_coeff= qmul*level + qadd;
3974                 run2= run_tab[rle_index++]; //FIXME: may read past the last entry
3975             }else{
3976                 old_coeff=0;
3977                 run2--;
3978                 assert(run2>=0 || i >= last_non_zero );
3979             }
3980
3981             for(change=-1; change<=1; change+=2){
3982                 int new_level= level + change;
3983                 int score, new_coeff, unquant_change;
3984
3985                 score=0;
3986                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3987                    continue;
3988
3989                 if(new_level){
3990                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3991                     else            new_coeff= qmul*new_level + qadd;
3992                     if(new_coeff >= 2048 || new_coeff <= -2048)
3993                         continue;
3994                     //FIXME check for overflow
3995
3996                     if(level){
3997                         if(level < 63 && level > -63){
3998                             if(i < last_non_zero)
3999                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4000                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4001                             else
4002                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4003                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4004                         }
4005                     }else{
4006                         assert(FFABS(new_level)==1);
4007
4008                         if(analyze_gradient){
4009                             int g= d1[ scantable[i] ];
4010                             if(g && (g^new_level) >= 0)
4011                                 continue;
4012                         }
4013
4014                         if(i < last_non_zero){
4015                             int next_i= i + run2 + 1;
4016                             int next_level= block[ perm_scantable[next_i] ] + 64;
4017
4018                             if(next_level&(~127))
4019                                 next_level= 0;
4020
4021                             if(next_i < last_non_zero)
4022                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4023                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4024                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4025                             else
4026                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4027                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4028                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4029                         }else{
4030                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4031                             if(prev_level){
4032                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4033                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4034                             }
4035                         }
4036                     }
4037                 }else{
4038                     new_coeff=0;
4039                     assert(FFABS(level)==1);
4040
4041                     if(i < last_non_zero){
4042                         int next_i= i + run2 + 1;
4043                         int next_level= block[ perm_scantable[next_i] ] + 64;
4044
4045                         if(next_level&(~127))
4046                             next_level= 0;
4047
4048                         if(next_i < last_non_zero)
4049                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4050                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4051                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4052                         else
4053                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4054                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4055                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4056                     }else{
4057                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4058                         if(prev_level){
4059                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4060                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4061                         }
4062                     }
4063                 }
4064
4065                 score *= lambda;
4066
4067                 unquant_change= new_coeff - old_coeff;
4068                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
4069
4070                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4071                                                    unquant_change);
4072                 if(score<best_score){
4073                     best_score= score;
4074                     best_coeff= i;
4075                     best_change= change;
4076                     best_unquant_change= unquant_change;
4077                 }
4078             }
4079             if(level){
4080                 prev_level= level + 64;
4081                 if(prev_level&(~127))
4082                     prev_level= 0;
4083                 prev_run= run;
4084                 run=0;
4085             }else{
4086                 run++;
4087             }
4088         }
4089 #ifdef REFINE_STATS
4090 STOP_TIMER("iterative step")}
4091 #endif
4092
4093         if(best_change){
4094             int j= perm_scantable[ best_coeff ];
4095
4096             block[j] += best_change;
4097
4098             if(best_coeff > last_non_zero){
4099                 last_non_zero= best_coeff;
4100                 assert(block[j]);
4101 #ifdef REFINE_STATS
4102 after_last++;
4103 #endif
4104             }else{
4105 #ifdef REFINE_STATS
4106 if(block[j]){
4107     if(block[j] - best_change){
4108         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4109             raise++;
4110         }else{
4111             lower++;
4112         }
4113     }else{
4114         from_zero++;
4115     }
4116 }else{
4117     to_zero++;
4118 }
4119 #endif
4120                 for(; last_non_zero>=start_i; last_non_zero--){
4121                     if(block[perm_scantable[last_non_zero]])
4122                         break;
4123                 }
4124             }
4125 #ifdef REFINE_STATS
4126 count++;
4127 if(256*256*256*64 % count == 0){
4128     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4129 }
4130 #endif
4131             run=0;
4132             rle_index=0;
4133             for(i=start_i; i<=last_non_zero; i++){
4134                 int j= perm_scantable[i];
4135                 const int level= block[j];
4136
4137                 if(level){
4138                     run_tab[rle_index++]=run;
4139                     run=0;
4140                 }else{
4141                     run++;
4142                 }
4143             }
4144
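            /* Fold the accepted coefficient change into the spatial-domain
             * remainder, so the next iteration of the search measures
             * distortion against the updated error. */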
4145             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4146         }else{
4147             break;
4148         }
4149     }
4150 #ifdef REFINE_STATS
4151 if(last_non_zero>0){
4152 STOP_TIMER("iterative search")
4153 }
4154 }
4155 #endif
4156
4157     return last_non_zero;
4158 }
4159
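/*
 * Forward DCT followed by uniform quantization with a dead zone (this is the C
 * reference implementation; optimized versions may be selected at init time).
 * Intra DC is quantized separately with the DC scale factor; every other
 * coefficient is quantized roughly as
 *     level = (|block[j]| * qmat[j] + bias) >> QMAT_SHIFT   (sign restored),
 * where qmat[] holds the reciprocal quantization steps pre-scaled by
 * 1 << QMAT_SHIFT (see ff_convert_matrix()).  Returns the scan-order index of
 * the last nonzero coefficient and reports possible coefficient overflow via
 * *overflow.
 */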
4160 int ff_dct_quantize_c(MpegEncContext *s,
4161                         int16_t *block, int n,
4162                         int qscale, int *overflow)
4163 {
4164     int i, j, level, last_non_zero, q, start_i;
4165     const int *qmat;
4166     const uint8_t *scantable= s->intra_scantable.scantable;
4167     int bias;
4168     int max=0;
4169     unsigned int threshold1, threshold2;
4170
4171     s->fdsp.fdct(block);
4172
4173     if(s->dct_error_sum)
4174         s->denoise_dct(s, block);
4175
4176     if (s->mb_intra) {
4177         if (!s->h263_aic) {
4178             if (n < 4)
4179                 q = s->y_dc_scale;
4180             else
4181                 q = s->c_dc_scale;
4182             q = q << 3;
4183         } else
4184             /* For AIC we skip quant/dequant of INTRADC */
4185             q = 1 << 3;
4186
4187         /* note: block[0] is assumed to be positive */
4188         block[0] = (block[0] + (q >> 1)) / q;
4189         start_i = 1;
4190         last_non_zero = 0;
4191         qmat = s->q_intra_matrix[qscale];
4192         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4193     } else {
4194         start_i = 0;
4195         last_non_zero = -1;
4196         qmat = s->q_inter_matrix[qscale];
4197         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4198     }
4199     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4200     threshold2= (threshold1<<1);
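    /* The unsigned comparison below,
     *     (unsigned)(level + threshold1) > threshold2,
     * is a branch-free way of testing
     *     level < -threshold1 || level > threshold1,
     * i.e. whether the coefficient survives the dead zone.  This first,
     * backwards scan only locates the last such coefficient so that the
     * quantization loop that follows can stop at last_non_zero. */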
4201     for(i=63;i>=start_i;i--) {
4202         j = scantable[i];
4203         level = block[j] * qmat[j];
4204
4205         if(((unsigned)(level+threshold1))>threshold2){
4206             last_non_zero = i;
4207             break;
4208         }else{
4209             block[j]=0;
4210         }
4211     }
4212     for(i=start_i; i<=last_non_zero; i++) {
4213         j = scantable[i];
4214         level = block[j] * qmat[j];
4215
4216 //        if(   bias+level >= (1<<QMAT_SHIFT)
4217 //           || bias-level >= (1<<QMAT_SHIFT)){
4218         if(((unsigned)(level+threshold1))>threshold2){
4219             if(level>0){
4220                 level= (bias + level)>>QMAT_SHIFT;
4221                 block[j]= level;
4222             }else{
4223                 level= (bias - level)>>QMAT_SHIFT;
4224                 block[j]= -level;
4225             }
4226             max |=level;
4227         }else{
4228             block[j]=0;
4229         }
4230     }
4231     *overflow= s->max_qcoeff < max; // "max" is the bitwise OR of all levels, so overflow might have happened
4232
4233     /* The coefficients must be permuted to match the IDCT permutation; only the nonzero elements (up to last_non_zero) need to be moved. */
4234     if (s->idsp.idct_permutation_type != FF_NO_IDCT_PERM)
4235         ff_block_permute(block, s->idsp.idct_permutation,
4236                          scantable, last_non_zero);
4237
4238     return last_non_zero;
4239 }
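
/* A minimal sketch of how this routine is reached (illustrative only; the real
 * call site is in the macroblock encoding path elsewhere in this file): it is
 * normally invoked through the s->dct_quantize function pointer, and the caller
 * clips the coefficients when *overflow is set, e.g.
 *
 *     int overflow;
 *     s->block_last_index[i] = s->dct_quantize(s, s->block[i], i,
 *                                              s->qscale, &overflow);
 *     if (overflow)
 *         clip_coeffs(s, s->block[i], s->block_last_index[i]);
 */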
4240
4241 #define OFFSET(x) offsetof(MpegEncContext, x)
4242 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4243 static const AVOption h263_options[] = {
4244     { "obmc",         "Use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4245     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4246     { "mb_info",      "Emit macroblock info for RFC 2190 packetization; the parameter value is the maximum payload size.", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4247     FF_MPV_COMMON_OPTS
4248     { NULL },
4249 };
4250
4251 static const AVClass h263_class = {
4252     .class_name = "H.263 encoder",
4253     .item_name  = av_default_item_name,
4254     .option     = h263_options,
4255     .version    = LIBAVUTIL_VERSION_INT,
4256 };
4257
4258 AVCodec ff_h263_encoder = {
4259     .name           = "h263",
4260     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4261     .type           = AVMEDIA_TYPE_VIDEO,
4262     .id             = AV_CODEC_ID_H263,
4263     .priv_data_size = sizeof(MpegEncContext),
4264     .init           = ff_MPV_encode_init,
4265     .encode2        = ff_MPV_encode_picture,
4266     .close          = ff_MPV_encode_end,
4267     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4268     .priv_class     = &h263_class,
4269 };
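
/* Sketch of how the private options declared in h263_options[] reach the
 * encoder through the generic AVOption machinery (an assumed, standard
 * libavcodec calling sequence, not code from this file):
 *
 *     AVCodec *codec        = avcodec_find_encoder_by_name("h263");
 *     AVCodecContext *avctx = avcodec_alloc_context3(codec);
 *     AVDictionary *opts    = NULL;
 *     // width, height, time_base and pix_fmt must be set on avctx first
 *     av_dict_set(&opts, "obmc", "1", 0);
 *     av_dict_set(&opts, "structured_slices", "1", 0);
 *     avcodec_open2(avctx, codec, &opts);
 *     av_dict_free(&opts);
 */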
4270
4271 static const AVOption h263p_options[] = {
4272     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4273     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4274     { "obmc",       "Use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4275     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4276     FF_MPV_COMMON_OPTS
4277     { NULL },
4278 };
4279 static const AVClass h263p_class = {
4280     .class_name = "H.263p encoder",
4281     .item_name  = av_default_item_name,
4282     .option     = h263p_options,
4283     .version    = LIBAVUTIL_VERSION_INT,
4284 };
4285
4286 AVCodec ff_h263p_encoder = {
4287     .name           = "h263p",
4288     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4289     .type           = AVMEDIA_TYPE_VIDEO,
4290     .id             = AV_CODEC_ID_H263P,
4291     .priv_data_size = sizeof(MpegEncContext),
4292     .init           = ff_MPV_encode_init,
4293     .encode2        = ff_MPV_encode_picture,
4294     .close          = ff_MPV_encode_end,
4295     .capabilities   = CODEC_CAP_SLICE_THREADS,
4296     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4297     .priv_class     = &h263p_class,
4298 };
4299
4300 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4301
4302 AVCodec ff_msmpeg4v2_encoder = {
4303     .name           = "msmpeg4v2",
4304     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4305     .type           = AVMEDIA_TYPE_VIDEO,
4306     .id             = AV_CODEC_ID_MSMPEG4V2,
4307     .priv_data_size = sizeof(MpegEncContext),
4308     .init           = ff_MPV_encode_init,
4309     .encode2        = ff_MPV_encode_picture,
4310     .close          = ff_MPV_encode_end,
4311     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4312     .priv_class     = &msmpeg4v2_class,
4313 };
4314
4315 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4316
4317 AVCodec ff_msmpeg4v3_encoder = {
4318     .name           = "msmpeg4",
4319     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4320     .type           = AVMEDIA_TYPE_VIDEO,
4321     .id             = AV_CODEC_ID_MSMPEG4V3,
4322     .priv_data_size = sizeof(MpegEncContext),
4323     .init           = ff_MPV_encode_init,
4324     .encode2        = ff_MPV_encode_picture,
4325     .close          = ff_MPV_encode_end,
4326     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4327     .priv_class     = &msmpeg4v3_class,
4328 };
4329
4330 FF_MPV_GENERIC_CLASS(wmv1)
4331
4332 AVCodec ff_wmv1_encoder = {
4333     .name           = "wmv1",
4334     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4335     .type           = AVMEDIA_TYPE_VIDEO,
4336     .id             = AV_CODEC_ID_WMV1,
4337     .priv_data_size = sizeof(MpegEncContext),
4338     .init           = ff_MPV_encode_init,
4339     .encode2        = ff_MPV_encode_picture,
4340     .close          = ff_MPV_encode_end,
4341     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4342     .priv_class     = &wmv1_class,
4343 };