]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
avcodec: add enum values on chroma sample location
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "idctdsp.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "h261.h"
44 #include "h263.h"
45 #include "mjpegenc_common.h"
46 #include "mathops.h"
47 #include "mpegutils.h"
48 #include "mjpegenc.h"
49 #include "msmpeg4.h"
50 #include "pixblockdsp.h"
51 #include "qpeldsp.h"
52 #include "faandct.h"
53 #include "thread.h"
54 #include "aandcttab.h"
55 #include "flv.h"
56 #include "mpeg4video.h"
57 #include "internal.h"
58 #include "bytestream.h"
59 #include <limits.h>
60
61 #define QUANT_BIAS_SHIFT 8
62
63 #define QMAT_SHIFT_MMX 16
64 #define QMAT_SHIFT 22
65
66 static int encode_picture(MpegEncContext *s, int picture_number);
67 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
68 static int sse_mb(MpegEncContext *s);
69 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
70 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
71
72 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
73 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
74
75 const AVOption ff_mpv_generic_options[] = {
76     FF_MPV_COMMON_OPTS
77     { NULL },
78 };
79
80 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
81                        uint16_t (*qmat16)[2][64],
82                        const uint16_t *quant_matrix,
83                        int bias, int qmin, int qmax, int intra)
84 {
85     FDCTDSPContext *fdsp = &s->fdsp;
86     int qscale;
87     int shift = 0;
88
89     for (qscale = qmin; qscale <= qmax; qscale++) {
90         int i;
91         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
92 #if CONFIG_FAANDCT
93             fdsp->fdct == ff_faandct            ||
94 #endif /* CONFIG_FAANDCT */
95             fdsp->fdct == ff_jpeg_fdct_islow_10) {
96             for (i = 0; i < 64; i++) {
97                 const int j = s->idsp.idct_permutation[i];
98                 /* 16 <= qscale * quant_matrix[i] <= 7905
99                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
100                  *             19952 <=              x  <= 249205026
101                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
102                  *           3444240 >= (1 << 36) / (x) >= 275 */
103
104                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
105                                         (qscale * quant_matrix[j]));
106             }
107         } else if (fdsp->fdct == ff_fdct_ifast) {
108             for (i = 0; i < 64; i++) {
109                 const int j = s->idsp.idct_permutation[i];
110                 /* 16 <= qscale * quant_matrix[i] <= 7905
111                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
112                  *             19952 <=              x  <= 249205026
113                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
114                  *           3444240 >= (1 << 36) / (x) >= 275 */
115
116                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
117                                         (ff_aanscales[i] * qscale *
118                                          quant_matrix[j]));
119             }
120         } else {
121             for (i = 0; i < 64; i++) {
122                 const int j = s->idsp.idct_permutation[i];
123                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
124                  * Assume x = qscale * quant_matrix[i]
125                  * So             16 <=              x  <= 7905
126                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
127                  * so          32768 >= (1 << 19) / (x) >= 67 */
128                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
129                                         (qscale * quant_matrix[j]));
130                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
131                 //                    (qscale * quant_matrix[i]);
132                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
133                                        (qscale * quant_matrix[j]);
134
135                 if (qmat16[qscale][0][i] == 0 ||
136                     qmat16[qscale][0][i] == 128 * 256)
137                     qmat16[qscale][0][i] = 128 * 256 - 1;
138                 qmat16[qscale][1][i] =
139                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
140                                 qmat16[qscale][0][i]);
141             }
142         }
143
144         for (i = intra; i < 64; i++) {
145             int64_t max = 8191;
146             if (fdsp->fdct == ff_fdct_ifast) {
147                 max = (8191LL * ff_aanscales[i]) >> 14;
148             }
149             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
150                 shift++;
151             }
152         }
153     }
154     if (shift) {
155         av_log(NULL, AV_LOG_INFO,
156                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
157                QMAT_SHIFT - shift);
158     }
159 }
160
161 static inline void update_qscale(MpegEncContext *s)
162 {
163     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
164                 (FF_LAMBDA_SHIFT + 7);
165     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
166
167     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
168                  FF_LAMBDA_SHIFT;
169 }
170
171 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
172 {
173     int i;
174
175     if (matrix) {
176         put_bits(pb, 1, 1);
177         for (i = 0; i < 64; i++) {
178             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
179         }
180     } else
181         put_bits(pb, 1, 0);
182 }
183
184 /**
185  * init s->current_picture.qscale_table from s->lambda_table
186  */
187 void ff_init_qscale_tab(MpegEncContext *s)
188 {
189     int8_t * const qscale_table = s->current_picture.qscale_table;
190     int i;
191
192     for (i = 0; i < s->mb_num; i++) {
193         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
194         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
195         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
196                                                   s->avctx->qmax);
197     }
198 }
199
200 static void update_duplicate_context_after_me(MpegEncContext *dst,
201                                               MpegEncContext *src)
202 {
203 #define COPY(a) dst->a= src->a
204     COPY(pict_type);
205     COPY(current_picture);
206     COPY(f_code);
207     COPY(b_code);
208     COPY(qscale);
209     COPY(lambda);
210     COPY(lambda2);
211     COPY(picture_in_gop_number);
212     COPY(gop_picture_number);
213     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
214     COPY(progressive_frame);    // FIXME don't set in encode_header
215     COPY(partitioned_frame);    // FIXME don't set in encode_header
216 #undef COPY
217 }
218
219 /**
220  * Set the given MpegEncContext to defaults for encoding.
221  * the changed fields will not depend upon the prior state of the MpegEncContext.
222  */
223 static void mpv_encode_defaults(MpegEncContext *s)
224 {
225     int i;
226     ff_mpv_common_defaults(s);
227
228     for (i = -16; i < 16; i++) {
229         default_fcode_tab[i + MAX_MV] = 1;
230     }
231     s->me.mv_penalty = default_mv_penalty;
232     s->fcode_tab     = default_fcode_tab;
233
234     s->input_picture_number  = 0;
235     s->picture_in_gop_number = 0;
236 }
237
238 /* init video encoder */
239 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
240 {
241     MpegEncContext *s = avctx->priv_data;
242     int i, ret, format_supported;
243
244     mpv_encode_defaults(s);
245
246     switch (avctx->codec_id) {
247     case AV_CODEC_ID_MPEG2VIDEO:
248         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
249             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
250             av_log(avctx, AV_LOG_ERROR,
251                    "only YUV420 and YUV422 are supported\n");
252             return -1;
253         }
254         break;
255     case AV_CODEC_ID_MJPEG:
256         format_supported = 0;
257         /* JPEG color space */
258         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
259             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
260             (avctx->color_range == AVCOL_RANGE_JPEG &&
261              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
262               avctx->pix_fmt == AV_PIX_FMT_YUV422P)))
263             format_supported = 1;
264         /* MPEG color space */
265         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
266                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
267                   avctx->pix_fmt == AV_PIX_FMT_YUV422P))
268             format_supported = 1;
269
270         if (!format_supported) {
271             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
272             return -1;
273         }
274         break;
275     default:
276         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
277             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
278             return -1;
279         }
280     }
281
282     switch (avctx->pix_fmt) {
283     case AV_PIX_FMT_YUVJ422P:
284     case AV_PIX_FMT_YUV422P:
285         s->chroma_format = CHROMA_422;
286         break;
287     case AV_PIX_FMT_YUVJ420P:
288     case AV_PIX_FMT_YUV420P:
289     default:
290         s->chroma_format = CHROMA_420;
291         break;
292     }
293
294     s->bit_rate = avctx->bit_rate;
295     s->width    = avctx->width;
296     s->height   = avctx->height;
297     if (avctx->gop_size > 600 &&
298         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
299         av_log(avctx, AV_LOG_ERROR,
300                "Warning keyframe interval too large! reducing it ...\n");
301         avctx->gop_size = 600;
302     }
303     s->gop_size     = avctx->gop_size;
304     s->avctx        = avctx;
305     s->flags        = avctx->flags;
306     s->flags2       = avctx->flags2;
307     if (avctx->max_b_frames > MAX_B_FRAMES) {
308         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
309                "is %d.\n", MAX_B_FRAMES);
310     }
311     s->max_b_frames = avctx->max_b_frames;
312     s->codec_id     = avctx->codec->id;
313     s->strict_std_compliance = avctx->strict_std_compliance;
314     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
315     s->mpeg_quant         = avctx->mpeg_quant;
316     s->rtp_mode           = !!avctx->rtp_payload_size;
317     s->intra_dc_precision = avctx->intra_dc_precision;
318     s->user_specified_pts = AV_NOPTS_VALUE;
319
320     if (s->gop_size <= 1) {
321         s->intra_only = 1;
322         s->gop_size   = 12;
323     } else {
324         s->intra_only = 0;
325     }
326
327     s->me_method = avctx->me_method;
328
329     /* Fixed QSCALE */
330     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
331
332     s->adaptive_quant = (s->avctx->lumi_masking ||
333                          s->avctx->dark_masking ||
334                          s->avctx->temporal_cplx_masking ||
335                          s->avctx->spatial_cplx_masking  ||
336                          s->avctx->p_masking      ||
337                          s->avctx->border_masking ||
338                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
339                         !s->fixed_qscale;
340
341     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
342
343     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
344         av_log(avctx, AV_LOG_ERROR,
345                "a vbv buffer size is needed, "
346                "for encoding with a maximum bitrate\n");
347         return -1;
348     }
349
350     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
351         av_log(avctx, AV_LOG_INFO,
352                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
353     }
354
355     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
356         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
357         return -1;
358     }
359
360     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
361         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
362         return -1;
363     }
364
365     if (avctx->rc_max_rate &&
366         avctx->rc_max_rate == avctx->bit_rate &&
367         avctx->rc_max_rate != avctx->rc_min_rate) {
368         av_log(avctx, AV_LOG_INFO,
369                "impossible bitrate constraints, this will fail\n");
370     }
371
372     if (avctx->rc_buffer_size &&
373         avctx->bit_rate * (int64_t)avctx->time_base.num >
374             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
375         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
376         return -1;
377     }
378
379     if (!s->fixed_qscale &&
380         avctx->bit_rate * av_q2d(avctx->time_base) >
381             avctx->bit_rate_tolerance) {
382         av_log(avctx, AV_LOG_ERROR,
383                "bitrate tolerance too small for bitrate\n");
384         return -1;
385     }
386
387     if (s->avctx->rc_max_rate &&
388         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
389         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
390          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
391         90000LL * (avctx->rc_buffer_size - 1) >
392             s->avctx->rc_max_rate * 0xFFFFLL) {
393         av_log(avctx, AV_LOG_INFO,
394                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
395                "specified vbv buffer is too large for the given bitrate!\n");
396     }
397
398     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
399         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
400         s->codec_id != AV_CODEC_ID_FLV1) {
401         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
402         return -1;
403     }
404
405     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
406         av_log(avctx, AV_LOG_ERROR,
407                "OBMC is only supported with simple mb decision\n");
408         return -1;
409     }
410
411     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
412         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
413         return -1;
414     }
415
416     if (s->max_b_frames                    &&
417         s->codec_id != AV_CODEC_ID_MPEG4      &&
418         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
419         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
420         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
421         return -1;
422     }
423
424     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
425          s->codec_id == AV_CODEC_ID_H263  ||
426          s->codec_id == AV_CODEC_ID_H263P) &&
427         (avctx->sample_aspect_ratio.num > 255 ||
428          avctx->sample_aspect_ratio.den > 255)) {
429         av_log(avctx, AV_LOG_ERROR,
430                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
431                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
432         return -1;
433     }
434
435     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
436         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
437         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
438         return -1;
439     }
440
441     // FIXME mpeg2 uses that too
442     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
443         av_log(avctx, AV_LOG_ERROR,
444                "mpeg2 style quantization not supported by codec\n");
445         return -1;
446     }
447
448     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
449         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
450         return -1;
451     }
452
453     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
454         s->avctx->mb_decision != FF_MB_DECISION_RD) {
455         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
456         return -1;
457     }
458
459     if (s->avctx->scenechange_threshold < 1000000000 &&
460         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
461         av_log(avctx, AV_LOG_ERROR,
462                "closed gop with scene change detection are not supported yet, "
463                "set threshold to 1000000000\n");
464         return -1;
465     }
466
467     if (s->flags & CODEC_FLAG_LOW_DELAY) {
468         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
469             av_log(avctx, AV_LOG_ERROR,
470                   "low delay forcing is only available for mpeg2\n");
471             return -1;
472         }
473         if (s->max_b_frames != 0) {
474             av_log(avctx, AV_LOG_ERROR,
475                    "b frames cannot be used with low delay\n");
476             return -1;
477         }
478     }
479
480     if (s->q_scale_type == 1) {
481         if (avctx->qmax > 12) {
482             av_log(avctx, AV_LOG_ERROR,
483                    "non linear quant only supports qmax <= 12 currently\n");
484             return -1;
485         }
486     }
487
488     if (s->avctx->thread_count > 1         &&
489         s->codec_id != AV_CODEC_ID_MPEG4      &&
490         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
491         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
492         (s->codec_id != AV_CODEC_ID_H263P)) {
493         av_log(avctx, AV_LOG_ERROR,
494                "multi threaded encoding not supported by codec\n");
495         return -1;
496     }
497
498     if (s->avctx->thread_count < 1) {
499         av_log(avctx, AV_LOG_ERROR,
500                "automatic thread number detection not supported by codec,"
501                "patch welcome\n");
502         return -1;
503     }
504
505     if (s->avctx->thread_count > 1)
506         s->rtp_mode = 1;
507
508     if (!avctx->time_base.den || !avctx->time_base.num) {
509         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
510         return -1;
511     }
512
513     i = (INT_MAX / 2 + 128) >> 8;
514     if (avctx->mb_threshold >= i) {
515         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
516                i - 1);
517         return -1;
518     }
519
520     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
521         av_log(avctx, AV_LOG_INFO,
522                "notice: b_frame_strategy only affects the first pass\n");
523         avctx->b_frame_strategy = 0;
524     }
525
526     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
527     if (i > 1) {
528         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
529         avctx->time_base.den /= i;
530         avctx->time_base.num /= i;
531         //return -1;
532     }
533
534     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
535         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
536         // (a + x * 3 / 8) / x
537         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
538         s->inter_quant_bias = 0;
539     } else {
540         s->intra_quant_bias = 0;
541         // (a - x / 4) / x
542         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
543     }
544
545     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
546         s->intra_quant_bias = avctx->intra_quant_bias;
547     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
548         s->inter_quant_bias = avctx->inter_quant_bias;
549
550     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
551         s->avctx->time_base.den > (1 << 16) - 1) {
552         av_log(avctx, AV_LOG_ERROR,
553                "timebase %d/%d not supported by MPEG 4 standard, "
554                "the maximum admitted value for the timebase denominator "
555                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
556                (1 << 16) - 1);
557         return -1;
558     }
559     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
560
561     switch (avctx->codec->id) {
562     case AV_CODEC_ID_MPEG1VIDEO:
563         s->out_format = FMT_MPEG1;
564         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
565         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
566         break;
567     case AV_CODEC_ID_MPEG2VIDEO:
568         s->out_format = FMT_MPEG1;
569         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
570         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
571         s->rtp_mode   = 1;
572         break;
573     case AV_CODEC_ID_MJPEG:
574         s->out_format = FMT_MJPEG;
575         s->intra_only = 1; /* force intra only for jpeg */
576         if (!CONFIG_MJPEG_ENCODER ||
577             ff_mjpeg_encode_init(s) < 0)
578             return -1;
579         avctx->delay = 0;
580         s->low_delay = 1;
581         break;
582     case AV_CODEC_ID_H261:
583         if (!CONFIG_H261_ENCODER)
584             return -1;
585         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
586             av_log(avctx, AV_LOG_ERROR,
587                    "The specified picture size of %dx%d is not valid for the "
588                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
589                     s->width, s->height);
590             return -1;
591         }
592         s->out_format = FMT_H261;
593         avctx->delay  = 0;
594         s->low_delay  = 1;
595         break;
596     case AV_CODEC_ID_H263:
597         if (!CONFIG_H263_ENCODER)
598         return -1;
599         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
600                              s->width, s->height) == 8) {
601             av_log(avctx, AV_LOG_INFO,
602                    "The specified picture size of %dx%d is not valid for "
603                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
604                    "352x288, 704x576, and 1408x1152."
605                    "Try H.263+.\n", s->width, s->height);
606             return -1;
607         }
608         s->out_format = FMT_H263;
609         avctx->delay  = 0;
610         s->low_delay  = 1;
611         break;
612     case AV_CODEC_ID_H263P:
613         s->out_format = FMT_H263;
614         s->h263_plus  = 1;
615         /* Fx */
616         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
617         s->modified_quant  = s->h263_aic;
618         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
619         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
620
621         /* /Fx */
622         /* These are just to be sure */
623         avctx->delay = 0;
624         s->low_delay = 1;
625         break;
626     case AV_CODEC_ID_FLV1:
627         s->out_format      = FMT_H263;
628         s->h263_flv        = 2; /* format = 1; 11-bit codes */
629         s->unrestricted_mv = 1;
630         s->rtp_mode  = 0; /* don't allow GOB */
631         avctx->delay = 0;
632         s->low_delay = 1;
633         break;
634     case AV_CODEC_ID_RV10:
635         s->out_format = FMT_H263;
636         avctx->delay  = 0;
637         s->low_delay  = 1;
638         break;
639     case AV_CODEC_ID_RV20:
640         s->out_format      = FMT_H263;
641         avctx->delay       = 0;
642         s->low_delay       = 1;
643         s->modified_quant  = 1;
644         s->h263_aic        = 1;
645         s->h263_plus       = 1;
646         s->loop_filter     = 1;
647         s->unrestricted_mv = 0;
648         break;
649     case AV_CODEC_ID_MPEG4:
650         s->out_format      = FMT_H263;
651         s->h263_pred       = 1;
652         s->unrestricted_mv = 1;
653         s->low_delay       = s->max_b_frames ? 0 : 1;
654         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
655         break;
656     case AV_CODEC_ID_MSMPEG4V2:
657         s->out_format      = FMT_H263;
658         s->h263_pred       = 1;
659         s->unrestricted_mv = 1;
660         s->msmpeg4_version = 2;
661         avctx->delay       = 0;
662         s->low_delay       = 1;
663         break;
664     case AV_CODEC_ID_MSMPEG4V3:
665         s->out_format        = FMT_H263;
666         s->h263_pred         = 1;
667         s->unrestricted_mv   = 1;
668         s->msmpeg4_version   = 3;
669         s->flipflop_rounding = 1;
670         avctx->delay         = 0;
671         s->low_delay         = 1;
672         break;
673     case AV_CODEC_ID_WMV1:
674         s->out_format        = FMT_H263;
675         s->h263_pred         = 1;
676         s->unrestricted_mv   = 1;
677         s->msmpeg4_version   = 4;
678         s->flipflop_rounding = 1;
679         avctx->delay         = 0;
680         s->low_delay         = 1;
681         break;
682     case AV_CODEC_ID_WMV2:
683         s->out_format        = FMT_H263;
684         s->h263_pred         = 1;
685         s->unrestricted_mv   = 1;
686         s->msmpeg4_version   = 5;
687         s->flipflop_rounding = 1;
688         avctx->delay         = 0;
689         s->low_delay         = 1;
690         break;
691     default:
692         return -1;
693     }
694
695     avctx->has_b_frames = !s->low_delay;
696
697     s->encoding = 1;
698
699     s->progressive_frame    =
700     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
701                                                 CODEC_FLAG_INTERLACED_ME) ||
702                                 s->alternate_scan);
703
704     /* init */
705     ff_mpv_idct_init(s);
706     if (ff_mpv_common_init(s) < 0)
707         return -1;
708
709     if (ARCH_X86)
710         ff_mpv_encode_init_x86(s);
711
712     ff_fdctdsp_init(&s->fdsp, avctx);
713     ff_me_cmp_init(&s->mecc, avctx);
714     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
715     ff_pixblockdsp_init(&s->pdsp, avctx);
716     ff_qpeldsp_init(&s->qdsp);
717
718     s->avctx->coded_frame = s->current_picture.f;
719
720     if (s->msmpeg4_version) {
721         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
722                           2 * 2 * (MAX_LEVEL + 1) *
723                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
724     }
725     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
726
727     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
728     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
729     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
730     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
731     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
732                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
733     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
734                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
735
736     if (s->avctx->noise_reduction) {
737         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
738                           2 * 64 * sizeof(uint16_t), fail);
739     }
740
741     if (CONFIG_H263_ENCODER)
742         ff_h263dsp_init(&s->h263dsp);
743     if (!s->dct_quantize)
744         s->dct_quantize = ff_dct_quantize_c;
745     if (!s->denoise_dct)
746         s->denoise_dct  = denoise_dct_c;
747     s->fast_dct_quantize = s->dct_quantize;
748     if (avctx->trellis)
749         s->dct_quantize  = dct_quantize_trellis_c;
750
751     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
752         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
753
754     s->quant_precision = 5;
755
756     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
757     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
758
759     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
760         ff_h261_encode_init(s);
761     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
762         ff_h263_encode_init(s);
763     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
764         ff_msmpeg4_encode_init(s);
765     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
766         && s->out_format == FMT_MPEG1)
767         ff_mpeg1_encode_init(s);
768
769     /* init q matrix */
770     for (i = 0; i < 64; i++) {
771         int j = s->idsp.idct_permutation[i];
772         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
773             s->mpeg_quant) {
774             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
775             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
776         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
777             s->intra_matrix[j] =
778             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
779         } else {
780             /* mpeg1/2 */
781             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
782             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
783         }
784         if (s->avctx->intra_matrix)
785             s->intra_matrix[j] = s->avctx->intra_matrix[i];
786         if (s->avctx->inter_matrix)
787             s->inter_matrix[j] = s->avctx->inter_matrix[i];
788     }
789
790     /* precompute matrix */
791     /* for mjpeg, we do include qscale in the matrix */
792     if (s->out_format != FMT_MJPEG) {
793         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
794                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
795                           31, 1);
796         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
797                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
798                           31, 0);
799     }
800
801     if (ff_rate_control_init(s) < 0)
802         return -1;
803
804 #if FF_API_ERROR_RATE
805     FF_DISABLE_DEPRECATION_WARNINGS
806     if (avctx->error_rate)
807         s->error_rate = avctx->error_rate;
808     FF_ENABLE_DEPRECATION_WARNINGS;
809 #endif
810
811 #if FF_API_NORMALIZE_AQP
812     FF_DISABLE_DEPRECATION_WARNINGS
813     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
814         s->mpv_flags |= FF_MPV_FLAG_NAQ;
815     FF_ENABLE_DEPRECATION_WARNINGS;
816 #endif
817
818 #if FF_API_MV0
819     FF_DISABLE_DEPRECATION_WARNINGS
820     if (avctx->flags & CODEC_FLAG_MV0)
821         s->mpv_flags |= FF_MPV_FLAG_MV0;
822     FF_ENABLE_DEPRECATION_WARNINGS
823 #endif
824
825     if (avctx->b_frame_strategy == 2) {
826         for (i = 0; i < s->max_b_frames + 2; i++) {
827             s->tmp_frames[i] = av_frame_alloc();
828             if (!s->tmp_frames[i])
829                 return AVERROR(ENOMEM);
830
831             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
832             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
833             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
834
835             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
836             if (ret < 0)
837                 return ret;
838         }
839     }
840
841     return 0;
842 fail:
843     ff_mpv_encode_end(avctx);
844     return AVERROR_UNKNOWN;
845 }
846
847 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
848 {
849     MpegEncContext *s = avctx->priv_data;
850     int i;
851
852     ff_rate_control_uninit(s);
853
854     ff_mpv_common_end(s);
855     if (CONFIG_MJPEG_ENCODER &&
856         s->out_format == FMT_MJPEG)
857         ff_mjpeg_encode_close(s);
858
859     av_freep(&avctx->extradata);
860
861     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
862         av_frame_free(&s->tmp_frames[i]);
863
864     ff_free_picture_tables(&s->new_picture);
865     ff_mpeg_unref_picture(s, &s->new_picture);
866
867     av_freep(&s->avctx->stats_out);
868     av_freep(&s->ac_stats);
869
870     av_freep(&s->q_intra_matrix);
871     av_freep(&s->q_inter_matrix);
872     av_freep(&s->q_intra_matrix16);
873     av_freep(&s->q_inter_matrix16);
874     av_freep(&s->input_picture);
875     av_freep(&s->reordered_input_picture);
876     av_freep(&s->dct_offset);
877
878     return 0;
879 }
880
/**
 * Sum of absolute differences between a 16x16 block and a constant value.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared against
 * @param stride distance in bytes between two consecutive rows of src
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
894
895 static int get_intra_count(MpegEncContext *s, uint8_t *src,
896                            uint8_t *ref, int stride)
897 {
898     int x, y, w, h;
899     int acc = 0;
900
901     w = s->width  & ~15;
902     h = s->height & ~15;
903
904     for (y = 0; y < h; y += 16) {
905         for (x = 0; x < w; x += 16) {
906             int offset = x + y * stride;
907             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
908                                       stride, 16);
909             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
910             int sae  = get_sae(src + offset, mean, stride);
911
912             acc += sae + 500 < sad;
913         }
914     }
915     return acc;
916 }
917
918
919 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
920 {
921     Picture *pic = NULL;
922     int64_t pts;
923     int i, display_picture_number = 0, ret;
924     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
925                                                  (s->low_delay ? 0 : 1);
926     int direct = 1;
927
928     if (pic_arg) {
929         pts = pic_arg->pts;
930         display_picture_number = s->input_picture_number++;
931
932         if (pts != AV_NOPTS_VALUE) {
933             if (s->user_specified_pts != AV_NOPTS_VALUE) {
934                 int64_t time = pts;
935                 int64_t last = s->user_specified_pts;
936
937                 if (time <= last) {
938                     av_log(s->avctx, AV_LOG_ERROR,
939                            "Error, Invalid timestamp=%"PRId64", "
940                            "last=%"PRId64"\n", pts, s->user_specified_pts);
941                     return -1;
942                 }
943
944                 if (!s->low_delay && display_picture_number == 1)
945                     s->dts_delta = time - last;
946             }
947             s->user_specified_pts = pts;
948         } else {
949             if (s->user_specified_pts != AV_NOPTS_VALUE) {
950                 s->user_specified_pts =
951                 pts = s->user_specified_pts + 1;
952                 av_log(s->avctx, AV_LOG_INFO,
953                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
954                        pts);
955             } else {
956                 pts = display_picture_number;
957             }
958         }
959     }
960
961     if (pic_arg) {
962         if (!pic_arg->buf[0]);
963             direct = 0;
964         if (pic_arg->linesize[0] != s->linesize)
965             direct = 0;
966         if (pic_arg->linesize[1] != s->uvlinesize)
967             direct = 0;
968         if (pic_arg->linesize[2] != s->uvlinesize)
969             direct = 0;
970
971         av_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
972                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
973
974         if (direct) {
975             i = ff_find_unused_picture(s, 1);
976             if (i < 0)
977                 return i;
978
979             pic = &s->picture[i];
980             pic->reference = 3;
981
982             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
983                 return ret;
984             if (ff_alloc_picture(s, pic, 1) < 0) {
985                 return -1;
986             }
987         } else {
988             i = ff_find_unused_picture(s, 0);
989             if (i < 0)
990                 return i;
991
992             pic = &s->picture[i];
993             pic->reference = 3;
994
995             if (ff_alloc_picture(s, pic, 0) < 0) {
996                 return -1;
997             }
998
999             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1000                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1001                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1002                 // empty
1003             } else {
1004                 int h_chroma_shift, v_chroma_shift;
1005                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1006                                                  &h_chroma_shift,
1007                                                  &v_chroma_shift);
1008
1009                 for (i = 0; i < 3; i++) {
1010                     int src_stride = pic_arg->linesize[i];
1011                     int dst_stride = i ? s->uvlinesize : s->linesize;
1012                     int h_shift = i ? h_chroma_shift : 0;
1013                     int v_shift = i ? v_chroma_shift : 0;
1014                     int w = s->width  >> h_shift;
1015                     int h = s->height >> v_shift;
1016                     uint8_t *src = pic_arg->data[i];
1017                     uint8_t *dst = pic->f->data[i];
1018
1019                     if (!s->avctx->rc_buffer_size)
1020                         dst += INPLACE_OFFSET;
1021
1022                     if (src_stride == dst_stride)
1023                         memcpy(dst, src, src_stride * h);
1024                     else {
1025                         while (h--) {
1026                             memcpy(dst, src, w);
1027                             dst += dst_stride;
1028                             src += src_stride;
1029                         }
1030                     }
1031                 }
1032             }
1033         }
1034         ret = av_frame_copy_props(pic->f, pic_arg);
1035         if (ret < 0)
1036             return ret;
1037
1038         pic->f->display_picture_number = display_picture_number;
1039         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1040     }
1041
1042     /* shift buffer entries */
1043     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1044         s->input_picture[i - 1] = s->input_picture[i];
1045
1046     s->input_picture[encoding_delay] = (Picture*) pic;
1047
1048     return 0;
1049 }
1050
1051 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1052 {
1053     int x, y, plane;
1054     int score = 0;
1055     int64_t score64 = 0;
1056
1057     for (plane = 0; plane < 3; plane++) {
1058         const int stride = p->f->linesize[plane];
1059         const int bw = plane ? 1 : 2;
1060         for (y = 0; y < s->mb_height * bw; y++) {
1061             for (x = 0; x < s->mb_width * bw; x++) {
1062                 int off = p->shared ? 0 : 16;
1063                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1064                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1065                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1066
1067                 switch (s->avctx->frame_skip_exp) {
1068                 case 0: score    =  FFMAX(score, v);          break;
1069                 case 1: score   += FFABS(v);                  break;
1070                 case 2: score   += v * v;                     break;
1071                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1072                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1073                 }
1074             }
1075         }
1076     }
1077
1078     if (score)
1079         score64 = score;
1080
1081     if (score64 < s->avctx->frame_skip_threshold)
1082         return 1;
1083     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1084         return 1;
1085     return 0;
1086 }
1087
1088 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1089 {
1090     AVPacket pkt = { 0 };
1091     int ret, got_output;
1092
1093     av_init_packet(&pkt);
1094     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1095     if (ret < 0)
1096         return ret;
1097
1098     ret = pkt.size;
1099     av_free_packet(&pkt);
1100     return ret;
1101 }
1102
1103 static int estimate_best_b_count(MpegEncContext *s)
1104 {
1105     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1106     AVCodecContext *c = avcodec_alloc_context3(NULL);
1107     const int scale = s->avctx->brd_scale;
1108     int i, j, out_size, p_lambda, b_lambda, lambda2;
1109     int64_t best_rd  = INT64_MAX;
1110     int best_b_count = -1;
1111
1112     assert(scale >= 0 && scale <= 3);
1113
1114     //emms_c();
1115     //s->next_picture_ptr->quality;
1116     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1117     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1118     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1119     if (!b_lambda) // FIXME we should do this somewhere else
1120         b_lambda = p_lambda;
1121     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1122                FF_LAMBDA_SHIFT;
1123
1124     c->width        = s->width  >> scale;
1125     c->height       = s->height >> scale;
1126     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1127     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1128     c->mb_decision  = s->avctx->mb_decision;
1129     c->me_cmp       = s->avctx->me_cmp;
1130     c->mb_cmp       = s->avctx->mb_cmp;
1131     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1132     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1133     c->time_base    = s->avctx->time_base;
1134     c->max_b_frames = s->max_b_frames;
1135
1136     if (avcodec_open2(c, codec, NULL) < 0)
1137         return -1;
1138
1139     for (i = 0; i < s->max_b_frames + 2; i++) {
1140         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1141                                                 s->next_picture_ptr;
1142
1143         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1144             pre_input = *pre_input_ptr;
1145
1146             if (!pre_input.shared && i) {
1147                 pre_input.f->data[0] += INPLACE_OFFSET;
1148                 pre_input.f->data[1] += INPLACE_OFFSET;
1149                 pre_input.f->data[2] += INPLACE_OFFSET;
1150             }
1151
1152             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1153                                        s->tmp_frames[i]->linesize[0],
1154                                        pre_input.f->data[0],
1155                                        pre_input.f->linesize[0],
1156                                        c->width, c->height);
1157             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1158                                        s->tmp_frames[i]->linesize[1],
1159                                        pre_input.f->data[1],
1160                                        pre_input.f->linesize[1],
1161                                        c->width >> 1, c->height >> 1);
1162             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1163                                        s->tmp_frames[i]->linesize[2],
1164                                        pre_input.f->data[2],
1165                                        pre_input.f->linesize[2],
1166                                        c->width >> 1, c->height >> 1);
1167         }
1168     }
1169
1170     for (j = 0; j < s->max_b_frames + 1; j++) {
1171         int64_t rd = 0;
1172
1173         if (!s->input_picture[j])
1174             break;
1175
1176         c->error[0] = c->error[1] = c->error[2] = 0;
1177
1178         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1179         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1180
1181         out_size = encode_frame(c, s->tmp_frames[0]);
1182
1183         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1184
1185         for (i = 0; i < s->max_b_frames + 1; i++) {
1186             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1187
1188             s->tmp_frames[i + 1]->pict_type = is_p ?
1189                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1190             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1191
1192             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1193
1194             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1195         }
1196
1197         /* get the delayed frames */
1198         while (out_size) {
1199             out_size = encode_frame(c, NULL);
1200             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1201         }
1202
1203         rd += c->error[0] + c->error[1] + c->error[2];
1204
1205         if (rd < best_rd) {
1206             best_rd = rd;
1207             best_b_count = j;
1208         }
1209     }
1210
1211     avcodec_close(c);
1212     av_freep(&c);
1213
1214     return best_b_count;
1215 }
1216
1217 static int select_input_picture(MpegEncContext *s)
1218 {
1219     int i, ret;
1220
1221     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1222         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1223     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1224
1225     /* set next picture type & ordering */
1226     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1227         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1228             !s->next_picture_ptr || s->intra_only) {
1229             s->reordered_input_picture[0] = s->input_picture[0];
1230             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1231             s->reordered_input_picture[0]->f->coded_picture_number =
1232                 s->coded_picture_number++;
1233         } else {
1234             int b_frames;
1235
1236             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1237                 if (s->picture_in_gop_number < s->gop_size &&
1238                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1239                     // FIXME check that te gop check above is +-1 correct
1240                     av_frame_unref(s->input_picture[0]->f);
1241
1242                     emms_c();
1243                     ff_vbv_update(s, 0);
1244
1245                     goto no_output_pic;
1246                 }
1247             }
1248
1249             if (s->flags & CODEC_FLAG_PASS2) {
1250                 for (i = 0; i < s->max_b_frames + 1; i++) {
1251                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1252
1253                     if (pict_num >= s->rc_context.num_entries)
1254                         break;
1255                     if (!s->input_picture[i]) {
1256                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1257                         break;
1258                     }
1259
1260                     s->input_picture[i]->f->pict_type =
1261                         s->rc_context.entry[pict_num].new_pict_type;
1262                 }
1263             }
1264
1265             if (s->avctx->b_frame_strategy == 0) {
1266                 b_frames = s->max_b_frames;
1267                 while (b_frames && !s->input_picture[b_frames])
1268                     b_frames--;
1269             } else if (s->avctx->b_frame_strategy == 1) {
1270                 for (i = 1; i < s->max_b_frames + 1; i++) {
1271                     if (s->input_picture[i] &&
1272                         s->input_picture[i]->b_frame_score == 0) {
1273                         s->input_picture[i]->b_frame_score =
1274                             get_intra_count(s,
1275                                             s->input_picture[i    ]->f->data[0],
1276                                             s->input_picture[i - 1]->f->data[0],
1277                                             s->linesize) + 1;
1278                     }
1279                 }
1280                 for (i = 0; i < s->max_b_frames + 1; i++) {
1281                     if (!s->input_picture[i] ||
1282                         s->input_picture[i]->b_frame_score - 1 >
1283                             s->mb_num / s->avctx->b_sensitivity)
1284                         break;
1285                 }
1286
1287                 b_frames = FFMAX(0, i - 1);
1288
1289                 /* reset scores */
1290                 for (i = 0; i < b_frames + 1; i++) {
1291                     s->input_picture[i]->b_frame_score = 0;
1292                 }
1293             } else if (s->avctx->b_frame_strategy == 2) {
1294                 b_frames = estimate_best_b_count(s);
1295             } else {
1296                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1297                 b_frames = 0;
1298             }
1299
1300             emms_c();
1301
1302             for (i = b_frames - 1; i >= 0; i--) {
1303                 int type = s->input_picture[i]->f->pict_type;
1304                 if (type && type != AV_PICTURE_TYPE_B)
1305                     b_frames = i;
1306             }
1307             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1308                 b_frames == s->max_b_frames) {
1309                 av_log(s->avctx, AV_LOG_ERROR,
1310                        "warning, too many b frames in a row\n");
1311             }
1312
1313             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1314                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1315                     s->gop_size > s->picture_in_gop_number) {
1316                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1317                 } else {
1318                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1319                         b_frames = 0;
1320                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1321                 }
1322             }
1323
1324             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1325                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1326                 b_frames--;
1327
1328             s->reordered_input_picture[0] = s->input_picture[b_frames];
1329             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1330                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1331             s->reordered_input_picture[0]->f->coded_picture_number =
1332                 s->coded_picture_number++;
1333             for (i = 0; i < b_frames; i++) {
1334                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1335                 s->reordered_input_picture[i + 1]->f->pict_type =
1336                     AV_PICTURE_TYPE_B;
1337                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1338                     s->coded_picture_number++;
1339             }
1340         }
1341     }
1342 no_output_pic:
1343     if (s->reordered_input_picture[0]) {
1344         s->reordered_input_picture[0]->reference =
1345            s->reordered_input_picture[0]->f->pict_type !=
1346                AV_PICTURE_TYPE_B ? 3 : 0;
1347
1348         ff_mpeg_unref_picture(s, &s->new_picture);
1349         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1350             return ret;
1351
1352         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1353             // input is a shared pix, so we can't modifiy it -> alloc a new
1354             // one & ensure that the shared one is reuseable
1355
1356             Picture *pic;
1357             int i = ff_find_unused_picture(s, 0);
1358             if (i < 0)
1359                 return i;
1360             pic = &s->picture[i];
1361
1362             pic->reference = s->reordered_input_picture[0]->reference;
1363             if (ff_alloc_picture(s, pic, 0) < 0) {
1364                 return -1;
1365             }
1366
1367             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1368             if (ret < 0)
1369                 return ret;
1370
1371             /* mark us unused / free shared pic */
1372             av_frame_unref(s->reordered_input_picture[0]->f);
1373             s->reordered_input_picture[0]->shared = 0;
1374
1375             s->current_picture_ptr = pic;
1376         } else {
1377             // input is not a shared pix -> reuse buffer for current_pix
1378             s->current_picture_ptr = s->reordered_input_picture[0];
1379             for (i = 0; i < 4; i++) {
1380                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1381             }
1382         }
1383         ff_mpeg_unref_picture(s, &s->current_picture);
1384         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1385                                        s->current_picture_ptr)) < 0)
1386             return ret;
1387
1388         s->picture_number = s->new_picture.f->display_picture_number;
1389     } else {
1390         ff_mpeg_unref_picture(s, &s->new_picture);
1391     }
1392     return 0;
1393 }
1394
1395 static void frame_end(MpegEncContext *s)
1396 {
1397     int i;
1398
1399     if (s->unrestricted_mv &&
1400         s->current_picture.reference &&
1401         !s->intra_only) {
1402         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1403         int hshift = desc->log2_chroma_w;
1404         int vshift = desc->log2_chroma_h;
1405         s->mpvencdsp.draw_edges(s->current_picture.f->data[0], s->linesize,
1406                                 s->h_edge_pos, s->v_edge_pos,
1407                                 EDGE_WIDTH, EDGE_WIDTH,
1408                                 EDGE_TOP | EDGE_BOTTOM);
1409         s->mpvencdsp.draw_edges(s->current_picture.f->data[1], s->uvlinesize,
1410                                 s->h_edge_pos >> hshift,
1411                                 s->v_edge_pos >> vshift,
1412                                 EDGE_WIDTH >> hshift,
1413                                 EDGE_WIDTH >> vshift,
1414                                 EDGE_TOP | EDGE_BOTTOM);
1415         s->mpvencdsp.draw_edges(s->current_picture.f->data[2], s->uvlinesize,
1416                                 s->h_edge_pos >> hshift,
1417                                 s->v_edge_pos >> vshift,
1418                                 EDGE_WIDTH >> hshift,
1419                                 EDGE_WIDTH >> vshift,
1420                                 EDGE_TOP | EDGE_BOTTOM);
1421     }
1422
1423     emms_c();
1424
1425     s->last_pict_type                 = s->pict_type;
1426     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1427     if (s->pict_type!= AV_PICTURE_TYPE_B)
1428         s->last_non_b_pict_type = s->pict_type;
1429
1430     if (s->encoding) {
1431         /* release non-reference frames */
1432         for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1433             if (!s->picture[i].reference)
1434                 ff_mpeg_unref_picture(s, &s->picture[i]);
1435         }
1436     }
1437
1438     s->avctx->coded_frame = s->current_picture_ptr->f;
1439
1440 }
1441
1442 static void update_noise_reduction(MpegEncContext *s)
1443 {
1444     int intra, i;
1445
1446     for (intra = 0; intra < 2; intra++) {
1447         if (s->dct_count[intra] > (1 << 16)) {
1448             for (i = 0; i < 64; i++) {
1449                 s->dct_error_sum[intra][i] >>= 1;
1450             }
1451             s->dct_count[intra] >>= 1;
1452         }
1453
1454         for (i = 0; i < 64; i++) {
1455             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1456                                        s->dct_count[intra] +
1457                                        s->dct_error_sum[intra][i] / 2) /
1458                                       (s->dct_error_sum[intra][i] + 1);
1459         }
1460     }
1461 }
1462
1463 static int frame_start(MpegEncContext *s)
1464 {
1465     int ret;
1466
1467     /* mark & release old frames */
1468     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1469         s->last_picture_ptr != s->next_picture_ptr &&
1470         s->last_picture_ptr->f->buf[0]) {
1471         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1472     }
1473
1474     s->current_picture_ptr->f->pict_type = s->pict_type;
1475     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1476
1477     ff_mpeg_unref_picture(s, &s->current_picture);
1478     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1479                                    s->current_picture_ptr)) < 0)
1480         return ret;
1481
1482     if (s->pict_type != AV_PICTURE_TYPE_B) {
1483         s->last_picture_ptr = s->next_picture_ptr;
1484         if (!s->droppable)
1485             s->next_picture_ptr = s->current_picture_ptr;
1486     }
1487
1488     if (s->last_picture_ptr) {
1489         ff_mpeg_unref_picture(s, &s->last_picture);
1490         if (s->last_picture_ptr->f->buf[0] &&
1491             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1492                                        s->last_picture_ptr)) < 0)
1493             return ret;
1494     }
1495     if (s->next_picture_ptr) {
1496         ff_mpeg_unref_picture(s, &s->next_picture);
1497         if (s->next_picture_ptr->f->buf[0] &&
1498             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1499                                        s->next_picture_ptr)) < 0)
1500             return ret;
1501     }
1502
1503     if (s->picture_structure!= PICT_FRAME) {
1504         int i;
1505         for (i = 0; i < 4; i++) {
1506             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1507                 s->current_picture.f->data[i] +=
1508                     s->current_picture.f->linesize[i];
1509             }
1510             s->current_picture.f->linesize[i] *= 2;
1511             s->last_picture.f->linesize[i]    *= 2;
1512             s->next_picture.f->linesize[i]    *= 2;
1513         }
1514     }
1515
1516     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1517         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1518         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1519     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1520         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1521         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1522     } else {
1523         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1524         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1525     }
1526
1527     if (s->dct_error_sum) {
1528         assert(s->avctx->noise_reduction && s->encoding);
1529         update_noise_reduction(s);
1530     }
1531
1532     return 0;
1533 }
1534
1535 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1536                           const AVFrame *pic_arg, int *got_packet)
1537 {
1538     MpegEncContext *s = avctx->priv_data;
1539     int i, stuffing_count, ret;
1540     int context_count = s->slice_context_count;
1541
1542     s->picture_in_gop_number++;
1543
1544     if (load_input_picture(s, pic_arg) < 0)
1545         return -1;
1546
1547     if (select_input_picture(s) < 0) {
1548         return -1;
1549     }
1550
1551     /* output? */
1552     if (s->new_picture.f->data[0]) {
1553         if (!pkt->data &&
1554             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1555             return ret;
1556         if (s->mb_info) {
1557             s->mb_info_ptr = av_packet_new_side_data(pkt,
1558                                  AV_PKT_DATA_H263_MB_INFO,
1559                                  s->mb_width*s->mb_height*12);
1560             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1561         }
1562
1563         for (i = 0; i < context_count; i++) {
1564             int start_y = s->thread_context[i]->start_mb_y;
1565             int   end_y = s->thread_context[i]->  end_mb_y;
1566             int h       = s->mb_height;
1567             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1568             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1569
1570             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1571         }
1572
1573         s->pict_type = s->new_picture.f->pict_type;
1574         //emms_c();
1575         ret = frame_start(s);
1576         if (ret < 0)
1577             return ret;
1578 vbv_retry:
1579         if (encode_picture(s, s->picture_number) < 0)
1580             return -1;
1581
1582         avctx->header_bits = s->header_bits;
1583         avctx->mv_bits     = s->mv_bits;
1584         avctx->misc_bits   = s->misc_bits;
1585         avctx->i_tex_bits  = s->i_tex_bits;
1586         avctx->p_tex_bits  = s->p_tex_bits;
1587         avctx->i_count     = s->i_count;
1588         // FIXME f/b_count in avctx
1589         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1590         avctx->skip_count  = s->skip_count;
1591
1592         frame_end(s);
1593
1594         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1595             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1596
1597         if (avctx->rc_buffer_size) {
1598             RateControlContext *rcc = &s->rc_context;
1599             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1600
1601             if (put_bits_count(&s->pb) > max_size &&
1602                 s->lambda < s->avctx->lmax) {
1603                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1604                                        (s->qscale + 1) / s->qscale);
1605                 if (s->adaptive_quant) {
1606                     int i;
1607                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1608                         s->lambda_table[i] =
1609                             FFMAX(s->lambda_table[i] + 1,
1610                                   s->lambda_table[i] * (s->qscale + 1) /
1611                                   s->qscale);
1612                 }
1613                 s->mb_skipped = 0;        // done in frame_start()
1614                 // done in encode_picture() so we must undo it
1615                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1616                     if (s->flipflop_rounding          ||
1617                         s->codec_id == AV_CODEC_ID_H263P ||
1618                         s->codec_id == AV_CODEC_ID_MPEG4)
1619                         s->no_rounding ^= 1;
1620                 }
1621                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1622                     s->time_base       = s->last_time_base;
1623                     s->last_non_b_time = s->time - s->pp_time;
1624                 }
1625                 for (i = 0; i < context_count; i++) {
1626                     PutBitContext *pb = &s->thread_context[i]->pb;
1627                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1628                 }
1629                 goto vbv_retry;
1630             }
1631
1632             assert(s->avctx->rc_max_rate);
1633         }
1634
1635         if (s->flags & CODEC_FLAG_PASS1)
1636             ff_write_pass1_stats(s);
1637
1638         for (i = 0; i < 4; i++) {
1639             s->current_picture_ptr->f->error[i] = s->current_picture.f->error[i];
1640             avctx->error[i] += s->current_picture_ptr->f->error[i];
1641         }
1642
1643         if (s->flags & CODEC_FLAG_PASS1)
1644             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1645                    avctx->i_tex_bits + avctx->p_tex_bits ==
1646                        put_bits_count(&s->pb));
1647         flush_put_bits(&s->pb);
1648         s->frame_bits  = put_bits_count(&s->pb);
1649
1650         stuffing_count = ff_vbv_update(s, s->frame_bits);
1651         if (stuffing_count) {
1652             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1653                     stuffing_count + 50) {
1654                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1655                 return -1;
1656             }
1657
1658             switch (s->codec_id) {
1659             case AV_CODEC_ID_MPEG1VIDEO:
1660             case AV_CODEC_ID_MPEG2VIDEO:
1661                 while (stuffing_count--) {
1662                     put_bits(&s->pb, 8, 0);
1663                 }
1664             break;
1665             case AV_CODEC_ID_MPEG4:
1666                 put_bits(&s->pb, 16, 0);
1667                 put_bits(&s->pb, 16, 0x1C3);
1668                 stuffing_count -= 4;
1669                 while (stuffing_count--) {
1670                     put_bits(&s->pb, 8, 0xFF);
1671                 }
1672             break;
1673             default:
1674                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1675             }
1676             flush_put_bits(&s->pb);
1677             s->frame_bits  = put_bits_count(&s->pb);
1678         }
1679
1680         /* update mpeg1/2 vbv_delay for CBR */
1681         if (s->avctx->rc_max_rate                          &&
1682             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1683             s->out_format == FMT_MPEG1                     &&
1684             90000LL * (avctx->rc_buffer_size - 1) <=
1685                 s->avctx->rc_max_rate * 0xFFFFLL) {
1686             int vbv_delay, min_delay;
1687             double inbits  = s->avctx->rc_max_rate *
1688                              av_q2d(s->avctx->time_base);
1689             int    minbits = s->frame_bits - 8 *
1690                              (s->vbv_delay_ptr - s->pb.buf - 1);
1691             double bits    = s->rc_context.buffer_index + minbits - inbits;
1692
1693             if (bits < 0)
1694                 av_log(s->avctx, AV_LOG_ERROR,
1695                        "Internal error, negative bits\n");
1696
1697             assert(s->repeat_first_field == 0);
1698
1699             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1700             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1701                         s->avctx->rc_max_rate;
1702
1703             vbv_delay = FFMAX(vbv_delay, min_delay);
1704
1705             assert(vbv_delay < 0xFFFF);
1706
1707             s->vbv_delay_ptr[0] &= 0xF8;
1708             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1709             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1710             s->vbv_delay_ptr[2] &= 0x07;
1711             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1712             avctx->vbv_delay     = vbv_delay * 300;
1713         }
1714         s->total_bits     += s->frame_bits;
1715         avctx->frame_bits  = s->frame_bits;
1716
1717         pkt->pts = s->current_picture.f->pts;
1718         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1719             if (!s->current_picture.f->coded_picture_number)
1720                 pkt->dts = pkt->pts - s->dts_delta;
1721             else
1722                 pkt->dts = s->reordered_pts;
1723             s->reordered_pts = pkt->pts;
1724         } else
1725             pkt->dts = pkt->pts;
1726         if (s->current_picture.f->key_frame)
1727             pkt->flags |= AV_PKT_FLAG_KEY;
1728         if (s->mb_info)
1729             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1730     } else {
1731         s->frame_bits = 0;
1732     }
1733     assert((s->frame_bits & 7) == 0);
1734
1735     pkt->size = s->frame_bits / 8;
1736     *got_packet = !!pkt->size;
1737     return 0;
1738 }
1739
1740 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1741                                                 int n, int threshold)
1742 {
1743     static const char tab[64] = {
1744         3, 2, 2, 1, 1, 1, 1, 1,
1745         1, 1, 1, 1, 1, 1, 1, 1,
1746         1, 1, 1, 1, 1, 1, 1, 1,
1747         0, 0, 0, 0, 0, 0, 0, 0,
1748         0, 0, 0, 0, 0, 0, 0, 0,
1749         0, 0, 0, 0, 0, 0, 0, 0,
1750         0, 0, 0, 0, 0, 0, 0, 0,
1751         0, 0, 0, 0, 0, 0, 0, 0
1752     };
1753     int score = 0;
1754     int run = 0;
1755     int i;
1756     int16_t *block = s->block[n];
1757     const int last_index = s->block_last_index[n];
1758     int skip_dc;
1759
1760     if (threshold < 0) {
1761         skip_dc = 0;
1762         threshold = -threshold;
1763     } else
1764         skip_dc = 1;
1765
1766     /* Are all we could set to zero already zero? */
1767     if (last_index <= skip_dc - 1)
1768         return;
1769
1770     for (i = 0; i <= last_index; i++) {
1771         const int j = s->intra_scantable.permutated[i];
1772         const int level = FFABS(block[j]);
1773         if (level == 1) {
1774             if (skip_dc && i == 0)
1775                 continue;
1776             score += tab[run];
1777             run = 0;
1778         } else if (level > 1) {
1779             return;
1780         } else {
1781             run++;
1782         }
1783     }
1784     if (score >= threshold)
1785         return;
1786     for (i = skip_dc; i <= last_index; i++) {
1787         const int j = s->intra_scantable.permutated[i];
1788         block[j] = 0;
1789     }
1790     if (block[0])
1791         s->block_last_index[n] = 0;
1792     else
1793         s->block_last_index[n] = -1;
1794 }
1795
1796 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1797                                int last_index)
1798 {
1799     int i;
1800     const int maxlevel = s->max_qcoeff;
1801     const int minlevel = s->min_qcoeff;
1802     int overflow = 0;
1803
1804     if (s->mb_intra) {
1805         i = 1; // skip clipping of intra dc
1806     } else
1807         i = 0;
1808
1809     for (; i <= last_index; i++) {
1810         const int j = s->intra_scantable.permutated[i];
1811         int level = block[j];
1812
1813         if (level > maxlevel) {
1814             level = maxlevel;
1815             overflow++;
1816         } else if (level < minlevel) {
1817             level = minlevel;
1818             overflow++;
1819         }
1820
1821         block[j] = level;
1822     }
1823
1824     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1825         av_log(s->avctx, AV_LOG_INFO,
1826                "warning, clipping %d dct coefficients to %d..%d\n",
1827                overflow, minlevel, maxlevel);
1828 }
1829
/**
 * Compute a per-pixel weight for an 8x8 block from the local pixel
 * variation.
 *
 * For every pixel the sum and the sum of squares over its neighbourhood
 * (up to 3x3, cropped at the block border) are accumulated and
 * 36 * sqrt(count * sqr - sum^2) / count is stored.
 *
 * @param weight output, 64 entries in row-major order
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance in bytes between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            /* number of pixels inside the cropped neighbourhood */
            const int count = (bottom - top) * (right - left);
            int sum = 0;
            int sqr = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                const uint8_t *line = ptr + ny * stride;

                for (nx = left; nx < right; nx++) {
                    const int v = line[nx];

                    sum += v;
                    sqr += v * v;
                }
            }
            weight[col + 8 * row] =
                (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
1853
1854 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1855                                                 int motion_x, int motion_y,
1856                                                 int mb_block_height,
1857                                                 int mb_block_count)
1858 {
1859     int16_t weight[8][64];
1860     int16_t orig[8][64];
1861     const int mb_x = s->mb_x;
1862     const int mb_y = s->mb_y;
1863     int i;
1864     int skip_dct[8];
1865     int dct_offset = s->linesize * 8; // default for progressive frames
1866     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1867     ptrdiff_t wrap_y, wrap_c;
1868
1869     for (i = 0; i < mb_block_count; i++)
1870         skip_dct[i] = s->skipdct;
1871
1872     if (s->adaptive_quant) {
1873         const int last_qp = s->qscale;
1874         const int mb_xy = mb_x + mb_y * s->mb_stride;
1875
1876         s->lambda = s->lambda_table[mb_xy];
1877         update_qscale(s);
1878
1879         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1880             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1881             s->dquant = s->qscale - last_qp;
1882
1883             if (s->out_format == FMT_H263) {
1884                 s->dquant = av_clip(s->dquant, -2, 2);
1885
1886                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1887                     if (!s->mb_intra) {
1888                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1889                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1890                                 s->dquant = 0;
1891                         }
1892                         if (s->mv_type == MV_TYPE_8X8)
1893                             s->dquant = 0;
1894                     }
1895                 }
1896             }
1897         }
1898         ff_set_qscale(s, last_qp + s->dquant);
1899     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1900         ff_set_qscale(s, s->qscale + s->dquant);
1901
1902     wrap_y = s->linesize;
1903     wrap_c = s->uvlinesize;
1904     ptr_y  = s->new_picture.f->data[0] +
1905              (mb_y * 16 * wrap_y)              + mb_x * 16;
1906     ptr_cb = s->new_picture.f->data[1] +
1907              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1908     ptr_cr = s->new_picture.f->data[2] +
1909              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1910
1911     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1912         uint8_t *ebuf = s->edge_emu_buffer + 32;
1913         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
1914                                  wrap_y, wrap_y,
1915                                  16, 16, mb_x * 16, mb_y * 16,
1916                                  s->width, s->height);
1917         ptr_y = ebuf;
1918         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
1919                                  wrap_c, wrap_c,
1920                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1921                                  s->width >> 1, s->height >> 1);
1922         ptr_cb = ebuf + 18 * wrap_y;
1923         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
1924                                  wrap_c, wrap_c,
1925                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1926                                  s->width >> 1, s->height >> 1);
1927         ptr_cr = ebuf + 18 * wrap_y + 8;
1928     }
1929
1930     if (s->mb_intra) {
1931         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1932             int progressive_score, interlaced_score;
1933
1934             s->interlaced_dct = 0;
1935             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
1936                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1937                                                      NULL, wrap_y, 8) - 400;
1938
1939             if (progressive_score > 0) {
1940                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
1941                                                         NULL, wrap_y * 2, 8) +
1942                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
1943                                                         NULL, wrap_y * 2, 8);
1944                 if (progressive_score > interlaced_score) {
1945                     s->interlaced_dct = 1;
1946
1947                     dct_offset = wrap_y;
1948                     wrap_y <<= 1;
1949                     if (s->chroma_format == CHROMA_422)
1950                         wrap_c <<= 1;
1951                 }
1952             }
1953         }
1954
1955         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
1956         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
1957         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
1958         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
1959
1960         if (s->flags & CODEC_FLAG_GRAY) {
1961             skip_dct[4] = 1;
1962             skip_dct[5] = 1;
1963         } else {
1964             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1965             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1966             if (!s->chroma_y_shift) { /* 422 */
1967                 s->pdsp.get_pixels(s->block[6],
1968                                    ptr_cb + (dct_offset >> 1), wrap_c);
1969                 s->pdsp.get_pixels(s->block[7],
1970                                    ptr_cr + (dct_offset >> 1), wrap_c);
1971             }
1972         }
1973     } else {
1974         op_pixels_func (*op_pix)[4];
1975         qpel_mc_func (*op_qpix)[16];
1976         uint8_t *dest_y, *dest_cb, *dest_cr;
1977
1978         dest_y  = s->dest[0];
1979         dest_cb = s->dest[1];
1980         dest_cr = s->dest[2];
1981
1982         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1983             op_pix  = s->hdsp.put_pixels_tab;
1984             op_qpix = s->qdsp.put_qpel_pixels_tab;
1985         } else {
1986             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
1987             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
1988         }
1989
1990         if (s->mv_dir & MV_DIR_FORWARD) {
1991             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
1992                           s->last_picture.f->data,
1993                           op_pix, op_qpix);
1994             op_pix  = s->hdsp.avg_pixels_tab;
1995             op_qpix = s->qdsp.avg_qpel_pixels_tab;
1996         }
1997         if (s->mv_dir & MV_DIR_BACKWARD) {
1998             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
1999                           s->next_picture.f->data,
2000                           op_pix, op_qpix);
2001         }
2002
2003         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2004             int progressive_score, interlaced_score;
2005
2006             s->interlaced_dct = 0;
2007             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2008                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2009                                                      ptr_y + wrap_y * 8,
2010                                                      wrap_y, 8) - 400;
2011
2012             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2013                 progressive_score -= 400;
2014
2015             if (progressive_score > 0) {
2016                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2017                                                         wrap_y * 2, 8) +
2018                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2019                                                         ptr_y + wrap_y,
2020                                                         wrap_y * 2, 8);
2021
2022                 if (progressive_score > interlaced_score) {
2023                     s->interlaced_dct = 1;
2024
2025                     dct_offset = wrap_y;
2026                     wrap_y <<= 1;
2027                     if (s->chroma_format == CHROMA_422)
2028                         wrap_c <<= 1;
2029                 }
2030             }
2031         }
2032
2033         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2034         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2035         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2036                             dest_y + dct_offset, wrap_y);
2037         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2038                             dest_y + dct_offset + 8, wrap_y);
2039
2040         if (s->flags & CODEC_FLAG_GRAY) {
2041             skip_dct[4] = 1;
2042             skip_dct[5] = 1;
2043         } else {
2044             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2045             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2046             if (!s->chroma_y_shift) { /* 422 */
2047                 s->pdsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
2048                                     dest_cb + (dct_offset >> 1), wrap_c);
2049                 s->pdsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
2050                                     dest_cr + (dct_offset >> 1), wrap_c);
2051             }
2052         }
2053         /* pre quantization */
2054         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2055                 2 * s->qscale * s->qscale) {
2056             // FIXME optimize
2057             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2058                 skip_dct[0] = 1;
2059             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2060                 skip_dct[1] = 1;
2061             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2062                                wrap_y, 8) < 20 * s->qscale)
2063                 skip_dct[2] = 1;
2064             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2065                                wrap_y, 8) < 20 * s->qscale)
2066                 skip_dct[3] = 1;
2067             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2068                 skip_dct[4] = 1;
2069             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2070                 skip_dct[5] = 1;
2071             if (!s->chroma_y_shift) { /* 422 */
2072                 if (s->mecc.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2073                                    dest_cb + (dct_offset >> 1),
2074                                    wrap_c, 8) < 20 * s->qscale)
2075                     skip_dct[6] = 1;
2076                 if (s->mecc.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2077                                    dest_cr + (dct_offset >> 1),
2078                                    wrap_c, 8) < 20 * s->qscale)
2079                     skip_dct[7] = 1;
2080             }
2081         }
2082     }
2083
2084     if (s->quantizer_noise_shaping) {
2085         if (!skip_dct[0])
2086             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2087         if (!skip_dct[1])
2088             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2089         if (!skip_dct[2])
2090             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2091         if (!skip_dct[3])
2092             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2093         if (!skip_dct[4])
2094             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2095         if (!skip_dct[5])
2096             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2097         if (!s->chroma_y_shift) { /* 422 */
2098             if (!skip_dct[6])
2099                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2100                                   wrap_c);
2101             if (!skip_dct[7])
2102                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2103                                   wrap_c);
2104         }
2105         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2106     }
2107
2108     /* DCT & quantize */
2109     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
2110     {
2111         for (i = 0; i < mb_block_count; i++) {
2112             if (!skip_dct[i]) {
2113                 int overflow;
2114                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2115                 // FIXME we could decide to change to quantizer instead of
2116                 // clipping
2117                 // JS: I don't think that would be a good idea it could lower
2118                 //     quality instead of improve it. Just INTRADC clipping
2119                 //     deserves changes in quantizer
2120                 if (overflow)
2121                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2122             } else
2123                 s->block_last_index[i] = -1;
2124         }
2125         if (s->quantizer_noise_shaping) {
2126             for (i = 0; i < mb_block_count; i++) {
2127                 if (!skip_dct[i]) {
2128                     s->block_last_index[i] =
2129                         dct_quantize_refine(s, s->block[i], weight[i],
2130                                             orig[i], i, s->qscale);
2131                 }
2132             }
2133         }
2134
2135         if (s->luma_elim_threshold && !s->mb_intra)
2136             for (i = 0; i < 4; i++)
2137                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2138         if (s->chroma_elim_threshold && !s->mb_intra)
2139             for (i = 4; i < mb_block_count; i++)
2140                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2141
2142         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2143             for (i = 0; i < mb_block_count; i++) {
2144                 if (s->block_last_index[i] == -1)
2145                     s->coded_score[i] = INT_MAX / 256;
2146             }
2147         }
2148     }
2149
2150     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2151         s->block_last_index[4] =
2152         s->block_last_index[5] = 0;
2153         s->block[4][0] =
2154         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2155     }
2156
2157     // non c quantize code returns incorrect block_last_index FIXME
2158     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2159         for (i = 0; i < mb_block_count; i++) {
2160             int j;
2161             if (s->block_last_index[i] > 0) {
2162                 for (j = 63; j > 0; j--) {
2163                     if (s->block[i][s->intra_scantable.permutated[j]])
2164                         break;
2165                 }
2166                 s->block_last_index[i] = j;
2167             }
2168         }
2169     }
2170
2171     /* huffman encode */
2172     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2173     case AV_CODEC_ID_MPEG1VIDEO:
2174     case AV_CODEC_ID_MPEG2VIDEO:
2175         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2176             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2177         break;
2178     case AV_CODEC_ID_MPEG4:
2179         if (CONFIG_MPEG4_ENCODER)
2180             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2181         break;
2182     case AV_CODEC_ID_MSMPEG4V2:
2183     case AV_CODEC_ID_MSMPEG4V3:
2184     case AV_CODEC_ID_WMV1:
2185         if (CONFIG_MSMPEG4_ENCODER)
2186             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2187         break;
2188     case AV_CODEC_ID_WMV2:
2189         if (CONFIG_WMV2_ENCODER)
2190             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2191         break;
2192     case AV_CODEC_ID_H261:
2193         if (CONFIG_H261_ENCODER)
2194             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2195         break;
2196     case AV_CODEC_ID_H263:
2197     case AV_CODEC_ID_H263P:
2198     case AV_CODEC_ID_FLV1:
2199     case AV_CODEC_ID_RV10:
2200     case AV_CODEC_ID_RV20:
2201         if (CONFIG_H263_ENCODER)
2202             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2203         break;
2204     case AV_CODEC_ID_MJPEG:
2205         if (CONFIG_MJPEG_ENCODER)
2206             ff_mjpeg_encode_mb(s, s->block);
2207         break;
2208     default:
2209         assert(0);
2210     }
2211 }
2212
2213 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2214 {
2215     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2216     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2217 }
2218
2219 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2220     int i;
2221
2222     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2223
2224     /* mpeg1 */
2225     d->mb_skip_run= s->mb_skip_run;
2226     for(i=0; i<3; i++)
2227         d->last_dc[i] = s->last_dc[i];
2228
2229     /* statistics */
2230     d->mv_bits= s->mv_bits;
2231     d->i_tex_bits= s->i_tex_bits;
2232     d->p_tex_bits= s->p_tex_bits;
2233     d->i_count= s->i_count;
2234     d->f_count= s->f_count;
2235     d->b_count= s->b_count;
2236     d->skip_count= s->skip_count;
2237     d->misc_bits= s->misc_bits;
2238     d->last_bits= 0;
2239
2240     d->mb_skipped= 0;
2241     d->qscale= s->qscale;
2242     d->dquant= s->dquant;
2243
2244     d->esc3_level_length= s->esc3_level_length;
2245 }
2246
2247 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2248     int i;
2249
2250     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2251     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2252
2253     /* mpeg1 */
2254     d->mb_skip_run= s->mb_skip_run;
2255     for(i=0; i<3; i++)
2256         d->last_dc[i] = s->last_dc[i];
2257
2258     /* statistics */
2259     d->mv_bits= s->mv_bits;
2260     d->i_tex_bits= s->i_tex_bits;
2261     d->p_tex_bits= s->p_tex_bits;
2262     d->i_count= s->i_count;
2263     d->f_count= s->f_count;
2264     d->b_count= s->b_count;
2265     d->skip_count= s->skip_count;
2266     d->misc_bits= s->misc_bits;
2267
2268     d->mb_intra= s->mb_intra;
2269     d->mb_skipped= s->mb_skipped;
2270     d->mv_type= s->mv_type;
2271     d->mv_dir= s->mv_dir;
2272     d->pb= s->pb;
2273     if(s->data_partitioning){
2274         d->pb2= s->pb2;
2275         d->tex_pb= s->tex_pb;
2276     }
2277     d->block= s->block;
2278     for(i=0; i<8; i++)
2279         d->block_last_index[i]= s->block_last_index[i];
2280     d->interlaced_dct= s->interlaced_dct;
2281     d->qscale= s->qscale;
2282
2283     d->esc3_level_length= s->esc3_level_length;
2284 }
2285
2286 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2287                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2288                            int *dmin, int *next_block, int motion_x, int motion_y)
2289 {
2290     int score;
2291     uint8_t *dest_backup[3];
2292
2293     copy_context_before_encode(s, backup, type);
2294
2295     s->block= s->blocks[*next_block];
2296     s->pb= pb[*next_block];
2297     if(s->data_partitioning){
2298         s->pb2   = pb2   [*next_block];
2299         s->tex_pb= tex_pb[*next_block];
2300     }
2301
2302     if(*next_block){
2303         memcpy(dest_backup, s->dest, sizeof(s->dest));
2304         s->dest[0] = s->rd_scratchpad;
2305         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2306         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2307         assert(s->linesize >= 32); //FIXME
2308     }
2309
2310     encode_mb(s, motion_x, motion_y);
2311
2312     score= put_bits_count(&s->pb);
2313     if(s->data_partitioning){
2314         score+= put_bits_count(&s->pb2);
2315         score+= put_bits_count(&s->tex_pb);
2316     }
2317
2318     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2319         ff_mpv_decode_mb(s, s->block);
2320
2321         score *= s->lambda2;
2322         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2323     }
2324
2325     if(*next_block){
2326         memcpy(s->dest, dest_backup, sizeof(s->dest));
2327     }
2328
2329     if(score<*dmin){
2330         *dmin= score;
2331         *next_block^=1;
2332
2333         copy_context_after_encode(best, s, type);
2334     }
2335 }
2336
2337 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2338     uint32_t *sq = ff_square_tab + 256;
2339     int acc=0;
2340     int x,y;
2341
2342     if(w==16 && h==16)
2343         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2344     else if(w==8 && h==8)
2345         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2346
2347     for(y=0; y<h; y++){
2348         for(x=0; x<w; x++){
2349             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2350         }
2351     }
2352
2353     assert(acc>=0);
2354
2355     return acc;
2356 }
2357
2358 static int sse_mb(MpegEncContext *s){
2359     int w= 16;
2360     int h= 16;
2361
2362     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2363     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2364
2365     if(w==16 && h==16)
2366       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2367         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2368                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2369                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2370       }else{
2371         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2372                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2373                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2374       }
2375     else
2376         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2377                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2378                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2379 }
2380
2381 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2382     MpegEncContext *s= *(void**)arg;
2383
2384
2385     s->me.pre_pass=1;
2386     s->me.dia_size= s->avctx->pre_dia_size;
2387     s->first_slice_line=1;
2388     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2389         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2390             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2391         }
2392         s->first_slice_line=0;
2393     }
2394
2395     s->me.pre_pass=0;
2396
2397     return 0;
2398 }
2399
2400 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2401     MpegEncContext *s= *(void**)arg;
2402
2403     s->me.dia_size= s->avctx->dia_size;
2404     s->first_slice_line=1;
2405     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2406         s->mb_x=0; //for block init below
2407         ff_init_block_index(s);
2408         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2409             s->block_index[0]+=2;
2410             s->block_index[1]+=2;
2411             s->block_index[2]+=2;
2412             s->block_index[3]+=2;
2413
2414             /* compute motion vector & mb_type and store in context */
2415             if(s->pict_type==AV_PICTURE_TYPE_B)
2416                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2417             else
2418                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2419         }
2420         s->first_slice_line=0;
2421     }
2422     return 0;
2423 }
2424
2425 static int mb_var_thread(AVCodecContext *c, void *arg){
2426     MpegEncContext *s= *(void**)arg;
2427     int mb_x, mb_y;
2428
2429     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2430         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2431             int xx = mb_x * 16;
2432             int yy = mb_y * 16;
2433             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2434             int varc;
2435             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2436
2437             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2438                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2439
2440             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2441             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2442             s->me.mb_var_sum_temp    += varc;
2443         }
2444     }
2445     return 0;
2446 }
2447
2448 static void write_slice_end(MpegEncContext *s){
2449     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2450         if(s->partitioned_frame){
2451             ff_mpeg4_merge_partitions(s);
2452         }
2453
2454         ff_mpeg4_stuffing(&s->pb);
2455     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2456         ff_mjpeg_encode_stuffing(&s->pb);
2457     }
2458
2459     avpriv_align_put_bits(&s->pb);
2460     flush_put_bits(&s->pb);
2461
2462     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2463         s->misc_bits+= get_bits_diff(s);
2464 }
2465
2466 static void write_mb_info(MpegEncContext *s)
2467 {
2468     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2469     int offset = put_bits_count(&s->pb);
2470     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2471     int gobn = s->mb_y / s->gob_index;
2472     int pred_x, pred_y;
2473     if (CONFIG_H263_ENCODER)
2474         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2475     bytestream_put_le32(&ptr, offset);
2476     bytestream_put_byte(&ptr, s->qscale);
2477     bytestream_put_byte(&ptr, gobn);
2478     bytestream_put_le16(&ptr, mba);
2479     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2480     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2481     /* 4MV not implemented */
2482     bytestream_put_byte(&ptr, 0); /* hmv2 */
2483     bytestream_put_byte(&ptr, 0); /* vmv2 */
2484 }
2485
2486 static void update_mb_info(MpegEncContext *s, int startcode)
2487 {
2488     if (!s->mb_info)
2489         return;
2490     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2491         s->mb_info_size += 12;
2492         s->prev_mb_info = s->last_mb_info;
2493     }
2494     if (startcode) {
2495         s->prev_mb_info = put_bits_count(&s->pb)/8;
2496         /* This might have incremented mb_info_size above, and we return without
2497          * actually writing any info into that slot yet. But in that case,
2498          * this will be called again at the start of the after writing the
2499          * start code, actually writing the mb info. */
2500         return;
2501     }
2502
2503     s->last_mb_info = put_bits_count(&s->pb)/8;
2504     if (!s->mb_info_size)
2505         s->mb_info_size += 12;
2506     write_mb_info(s);
2507 }
2508
2509 static int encode_thread(AVCodecContext *c, void *arg){
2510     MpegEncContext *s= *(void**)arg;
2511     int mb_x, mb_y, pdif = 0;
2512     int chr_h= 16>>s->chroma_y_shift;
2513     int i, j;
2514     MpegEncContext best_s, backup_s;
2515     uint8_t bit_buf[2][MAX_MB_BYTES];
2516     uint8_t bit_buf2[2][MAX_MB_BYTES];
2517     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2518     PutBitContext pb[2], pb2[2], tex_pb[2];
2519
2520     for(i=0; i<2; i++){
2521         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2522         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2523         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2524     }
2525
2526     s->last_bits= put_bits_count(&s->pb);
2527     s->mv_bits=0;
2528     s->misc_bits=0;
2529     s->i_tex_bits=0;
2530     s->p_tex_bits=0;
2531     s->i_count=0;
2532     s->f_count=0;
2533     s->b_count=0;
2534     s->skip_count=0;
2535
2536     for(i=0; i<3; i++){
2537         /* init last dc values */
2538         /* note: quant matrix value (8) is implied here */
2539         s->last_dc[i] = 128 << s->intra_dc_precision;
2540
2541         s->current_picture.f->error[i] = 0;
2542     }
2543     s->mb_skip_run = 0;
2544     memset(s->last_mv, 0, sizeof(s->last_mv));
2545
2546     s->last_mv_dir = 0;
2547
2548     switch(s->codec_id){
2549     case AV_CODEC_ID_H263:
2550     case AV_CODEC_ID_H263P:
2551     case AV_CODEC_ID_FLV1:
2552         if (CONFIG_H263_ENCODER)
2553             s->gob_index = ff_h263_get_gob_height(s);
2554         break;
2555     case AV_CODEC_ID_MPEG4:
2556         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2557             ff_mpeg4_init_partitions(s);
2558         break;
2559     }
2560
2561     s->resync_mb_x=0;
2562     s->resync_mb_y=0;
2563     s->first_slice_line = 1;
2564     s->ptr_lastgob = s->pb.buf;
2565     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2566         s->mb_x=0;
2567         s->mb_y= mb_y;
2568
2569         ff_set_qscale(s, s->qscale);
2570         ff_init_block_index(s);
2571
2572         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2573             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2574             int mb_type= s->mb_type[xy];
2575 //            int d;
2576             int dmin= INT_MAX;
2577             int dir;
2578
2579             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2580                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2581                 return -1;
2582             }
2583             if(s->data_partitioning){
2584                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2585                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2586                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2587                     return -1;
2588                 }
2589             }
2590
2591             s->mb_x = mb_x;
2592             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2593             ff_update_block_index(s);
2594
2595             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2596                 ff_h261_reorder_mb_index(s);
2597                 xy= s->mb_y*s->mb_stride + s->mb_x;
2598                 mb_type= s->mb_type[xy];
2599             }
2600
2601             /* write gob / video packet header  */
2602             if(s->rtp_mode){
2603                 int current_packet_size, is_gob_start;
2604
2605                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2606
2607                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2608
2609                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2610
2611                 switch(s->codec_id){
2612                 case AV_CODEC_ID_H263:
2613                 case AV_CODEC_ID_H263P:
2614                     if(!s->h263_slice_structured)
2615                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2616                     break;
2617                 case AV_CODEC_ID_MPEG2VIDEO:
2618                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2619                 case AV_CODEC_ID_MPEG1VIDEO:
2620                     if(s->mb_skip_run) is_gob_start=0;
2621                     break;
2622                 }
2623
2624                 if(is_gob_start){
2625                     if(s->start_mb_y != mb_y || mb_x!=0){
2626                         write_slice_end(s);
2627
2628                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2629                             ff_mpeg4_init_partitions(s);
2630                         }
2631                     }
2632
2633                     assert((put_bits_count(&s->pb)&7) == 0);
2634                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2635
2636                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2637                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2638                         int d = 100 / s->error_rate;
2639                         if(r % d == 0){
2640                             current_packet_size=0;
2641                             s->pb.buf_ptr= s->ptr_lastgob;
2642                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2643                         }
2644                     }
2645
2646                     if (s->avctx->rtp_callback){
2647                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2648                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2649                     }
2650                     update_mb_info(s, 1);
2651
2652                     switch(s->codec_id){
2653                     case AV_CODEC_ID_MPEG4:
2654                         if (CONFIG_MPEG4_ENCODER) {
2655                             ff_mpeg4_encode_video_packet_header(s);
2656                             ff_mpeg4_clean_buffers(s);
2657                         }
2658                     break;
2659                     case AV_CODEC_ID_MPEG1VIDEO:
2660                     case AV_CODEC_ID_MPEG2VIDEO:
2661                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2662                             ff_mpeg1_encode_slice_header(s);
2663                             ff_mpeg1_clean_buffers(s);
2664                         }
2665                     break;
2666                     case AV_CODEC_ID_H263:
2667                     case AV_CODEC_ID_H263P:
2668                         if (CONFIG_H263_ENCODER)
2669                             ff_h263_encode_gob_header(s, mb_y);
2670                     break;
2671                     }
2672
2673                     if(s->flags&CODEC_FLAG_PASS1){
2674                         int bits= put_bits_count(&s->pb);
2675                         s->misc_bits+= bits - s->last_bits;
2676                         s->last_bits= bits;
2677                     }
2678
2679                     s->ptr_lastgob += current_packet_size;
2680                     s->first_slice_line=1;
2681                     s->resync_mb_x=mb_x;
2682                     s->resync_mb_y=mb_y;
2683                 }
2684             }
2685
2686             if(  (s->resync_mb_x   == s->mb_x)
2687                && s->resync_mb_y+1 == s->mb_y){
2688                 s->first_slice_line=0;
2689             }
2690
2691             s->mb_skipped=0;
2692             s->dquant=0; //only for QP_RD
2693
2694             update_mb_info(s, 0);
2695
2696             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2697                 int next_block=0;
2698                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2699
2700                 copy_context_before_encode(&backup_s, s, -1);
2701                 backup_s.pb= s->pb;
2702                 best_s.data_partitioning= s->data_partitioning;
2703                 best_s.partitioned_frame= s->partitioned_frame;
2704                 if(s->data_partitioning){
2705                     backup_s.pb2= s->pb2;
2706                     backup_s.tex_pb= s->tex_pb;
2707                 }
2708
2709                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2710                     s->mv_dir = MV_DIR_FORWARD;
2711                     s->mv_type = MV_TYPE_16X16;
2712                     s->mb_intra= 0;
2713                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2714                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2715                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2716                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2717                 }
2718                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2719                     s->mv_dir = MV_DIR_FORWARD;
2720                     s->mv_type = MV_TYPE_FIELD;
2721                     s->mb_intra= 0;
2722                     for(i=0; i<2; i++){
2723                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2724                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2725                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2726                     }
2727                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2728                                  &dmin, &next_block, 0, 0);
2729                 }
2730                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2731                     s->mv_dir = MV_DIR_FORWARD;
2732                     s->mv_type = MV_TYPE_16X16;
2733                     s->mb_intra= 0;
2734                     s->mv[0][0][0] = 0;
2735                     s->mv[0][0][1] = 0;
2736                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2737                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2738                 }
2739                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2740                     s->mv_dir = MV_DIR_FORWARD;
2741                     s->mv_type = MV_TYPE_8X8;
2742                     s->mb_intra= 0;
2743                     for(i=0; i<4; i++){
2744                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2745                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2746                     }
2747                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2748                                  &dmin, &next_block, 0, 0);
2749                 }
2750                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2751                     s->mv_dir = MV_DIR_FORWARD;
2752                     s->mv_type = MV_TYPE_16X16;
2753                     s->mb_intra= 0;
2754                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2755                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2756                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2757                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2758                 }
2759                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2760                     s->mv_dir = MV_DIR_BACKWARD;
2761                     s->mv_type = MV_TYPE_16X16;
2762                     s->mb_intra= 0;
2763                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2764                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2765                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2766                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2767                 }
2768                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2769                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2770                     s->mv_type = MV_TYPE_16X16;
2771                     s->mb_intra= 0;
2772                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2773                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2774                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2775                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2776                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2777                                  &dmin, &next_block, 0, 0);
2778                 }
2779                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2780                     s->mv_dir = MV_DIR_FORWARD;
2781                     s->mv_type = MV_TYPE_FIELD;
2782                     s->mb_intra= 0;
2783                     for(i=0; i<2; i++){
2784                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2785                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2786                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2787                     }
2788                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2789                                  &dmin, &next_block, 0, 0);
2790                 }
2791                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2792                     s->mv_dir = MV_DIR_BACKWARD;
2793                     s->mv_type = MV_TYPE_FIELD;
2794                     s->mb_intra= 0;
2795                     for(i=0; i<2; i++){
2796                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2797                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2798                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2799                     }
2800                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2801                                  &dmin, &next_block, 0, 0);
2802                 }
2803                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2804                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2805                     s->mv_type = MV_TYPE_FIELD;
2806                     s->mb_intra= 0;
2807                     for(dir=0; dir<2; dir++){
2808                         for(i=0; i<2; i++){
2809                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2810                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2811                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2812                         }
2813                     }
2814                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2815                                  &dmin, &next_block, 0, 0);
2816                 }
2817                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2818                     s->mv_dir = 0;
2819                     s->mv_type = MV_TYPE_16X16;
2820                     s->mb_intra= 1;
2821                     s->mv[0][0][0] = 0;
2822                     s->mv[0][0][1] = 0;
2823                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2824                                  &dmin, &next_block, 0, 0);
2825                     if(s->h263_pred || s->h263_aic){
2826                         if(best_s.mb_intra)
2827                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2828                         else
2829                             ff_clean_intra_table_entries(s); //old mode?
2830                     }
2831                 }
2832
2833                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2834                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2835                         const int last_qp= backup_s.qscale;
2836                         int qpi, qp, dc[6];
2837                         int16_t ac[6][16];
2838                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2839                         static const int dquant_tab[4]={-1,1,-2,2};
2840
2841                         assert(backup_s.dquant == 0);
2842
2843                         //FIXME intra
2844                         s->mv_dir= best_s.mv_dir;
2845                         s->mv_type = MV_TYPE_16X16;
2846                         s->mb_intra= best_s.mb_intra;
2847                         s->mv[0][0][0] = best_s.mv[0][0][0];
2848                         s->mv[0][0][1] = best_s.mv[0][0][1];
2849                         s->mv[1][0][0] = best_s.mv[1][0][0];
2850                         s->mv[1][0][1] = best_s.mv[1][0][1];
2851
2852                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2853                         for(; qpi<4; qpi++){
2854                             int dquant= dquant_tab[qpi];
2855                             qp= last_qp + dquant;
2856                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2857                                 continue;
2858                             backup_s.dquant= dquant;
2859                             if(s->mb_intra && s->dc_val[0]){
2860                                 for(i=0; i<6; i++){
2861                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2862                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2863                                 }
2864                             }
2865
2866                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2867                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2868                             if(best_s.qscale != qp){
2869                                 if(s->mb_intra && s->dc_val[0]){
2870                                     for(i=0; i<6; i++){
2871                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2872                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2873                                     }
2874                                 }
2875                             }
2876                         }
2877                     }
2878                 }
2879                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2880                     int mx= s->b_direct_mv_table[xy][0];
2881                     int my= s->b_direct_mv_table[xy][1];
2882
2883                     backup_s.dquant = 0;
2884                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2885                     s->mb_intra= 0;
2886                     ff_mpeg4_set_direct_mv(s, mx, my);
2887                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2888                                  &dmin, &next_block, mx, my);
2889                 }
2890                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2891                     backup_s.dquant = 0;
2892                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2893                     s->mb_intra= 0;
2894                     ff_mpeg4_set_direct_mv(s, 0, 0);
2895                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2896                                  &dmin, &next_block, 0, 0);
2897                 }
2898                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2899                     int coded=0;
2900                     for(i=0; i<6; i++)
2901                         coded |= s->block_last_index[i];
2902                     if(coded){
2903                         int mx,my;
2904                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2905                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2906                             mx=my=0; //FIXME find the one we actually used
2907                             ff_mpeg4_set_direct_mv(s, mx, my);
2908                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2909                             mx= s->mv[1][0][0];
2910                             my= s->mv[1][0][1];
2911                         }else{
2912                             mx= s->mv[0][0][0];
2913                             my= s->mv[0][0][1];
2914                         }
2915
2916                         s->mv_dir= best_s.mv_dir;
2917                         s->mv_type = best_s.mv_type;
2918                         s->mb_intra= 0;
2919 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2920                         s->mv[0][0][1] = best_s.mv[0][0][1];
2921                         s->mv[1][0][0] = best_s.mv[1][0][0];
2922                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2923                         backup_s.dquant= 0;
2924                         s->skipdct=1;
2925                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2926                                         &dmin, &next_block, mx, my);
2927                         s->skipdct=0;
2928                     }
2929                 }
2930
2931                 s->current_picture.qscale_table[xy] = best_s.qscale;
2932
2933                 copy_context_after_encode(s, &best_s, -1);
2934
2935                 pb_bits_count= put_bits_count(&s->pb);
2936                 flush_put_bits(&s->pb);
2937                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2938                 s->pb= backup_s.pb;
2939
2940                 if(s->data_partitioning){
2941                     pb2_bits_count= put_bits_count(&s->pb2);
2942                     flush_put_bits(&s->pb2);
2943                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2944                     s->pb2= backup_s.pb2;
2945
2946                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2947                     flush_put_bits(&s->tex_pb);
2948                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2949                     s->tex_pb= backup_s.tex_pb;
2950                 }
2951                 s->last_bits= put_bits_count(&s->pb);
2952
2953                 if (CONFIG_H263_ENCODER &&
2954                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2955                     ff_h263_update_motion_val(s);
2956
2957                 if(next_block==0){ //FIXME 16 vs linesize16
2958                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2959                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2960                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2961                 }
2962
2963                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2964                     ff_mpv_decode_mb(s, s->block);
2965             } else {
2966                 int motion_x = 0, motion_y = 0;
2967                 s->mv_type=MV_TYPE_16X16;
2968                 // only one MB-Type possible
2969
2970                 switch(mb_type){
2971                 case CANDIDATE_MB_TYPE_INTRA:
2972                     s->mv_dir = 0;
2973                     s->mb_intra= 1;
2974                     motion_x= s->mv[0][0][0] = 0;
2975                     motion_y= s->mv[0][0][1] = 0;
2976                     break;
2977                 case CANDIDATE_MB_TYPE_INTER:
2978                     s->mv_dir = MV_DIR_FORWARD;
2979                     s->mb_intra= 0;
2980                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2981                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2982                     break;
2983                 case CANDIDATE_MB_TYPE_INTER_I:
2984                     s->mv_dir = MV_DIR_FORWARD;
2985                     s->mv_type = MV_TYPE_FIELD;
2986                     s->mb_intra= 0;
2987                     for(i=0; i<2; i++){
2988                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2989                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2990                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2991                     }
2992                     break;
2993                 case CANDIDATE_MB_TYPE_INTER4V:
2994                     s->mv_dir = MV_DIR_FORWARD;
2995                     s->mv_type = MV_TYPE_8X8;
2996                     s->mb_intra= 0;
2997                     for(i=0; i<4; i++){
2998                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2999                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3000                     }
3001                     break;
3002                 case CANDIDATE_MB_TYPE_DIRECT:
3003                     if (CONFIG_MPEG4_ENCODER) {
3004                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3005                         s->mb_intra= 0;
3006                         motion_x=s->b_direct_mv_table[xy][0];
3007                         motion_y=s->b_direct_mv_table[xy][1];
3008                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3009                     }
3010                     break;
3011                 case CANDIDATE_MB_TYPE_DIRECT0:
3012                     if (CONFIG_MPEG4_ENCODER) {
3013                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3014                         s->mb_intra= 0;
3015                         ff_mpeg4_set_direct_mv(s, 0, 0);
3016                     }
3017                     break;
3018                 case CANDIDATE_MB_TYPE_BIDIR:
3019                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3020                     s->mb_intra= 0;
3021                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3022                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3023                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3024                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3025                     break;
3026                 case CANDIDATE_MB_TYPE_BACKWARD:
3027                     s->mv_dir = MV_DIR_BACKWARD;
3028                     s->mb_intra= 0;
3029                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3030                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3031                     break;
3032                 case CANDIDATE_MB_TYPE_FORWARD:
3033                     s->mv_dir = MV_DIR_FORWARD;
3034                     s->mb_intra= 0;
3035                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3036                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3037                     break;
3038                 case CANDIDATE_MB_TYPE_FORWARD_I:
3039                     s->mv_dir = MV_DIR_FORWARD;
3040                     s->mv_type = MV_TYPE_FIELD;
3041                     s->mb_intra= 0;
3042                     for(i=0; i<2; i++){
3043                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3044                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3045                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3046                     }
3047                     break;
3048                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3049                     s->mv_dir = MV_DIR_BACKWARD;
3050                     s->mv_type = MV_TYPE_FIELD;
3051                     s->mb_intra= 0;
3052                     for(i=0; i<2; i++){
3053                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3054                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3055                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3056                     }
3057                     break;
3058                 case CANDIDATE_MB_TYPE_BIDIR_I:
3059                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3060                     s->mv_type = MV_TYPE_FIELD;
3061                     s->mb_intra= 0;
3062                     for(dir=0; dir<2; dir++){
3063                         for(i=0; i<2; i++){
3064                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3065                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3066                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3067                         }
3068                     }
3069                     break;
3070                 default:
3071                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3072                 }
3073
3074                 encode_mb(s, motion_x, motion_y);
3075
3076                 // RAL: Update last macroblock type
3077                 s->last_mv_dir = s->mv_dir;
3078
3079                 if (CONFIG_H263_ENCODER &&
3080                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3081                     ff_h263_update_motion_val(s);
3082
3083                 ff_mpv_decode_mb(s, s->block);
3084             }
3085
3086             /* clean the MV table in IPS frames for direct mode in B frames */
3087             if(s->mb_intra /* && I,P,S_TYPE */){
3088                 s->p_mv_table[xy][0]=0;
3089                 s->p_mv_table[xy][1]=0;
3090             }
3091
3092             if(s->flags&CODEC_FLAG_PSNR){
3093                 int w= 16;
3094                 int h= 16;
3095
3096                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3097                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3098
3099                 s->current_picture.f->error[0] += sse(
3100                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3101                     s->dest[0], w, h, s->linesize);
3102                 s->current_picture.f->error[1] += sse(
3103                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3104                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3105                 s->current_picture.f->error[2] += sse(
3106                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3107                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3108             }
3109             if(s->loop_filter){
3110                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3111                     ff_h263_loop_filter(s);
3112             }
3113             av_dlog(s->avctx, "MB %d %d bits\n",
3114                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3115         }
3116     }
3117
3118     //not beautiful here but we must write it before flushing so it has to be here
3119     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3120         ff_msmpeg4_encode_ext_header(s);
3121
3122     write_slice_end(s);
3123
3124     /* Send the last GOB if RTP */
3125     if (s->avctx->rtp_callback) {
3126         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3127         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3128         /* Call the RTP callback to send the last GOB */
3129         emms_c();
3130         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3131     }
3132
3133     return 0;
3134 }
3135
3136 #define MERGE(field) dst->field += src->field; src->field=0
3137 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3138     MERGE(me.scene_change_score);
3139     MERGE(me.mc_mb_var_sum_temp);
3140     MERGE(me.mb_var_sum_temp);
3141 }
3142
3143 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3144     int i;
3145
3146     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3147     MERGE(dct_count[1]);
3148     MERGE(mv_bits);
3149     MERGE(i_tex_bits);
3150     MERGE(p_tex_bits);
3151     MERGE(i_count);
3152     MERGE(f_count);
3153     MERGE(b_count);
3154     MERGE(skip_count);
3155     MERGE(misc_bits);
3156     MERGE(er.error_count);
3157     MERGE(padding_bug_score);
3158     MERGE(current_picture.f->error[0]);
3159     MERGE(current_picture.f->error[1]);
3160     MERGE(current_picture.f->error[2]);
3161
3162     if(dst->avctx->noise_reduction){
3163         for(i=0; i<64; i++){
3164             MERGE(dct_error_sum[0][i]);
3165             MERGE(dct_error_sum[1][i]);
3166         }
3167     }
3168
3169     assert(put_bits_count(&src->pb) % 8 ==0);
3170     assert(put_bits_count(&dst->pb) % 8 ==0);
3171     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3172     flush_put_bits(&dst->pb);
3173 }
3174
3175 static int estimate_qp(MpegEncContext *s, int dry_run){
3176     if (s->next_lambda){
3177         s->current_picture_ptr->f->quality =
3178         s->current_picture.f->quality = s->next_lambda;
3179         if(!dry_run) s->next_lambda= 0;
3180     } else if (!s->fixed_qscale) {
3181         s->current_picture_ptr->f->quality =
3182         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3183         if (s->current_picture.f->quality < 0)
3184             return -1;
3185     }
3186
3187     if(s->adaptive_quant){
3188         switch(s->codec_id){
3189         case AV_CODEC_ID_MPEG4:
3190             if (CONFIG_MPEG4_ENCODER)
3191                 ff_clean_mpeg4_qscales(s);
3192             break;
3193         case AV_CODEC_ID_H263:
3194         case AV_CODEC_ID_H263P:
3195         case AV_CODEC_ID_FLV1:
3196             if (CONFIG_H263_ENCODER)
3197                 ff_clean_h263_qscales(s);
3198             break;
3199         default:
3200             ff_init_qscale_tab(s);
3201         }
3202
3203         s->lambda= s->lambda_table[0];
3204         //FIXME broken
3205     }else
3206         s->lambda = s->current_picture.f->quality;
3207     update_qscale(s);
3208     return 0;
3209 }
3210
3211 /* must be called before writing the header */
3212 static void set_frame_distances(MpegEncContext * s){
3213     assert(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3214     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3215
3216     if(s->pict_type==AV_PICTURE_TYPE_B){
3217         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3218         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3219     }else{
3220         s->pp_time= s->time - s->last_non_b_time;
3221         s->last_non_b_time= s->time;
3222         assert(s->picture_number==0 || s->pp_time > 0);
3223     }
3224 }
3225
/**
 * Encode the current picture.
 *
 * Steps, in order: set up the time and rounding state, pick a preliminary
 * lambda, run motion estimation (or the variance pass for I-frames) on all
 * slice contexts, possibly turn a P-frame into an I-frame on a scene change,
 * select f_code/b_code, run the final quantizer estimation, write the picture
 * header of the active output format and finally run encode_thread() on all
 * slice contexts and merge their results into s.
 *
 * @param s              main encoder context; the slice contexts
 *                       thread_context[1 .. slice_context_count-1] are
 *                       updated from it and merged back into it
 * @param picture_number stored in s->picture_number and passed to the
 *                       format specific picture header writers
 * @return 0 on success, -1 if estimate_qp() or ff_init_me() fails, or the
 *         negative value returned by ff_update_duplicate_context()
 */
3226 static int encode_picture(MpegEncContext *s, int picture_number)
3227 {
3228     int i, ret;
3229     int bits;
3230     int context_count = s->slice_context_count;
3231
3232     s->picture_number = picture_number;
3233
3234     /* Reset the average MB variance */
3235     s->me.mb_var_sum_temp    =
3236     s->me.mc_mb_var_sum_temp = 0;
3237
3238     /* we need to initialize some time vars before we can encode b-frames */
3239     // RAL: Condition added for MPEG1VIDEO
3240     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3241         set_frame_distances(s);
3242     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3243         ff_set_mpeg4_time(s);
3244
3245     s->me.scene_change_score=0;
3246
3247 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3248
     /* I-frames set no_rounding from the msmpeg4 version; other non-B frames
      * toggle it when flipflop rounding is enabled or the codec is H263P/MPEG4 */
3249     if(s->pict_type==AV_PICTURE_TYPE_I){
3250         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3251         else                        s->no_rounding=0;
3252     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3253         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3254             s->no_rounding ^= 1;
3255     }
3256
     /* preliminary lambda for motion estimation: a dry run of the quantizer
      * estimation in pass 2, otherwise (unless CODEC_FLAG_QSCALE is set) the
      * lambda remembered in last_lambda_for[] */
3257     if(s->flags & CODEC_FLAG_PASS2){
3258         if (estimate_qp(s,1) < 0)
3259             return -1;
3260         ff_get_2pass_fcode(s);
3261     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3262         if(s->pict_type==AV_PICTURE_TYPE_B)
3263             s->lambda= s->last_lambda_for[s->pict_type];
3264         else
3265             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3266         update_qscale(s);
3267     }
3268
3269     s->mb_intra=0; //for the rate distortion & bit compare functions
     /* bring the additional slice contexts up to date with s */
3270     for(i=1; i<context_count; i++){
3271         ret = ff_update_duplicate_context(s->thread_context[i], s);
3272         if (ret < 0)
3273             return ret;
3274     }
3275
3276     if(ff_init_me(s)<0)
3277         return -1;
3278
3279     /* Estimate motion for every MB */
3280     if(s->pict_type != AV_PICTURE_TYPE_I){
         /* scale lambda and lambda2 by me_penalty_compensation/256 (rounded) */
3281         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3282         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3283         if (s->pict_type != AV_PICTURE_TYPE_B) {
             /* optional pre-pass: pre_me==2 always, any other non-zero pre_me
              * only when the last non-B picture was an I-frame */
3284             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3285                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3286             }
3287         }
3288
3289         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3290     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3291         /* I-Frame */
3292         for(i=0; i<s->mb_stride*s->mb_height; i++)
3293             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3294
3295         if(!s->fixed_qscale){
3296             /* finding spatial complexity for I-frame rate control */
3297             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3298         }
3299     }
     /* collect the scene change score and variance sums of the slice contexts */
3300     for(i=1; i<context_count; i++){
3301         merge_context_after_me(s, s->thread_context[i]);
3302     }
3303     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3304     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3305     emms_c();
3306
     /* a P-frame whose scene change score exceeds the threshold is coded as
      * an I-frame with all macroblocks marked intra */
3307     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3308         s->pict_type= AV_PICTURE_TYPE_I;
3309         for(i=0; i<s->mb_stride*s->mb_height; i++)
3310             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3311         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3312                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3313     }
3314
     /* unless umvplus is set: choose f_code (and b_code for B-frames) from
      * the motion vector tables and hand every table to ff_fix_long_mvs()
      * together with the chosen code */
3315     if(!s->umvplus){
3316         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3317             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3318
3319             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3320                 int a,b;
3321                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3322                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3323                 s->f_code= FFMAX3(s->f_code, a, b);
3324             }
3325
3326             ff_fix_long_p_mvs(s);
3327             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3328             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3329                 int j;
3330                 for(i=0; i<2; i++){
3331                     for(j=0; j<2; j++)
3332                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3333                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3334                 }
3335             }
3336         }
3337
3338         if(s->pict_type==AV_PICTURE_TYPE_B){
3339             int a, b;
3340
             /* f_code covers the forward tables, b_code the backward tables */
3341             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3342             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3343             s->f_code = FFMAX(a, b);
3344
3345             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3346             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3347             s->b_code = FFMAX(a, b);
3348
3349             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3350             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3351             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3352             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3353             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3354                 int dir, j;
3355                 for(dir=0; dir<2; dir++){
3356                     for(i=0; i<2; i++){
3357                         for(j=0; j<2; j++){
3358                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3359                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3360                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3361                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3362                         }
3363                     }
3364                 }
3365             }
3366         }
3367     }
3368
     /* final (non dry run) quantizer estimation */
3369     if (estimate_qp(s, 0) < 0)
3370         return -1;
3371
3372     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3373         s->qscale= 3; //reduce clipping problems
3374
3375     if (s->out_format == FMT_MJPEG) {
3376         /* for mjpeg, we do include qscale in the matrix */
3377         for(i=1;i<64;i++){
3378             int j = s->idsp.idct_permutation[i];
3379
3380             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3381         }
3382         s->y_dc_scale_table=
3383         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3384         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3385         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3386                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
         /* qscale is already part of the matrix, so it is fixed to 8 here */
3387         s->qscale= 8;
3388     }
3389
3390     //FIXME var duplication
3391     s->current_picture_ptr->f->key_frame =
3392     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3393     s->current_picture_ptr->f->pict_type =
3394     s->current_picture.f->pict_type = s->pict_type;
3395
3396     if (s->current_picture.f->key_frame)
3397         s->picture_in_gop_number=0;
3398
     /* remember the bit position so that the header size can be computed below */
3399     s->last_bits= put_bits_count(&s->pb);
     /* write the picture header of the active output format */
3400     switch(s->out_format) {
3401     case FMT_MJPEG:
3402         if (CONFIG_MJPEG_ENCODER)
3403             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3404                                            s->intra_matrix);
3405         break;
3406     case FMT_H261:
3407         if (CONFIG_H261_ENCODER)
3408             ff_h261_encode_picture_header(s, picture_number);
3409         break;
3410     case FMT_H263:
3411         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3412             ff_wmv2_encode_picture_header(s, picture_number);
3413         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3414             ff_msmpeg4_encode_picture_header(s, picture_number);
3415         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3416             ff_mpeg4_encode_picture_header(s, picture_number);
3417         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3418             ff_rv10_encode_picture_header(s, picture_number);
3419         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3420             ff_rv20_encode_picture_header(s, picture_number);
3421         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3422             ff_flv_encode_picture_header(s, picture_number);
3423         else if (CONFIG_H263_ENCODER)
3424             ff_h263_encode_picture_header(s, picture_number);
3425         break;
3426     case FMT_MPEG1:
3427         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3428             ff_mpeg1_encode_picture_header(s, picture_number);
3429         break;
3430     default:
3431         assert(0);
3432     }
3433     bits= put_bits_count(&s->pb);
3434     s->header_bits= bits - s->last_bits;
3435
     /* update the slice contexts from s, encode all slices, then merge the
      * statistics and bitstreams of the additional contexts back into s */
3436     for(i=1; i<context_count; i++){
3437         update_duplicate_context_after_me(s->thread_context[i], s);
3438     }
3439     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3440     for(i=1; i<context_count; i++){
3441         merge_context_after_encode(s, s->thread_context[i]);
3442     }
3443     emms_c();
3444     return 0;
3445 }
3446
3447 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3448     const int intra= s->mb_intra;
3449     int i;
3450
3451     s->dct_count[intra]++;
3452
3453     for(i=0; i<64; i++){
3454         int level= block[i];
3455
3456         if(level){
3457             if(level>0){
3458                 s->dct_error_sum[intra][i] += level;
3459                 level -= s->dct_offset[intra][i];
3460                 if(level<0) level=0;
3461             }else{
3462                 s->dct_error_sum[intra][i] -= level;
3463                 level += s->dct_offset[intra][i];
3464                 if(level>0) level=0;
3465             }
3466             block[i]= level;
3467         }
3468     }
3469 }
3470
/**
 * Forward DCT followed by rate-distortion optimized (trellis) quantization
 * of one 8x8 block.
 *
 * For each coefficient up to the last one that quantizes to a non-zero
 * value, up to two candidate levels are collected. A dynamic programming
 * search over (run, level) pairs then picks the combination with the lowest
 * distortion + lambda * bits score, and the chosen levels are written back
 * into block.
 *
 * @param s        encoder context
 * @param block    64 values, transformed in place by s->fdsp.fdct() and
 *                 replaced by the quantized levels
 * @param n        block index; for intra blocks n < 4 selects y_dc_scale,
 *                 otherwise c_dc_scale. s->coded_score[n] is written when the
 *                 search is run.
 * @param qscale   quantizer, used to select the quantization matrix
 * @param overflow set to non-zero if s->max_qcoeff is smaller than the OR of
 *                 the quantized magnitudes, i.e. a level may be out of range
 * @return scan index of the last non-zero coefficient; a value below the
 *         first coded index (start_i) means that no coefficient is coded
 */
3471 static int dct_quantize_trellis_c(MpegEncContext *s,
3472                                   int16_t *block, int n,
3473                                   int qscale, int *overflow){
3474     const int *qmat;
3475     const uint8_t *scantable= s->intra_scantable.scantable;
3476     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3477     int max=0;
3478     unsigned int threshold1, threshold2;
3479     int bias=0;
     /* run_tab/level_tab[i+1]: best run and level found for coefficient i;
      * score_tab[i+1]: best score of a path whose last coded coefficient is i */
3480     int run_tab[65];
3481     int level_tab[65];
3482     int score_tab[65];
     /* survivor[]: positions that are still considered as the start of a run */
3483     int survivor[65];
3484     int survivor_count;
3485     int last_run=0;
3486     int last_level=0;
3487     int last_score= 0;
3488     int last_i;
     /* coeff[k][i]: k-th candidate level of coefficient i, coeff_count[i] of them are valid */
3489     int coeff[2][64];
3490     int coeff_count[64];
3491     int qmul, qadd, start_i, last_non_zero, i, dc;
3492     const int esc_length= s->ac_esc_length;
3493     uint8_t * length;
3494     uint8_t * last_length;
3495     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3496
3497     s->fdsp.fdct(block);
3498
3499     if(s->dct_error_sum)
3500         s->denoise_dct(s, block);
     /* dequantization constants used for the FMT_H263 reconstruction below */
3501     qmul= qscale*16;
3502     qadd= ((qscale-1)|1)*8;
3503
3504     if (s->mb_intra) {
3505         int q;
3506         if (!s->h263_aic) {
3507             if (n < 4)
3508                 q = s->y_dc_scale;
3509             else
3510                 q = s->c_dc_scale;
3511             q = q << 3;
3512         } else{
3513             /* For AIC we skip quant/dequant of INTRADC */
3514             q = 1 << 3;
3515             qadd=0;
3516         }
3517
3518         /* note: block[0] is assumed to be positive */
3519         block[0] = (block[0] + (q >> 1)) / q;
         /* the DC coefficient is done, the search only covers coefficients 1..63 */
3520         start_i = 1;
3521         last_non_zero = 0;
3522         qmat = s->q_intra_matrix[qscale];
3523         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3524             bias= 1<<(QMAT_SHIFT-1);
3525         length     = s->intra_ac_vlc_length;
3526         last_length= s->intra_ac_vlc_last_length;
3527     } else {
3528         start_i = 0;
3529         last_non_zero = -1;
3530         qmat = s->q_inter_matrix[qscale];
3531         length     = s->inter_ac_vlc_length;
3532         last_length= s->inter_ac_vlc_last_length;
3533     }
3534     last_i= start_i;
3535
     /* (unsigned)(level+threshold1) > threshold2 is true exactly when
      * |level| > threshold1, i.e. when (bias + |level|) >> QMAT_SHIFT is non-zero */
3536     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3537     threshold2= (threshold1<<1);
3538
     /* find the last coefficient in scan order that quantizes to non-zero */
3539     for(i=63; i>=start_i; i--) {
3540         const int j = scantable[i];
3541         int level = block[j] * qmat[j];
3542
3543         if(((unsigned)(level+threshold1))>threshold2){
3544             last_non_zero = i;
3545             break;
3546         }
3547     }
3548
     /* collect the candidate levels of every coefficient up to last_non_zero */
3549     for(i=start_i; i<=last_non_zero; i++) {
3550         const int j = scantable[i];
3551         int level = block[j] * qmat[j];
3552
3553 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3554 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3555         if(((unsigned)(level+threshold1))>threshold2){
             /* candidates: the quantized level and the next one closer to zero
              * (the latter is only used if it is non-zero, see coeff_count) */
3556             if(level>0){
3557                 level= (bias + level)>>QMAT_SHIFT;
3558                 coeff[0][i]= level;
3559                 coeff[1][i]= level-1;
3560 //                coeff[2][k]= level-2;
3561             }else{
3562                 level= (bias - level)>>QMAT_SHIFT;
3563                 coeff[0][i]= -level;
3564                 coeff[1][i]= -level+1;
3565 //                coeff[2][k]= -level+2;
3566             }
3567             coeff_count[i]= FFMIN(level, 2);
3568             assert(coeff_count[i]);
3569             max |=level;
3570         }else{
             /* quantizes to 0: the single candidate is +1 or -1, following the sign of level */
3571             coeff[0][i]= (level>>31)|1;
3572             coeff_count[i]= 1;
3573         }
3574     }
3575
3576     *overflow= s->max_qcoeff < max; //overflow might have happened
3577
     /* nothing to code: clear the coefficients covered by the search and return */
3578     if(last_non_zero < start_i){
3579         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3580         return last_non_zero;
3581     }
3582
3583     score_tab[start_i]= 0;
3584     survivor[0]= start_i;
3585     survivor_count= 1;
3586
     /* dynamic programming over the scan positions: for every candidate level
      * of coefficient i try all surviving start positions of the preceding run */
3587     for(i=start_i; i<=last_non_zero; i++){
3588         int level_index, j, zero_distortion;
3589         int dct_coeff= FFABS(block[ scantable[i] ]);
3590         int best_score=256*256*256*120;
3591
3592         if (s->fdsp.fdct == ff_fdct_ifast)
3593             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
         /* distortion of coding this coefficient as 0; scores are kept relative to it */
3594         zero_distortion= dct_coeff*dct_coeff;
3595
3596         for(level_index=0; level_index < coeff_count[i]; level_index++){
3597             int distortion;
3598             int level= coeff[level_index][i];
3599             const int alevel= FFABS(level);
3600             int unquant_coeff;
3601
3602             assert(level);
3603
             /* reconstruct the value the candidate level dequantizes to */
3604             if(s->out_format == FMT_H263){
3605                 unquant_coeff= alevel*qmul + qadd;
3606             }else{ //MPEG1
3607                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3608                 if(s->mb_intra){
3609                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3610                         unquant_coeff =   (unquant_coeff - 1) | 1;
3611                 }else{
3612                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3613                         unquant_coeff =   (unquant_coeff - 1) | 1;
3614                 }
3615                 unquant_coeff<<= 3;
3616             }
3617
3618             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
             /* level is biased by 64 so that the range -64..63 can be tested with
              * one mask and used as VLC length table index; other levels are
              * charged the escape length */
3619             level+=64;
3620             if((level&(~127)) == 0){
3621                 for(j=survivor_count-1; j>=0; j--){
3622                     int run= i - survivor[j];
3623                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3624                     score += score_tab[i-run];
3625
3626                     if(score < best_score){
3627                         best_score= score;
3628                         run_tab[i+1]= run;
3629                         level_tab[i+1]= level-64;
3630                     }
3631                 }
3632
                 /* for FMT_H263 the last coefficient uses its own length table,
                  * so the best end of the block is tracked separately */
3633                 if(s->out_format == FMT_H263){
3634                     for(j=survivor_count-1; j>=0; j--){
3635                         int run= i - survivor[j];
3636                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3637                         score += score_tab[i-run];
3638                         if(score < last_score){
3639                             last_score= score;
3640                             last_run= run;
3641                             last_level= level-64;
3642                             last_i= i+1;
3643                         }
3644                     }
3645                 }
3646             }else{
3647                 distortion += esc_length*lambda;
3648                 for(j=survivor_count-1; j>=0; j--){
3649                     int run= i - survivor[j];
3650                     int score= distortion + score_tab[i-run];
3651
3652                     if(score < best_score){
3653                         best_score= score;
3654                         run_tab[i+1]= run;
3655                         level_tab[i+1]= level-64;
3656                     }
3657                 }
3658
3659                 if(s->out_format == FMT_H263){
3660                   for(j=survivor_count-1; j>=0; j--){
3661                         int run= i - survivor[j];
3662                         int score= distortion + score_tab[i-run];
3663                         if(score < last_score){
3664                             last_score= score;
3665                             last_run= run;
3666                             last_level= level-64;
3667                             last_i= i+1;
3668                         }
3669                     }
3670                 }
3671             }
3672         }
3673
3674         score_tab[i+1]= best_score;
3675
         /* drop the trailing survivors whose score is worse than the new best
          * score (with a margin of lambda when last_non_zero > 27) */
3676         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3677         if(last_non_zero <= 27){
3678             for(; survivor_count; survivor_count--){
3679                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3680                     break;
3681             }
3682         }else{
3683             for(; survivor_count; survivor_count--){
3684                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3685                     break;
3686             }
3687         }
3688
3689         survivor[ survivor_count++ ]= i+1;
3690     }
3691
     /* formats other than FMT_H263: choose the end position afterwards; every
      * end position except 0 is charged 2*lambda
      * (NOTE(review): presumably an approximation of the end of block code) */
3692     if(s->out_format != FMT_H263){
3693         last_score= 256*256*256*120;
3694         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3695             int score= score_tab[i];
3696             if(i) score += lambda*2; //FIXME exacter?
3697
3698             if(score < last_score){
3699                 last_score= score;
3700                 last_i= i;
3701                 last_level= level_tab[i];
3702                 last_run= run_tab[i];
3703             }
3704         }
3705     }
3706
3707     s->coded_score[n] = last_score;
3708
     /* keep |block[0]| for the special case below before the levels are cleared */
3709     dc= FFABS(block[0]);
3710     last_non_zero= last_i - 1;
3711     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3712
3713     if(last_non_zero < start_i)
3714         return last_non_zero;
3715
     /* only coefficient 0 of a block without separate DC handling is left:
      * decide again between its candidate levels and coding nothing at all */
3716     if(last_non_zero == 0 && start_i == 0){
3717         int best_level= 0;
3718         int best_score= dc * dc;
3719
3720         for(i=0; i<coeff_count[0]; i++){
3721             int level= coeff[i][0];
3722             int alevel= FFABS(level);
3723             int unquant_coeff, score, distortion;
3724
3725             if(s->out_format == FMT_H263){
3726                     unquant_coeff= (alevel*qmul + qadd)>>3;
3727             }else{ //MPEG1
3728                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3729                     unquant_coeff =   (unquant_coeff - 1) | 1;
3730             }
3731             unquant_coeff = (unquant_coeff + 4) >> 3;
3732             unquant_coeff<<= 3 + 3;
3733
3734             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3735             level+=64;
3736             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3737             else                    score= distortion + esc_length*lambda;
3738
3739             if(score < best_score){
3740                 best_score= score;
3741                 best_level= level - 64;
3742             }
3743         }
3744         block[0]= best_level;
3745         s->coded_score[n] = best_score - dc*dc;
3746         if(best_level == 0) return -1;
3747         else                return last_non_zero;
3748     }
3749
     /* write the chosen path back: start at the last coded coefficient and
      * step backwards through run_tab[]/level_tab[], storing each level at
      * its permuted scan position */
3750     i= last_i;
3751     assert(last_level);
3752
3753     block[ perm_scantable[last_non_zero] ]= last_level;
3754     i -= last_run + 1;
3755
3756     for(; i>start_i; i -= run_tab[i] + 1){
3757         block[ perm_scantable[i-1] ]= level_tab[i];
3758     }
3759
3760     return last_non_zero;
3761 }
3762
3763 //#define REFINE_STATS 1
3764 static int16_t basis[64][64];
3765
3766 static void build_basis(uint8_t *perm){
3767     int i, j, x, y;
3768     emms_c();
3769     for(i=0; i<8; i++){
3770         for(j=0; j<8; j++){
3771             for(y=0; y<8; y++){
3772                 for(x=0; x<8; x++){
3773                     double s= 0.25*(1<<BASIS_SHIFT);
3774                     int index= 8*i + j;
3775                     int perm_index= perm[index];
3776                     if(i==0) s*= sqrt(0.5);
3777                     if(j==0) s*= sqrt(0.5);
3778                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3779                 }
3780             }
3781         }
3782     }
3783 }
3784
3785 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3786                         int16_t *block, int16_t *weight, int16_t *orig,
3787                         int n, int qscale){
3788     int16_t rem[64];
3789     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3790     const uint8_t *scantable= s->intra_scantable.scantable;
3791     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3792 //    unsigned int threshold1, threshold2;
3793 //    int bias=0;
3794     int run_tab[65];
3795     int prev_run=0;
3796     int prev_level=0;
3797     int qmul, qadd, start_i, last_non_zero, i, dc;
3798     uint8_t * length;
3799     uint8_t * last_length;
3800     int lambda;
3801     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3802 #ifdef REFINE_STATS
3803 static int count=0;
3804 static int after_last=0;
3805 static int to_zero=0;
3806 static int from_zero=0;
3807 static int raise=0;
3808 static int lower=0;
3809 static int messed_sign=0;
3810 #endif
3811
3812     if(basis[0][0] == 0)
3813         build_basis(s->idsp.idct_permutation);
3814
3815     qmul= qscale*2;
3816     qadd= (qscale-1)|1;
3817     if (s->mb_intra) {
3818         if (!s->h263_aic) {
3819             if (n < 4)
3820                 q = s->y_dc_scale;
3821             else
3822                 q = s->c_dc_scale;
3823         } else{
3824             /* For AIC we skip quant/dequant of INTRADC */
3825             q = 1;
3826             qadd=0;
3827         }
3828         q <<= RECON_SHIFT-3;
3829         /* note: block[0] is assumed to be positive */
3830         dc= block[0]*q;
3831 //        block[0] = (block[0] + (q >> 1)) / q;
3832         start_i = 1;
3833 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3834 //            bias= 1<<(QMAT_SHIFT-1);
3835         length     = s->intra_ac_vlc_length;
3836         last_length= s->intra_ac_vlc_last_length;
3837     } else {
3838         dc= 0;
3839         start_i = 0;
3840         length     = s->inter_ac_vlc_length;
3841         last_length= s->inter_ac_vlc_last_length;
3842     }
3843     last_non_zero = s->block_last_index[n];
3844
3845 #ifdef REFINE_STATS
3846 {START_TIMER
3847 #endif
3848     dc += (1<<(RECON_SHIFT-1));
3849     for(i=0; i<64; i++){
3850         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3851     }
3852 #ifdef REFINE_STATS
3853 STOP_TIMER("memset rem[]")}
3854 #endif
3855     sum=0;
3856     for(i=0; i<64; i++){
3857         int one= 36;
3858         int qns=4;
3859         int w;
3860
3861         w= FFABS(weight[i]) + qns*one;
3862         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3863
3864         weight[i] = w;
3865 //        w=weight[i] = (63*qns + (w/2)) / w;
3866
3867         assert(w>0);
3868         assert(w<(1<<6));
3869         sum += w*w;
3870     }
3871     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3872 #ifdef REFINE_STATS
3873 {START_TIMER
3874 #endif
3875     run=0;
3876     rle_index=0;
3877     for(i=start_i; i<=last_non_zero; i++){
3878         int j= perm_scantable[i];
3879         const int level= block[j];
3880         int coeff;
3881
3882         if(level){
3883             if(level<0) coeff= qmul*level - qadd;
3884             else        coeff= qmul*level + qadd;
3885             run_tab[rle_index++]=run;
3886             run=0;
3887
3888             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
3889         }else{
3890             run++;
3891         }
3892     }
3893 #ifdef REFINE_STATS
3894 if(last_non_zero>0){
3895 STOP_TIMER("init rem[]")
3896 }
3897 }
3898
3899 {START_TIMER
3900 #endif
3901     for(;;){
3902         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
3903         int best_coeff=0;
3904         int best_change=0;
3905         int run2, best_unquant_change=0, analyze_gradient;
3906 #ifdef REFINE_STATS
3907 {START_TIMER
3908 #endif
3909         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3910
3911         if(analyze_gradient){
3912 #ifdef REFINE_STATS
3913 {START_TIMER
3914 #endif
3915             for(i=0; i<64; i++){
3916                 int w= weight[i];
3917
3918                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3919             }
3920 #ifdef REFINE_STATS
3921 STOP_TIMER("rem*w*w")}
3922 {START_TIMER
3923 #endif
3924             s->fdsp.fdct(d1);
3925 #ifdef REFINE_STATS
3926 STOP_TIMER("dct")}
3927 #endif
3928         }
3929
3930         if(start_i){
3931             const int level= block[0];
3932             int change, old_coeff;
3933
3934             assert(s->mb_intra);
3935
3936             old_coeff= q*level;
3937
3938             for(change=-1; change<=1; change+=2){
3939                 int new_level= level + change;
3940                 int score, new_coeff;
3941
3942                 new_coeff= q*new_level;
3943                 if(new_coeff >= 2048 || new_coeff < 0)
3944                     continue;
3945
3946                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
3947                                                   new_coeff - old_coeff);
3948                 if(score<best_score){
3949                     best_score= score;
3950                     best_coeff= 0;
3951                     best_change= change;
3952                     best_unquant_change= new_coeff - old_coeff;
3953                 }
3954             }
3955         }
3956
3957         run=0;
3958         rle_index=0;
3959         run2= run_tab[rle_index++];
3960         prev_level=0;
3961         prev_run=0;
3962
3963         for(i=start_i; i<64; i++){
3964             int j= perm_scantable[i];
3965             const int level= block[j];
3966             int change, old_coeff;
3967
3968             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3969                 break;
3970
3971             if(level){
3972                 if(level<0) old_coeff= qmul*level - qadd;
3973                 else        old_coeff= qmul*level + qadd;
3974                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3975             }else{
3976                 old_coeff=0;
3977                 run2--;
3978                 assert(run2>=0 || i >= last_non_zero );
3979             }
3980
3981             for(change=-1; change<=1; change+=2){
3982                 int new_level= level + change;
3983                 int score, new_coeff, unquant_change;
3984
3985                 score=0;
3986                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3987                    continue;
3988
3989                 if(new_level){
3990                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3991                     else            new_coeff= qmul*new_level + qadd;
3992                     if(new_coeff >= 2048 || new_coeff <= -2048)
3993                         continue;
3994                     //FIXME check for overflow
3995
3996                     if(level){
3997                         if(level < 63 && level > -63){
3998                             if(i < last_non_zero)
3999                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4000                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4001                             else
4002                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4003                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4004                         }
4005                     }else{
4006                         assert(FFABS(new_level)==1);
4007
4008                         if(analyze_gradient){
4009                             int g= d1[ scantable[i] ];
4010                             if(g && (g^new_level) >= 0)
4011                                 continue;
4012                         }
4013
4014                         if(i < last_non_zero){
4015                             int next_i= i + run2 + 1;
4016                             int next_level= block[ perm_scantable[next_i] ] + 64;
4017
4018                             if(next_level&(~127))
4019                                 next_level= 0;
4020
4021                             if(next_i < last_non_zero)
4022                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4023                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4024                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4025                             else
4026                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4027                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4028                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4029                         }else{
4030                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4031                             if(prev_level){
4032                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4033                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4034                             }
4035                         }
4036                     }
4037                 }else{
4038                     new_coeff=0;
4039                     assert(FFABS(level)==1);
4040
4041                     if(i < last_non_zero){
4042                         int next_i= i + run2 + 1;
4043                         int next_level= block[ perm_scantable[next_i] ] + 64;
4044
4045                         if(next_level&(~127))
4046                             next_level= 0;
4047
4048                         if(next_i < last_non_zero)
4049                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4050                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4051                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4052                         else
4053                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4054                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4055                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4056                     }else{
4057                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4058                         if(prev_level){
4059                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4060                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4061                         }
4062                     }
4063                 }
4064
4065                 score *= lambda;
4066
4067                 unquant_change= new_coeff - old_coeff;
4068                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
4069
4070                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4071                                                    unquant_change);
4072                 if(score<best_score){
4073                     best_score= score;
4074                     best_coeff= i;
4075                     best_change= change;
4076                     best_unquant_change= unquant_change;
4077                 }
4078             }
4079             if(level){
4080                 prev_level= level + 64;
4081                 if(prev_level&(~127))
4082                     prev_level= 0;
4083                 prev_run= run;
4084                 run=0;
4085             }else{
4086                 run++;
4087             }
4088         }
4089 #ifdef REFINE_STATS
4090 STOP_TIMER("iterative step")}
4091 #endif
4092
4093         if(best_change){
4094             int j= perm_scantable[ best_coeff ];
4095
4096             block[j] += best_change;
4097
4098             if(best_coeff > last_non_zero){
4099                 last_non_zero= best_coeff;
4100                 assert(block[j]);
4101 #ifdef REFINE_STATS
4102 after_last++;
4103 #endif
4104             }else{
4105 #ifdef REFINE_STATS
4106 if(block[j]){
4107     if(block[j] - best_change){
4108         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4109             raise++;
4110         }else{
4111             lower++;
4112         }
4113     }else{
4114         from_zero++;
4115     }
4116 }else{
4117     to_zero++;
4118 }
4119 #endif
4120                 for(; last_non_zero>=start_i; last_non_zero--){
4121                     if(block[perm_scantable[last_non_zero]])
4122                         break;
4123                 }
4124             }
4125 #ifdef REFINE_STATS
4126 count++;
4127 if(256*256*256*64 % count == 0){
4128     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4129 }
4130 #endif
4131             run=0;
4132             rle_index=0;
4133             for(i=start_i; i<=last_non_zero; i++){
4134                 int j= perm_scantable[i];
4135                 const int level= block[j];
4136
4137                  if(level){
4138                      run_tab[rle_index++]=run;
4139                      run=0;
4140                  }else{
4141                      run++;
4142                  }
4143             }
4144
4145             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4146         }else{
4147             break;
4148         }
4149     }
4150 #ifdef REFINE_STATS
4151 if(last_non_zero>0){
4152 STOP_TIMER("iterative search")
4153 }
4154 }
4155 #endif
4156
4157     return last_non_zero;
4158 }
4159
4160 int ff_dct_quantize_c(MpegEncContext *s,
4161                         int16_t *block, int n,
4162                         int qscale, int *overflow)
4163 {
4164     int i, j, level, last_non_zero, q, start_i;
4165     const int *qmat;
4166     const uint8_t *scantable= s->intra_scantable.scantable;
4167     int bias;
4168     int max=0;
4169     unsigned int threshold1, threshold2;
4170
4171     s->fdsp.fdct(block);
4172
4173     if(s->dct_error_sum)
4174         s->denoise_dct(s, block);
4175
4176     if (s->mb_intra) {
4177         if (!s->h263_aic) {
4178             if (n < 4)
4179                 q = s->y_dc_scale;
4180             else
4181                 q = s->c_dc_scale;
4182             q = q << 3;
4183         } else
4184             /* For AIC we skip quant/dequant of INTRADC */
4185             q = 1 << 3;
4186
4187         /* note: block[0] is assumed to be positive */
4188         block[0] = (block[0] + (q >> 1)) / q;
4189         start_i = 1;
4190         last_non_zero = 0;
4191         qmat = s->q_intra_matrix[qscale];
4192         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4193     } else {
4194         start_i = 0;
4195         last_non_zero = -1;
4196         qmat = s->q_inter_matrix[qscale];
4197         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4198     }
4199     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4200     threshold2= (threshold1<<1);
4201     for(i=63;i>=start_i;i--) {
4202         j = scantable[i];
4203         level = block[j] * qmat[j];
4204
4205         if(((unsigned)(level+threshold1))>threshold2){
4206             last_non_zero = i;
4207             break;
4208         }else{
4209             block[j]=0;
4210         }
4211     }
4212     for(i=start_i; i<=last_non_zero; i++) {
4213         j = scantable[i];
4214         level = block[j] * qmat[j];
4215
4216 //        if(   bias+level >= (1<<QMAT_SHIFT)
4217 //           || bias-level >= (1<<QMAT_SHIFT)){
4218         if(((unsigned)(level+threshold1))>threshold2){
4219             if(level>0){
4220                 level= (bias + level)>>QMAT_SHIFT;
4221                 block[j]= level;
4222             }else{
4223                 level= (bias - level)>>QMAT_SHIFT;
4224                 block[j]= -level;
4225             }
4226             max |=level;
4227         }else{
4228             block[j]=0;
4229         }
4230     }
4231     *overflow= s->max_qcoeff < max; //overflow might have happened
4232
4233     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4234     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4235         ff_block_permute(block, s->idsp.idct_permutation,
4236                          scantable, last_non_zero);
4237
4238     return last_non_zero;
4239 }
4240
/* Byte offset of field x inside MpegEncContext, for the AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Option flags shared by the tables below.  Parenthesized so the expansion
 * stays a single operand inside any larger expression. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4243 static const AVOption h263_options[] = {
4244     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4245     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4246     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4247     FF_MPV_COMMON_OPTS
4248     { NULL },
4249 };
4250
4251 static const AVClass h263_class = {
4252     .class_name = "H.263 encoder",
4253     .item_name  = av_default_item_name,
4254     .option     = h263_options,
4255     .version    = LIBAVUTIL_VERSION_INT,
4256 };
4257
4258 AVCodec ff_h263_encoder = {
4259     .name           = "h263",
4260     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4261     .type           = AVMEDIA_TYPE_VIDEO,
4262     .id             = AV_CODEC_ID_H263,
4263     .priv_data_size = sizeof(MpegEncContext),
4264     .init           = ff_mpv_encode_init,
4265     .encode2        = ff_mpv_encode_picture,
4266     .close          = ff_mpv_encode_end,
4267     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4268     .priv_class     = &h263_class,
4269 };
4270
4271 static const AVOption h263p_options[] = {
4272     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4273     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4274     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4275     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4276     FF_MPV_COMMON_OPTS
4277     { NULL },
4278 };
4279 static const AVClass h263p_class = {
4280     .class_name = "H.263p encoder",
4281     .item_name  = av_default_item_name,
4282     .option     = h263p_options,
4283     .version    = LIBAVUTIL_VERSION_INT,
4284 };
4285
4286 AVCodec ff_h263p_encoder = {
4287     .name           = "h263p",
4288     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4289     .type           = AVMEDIA_TYPE_VIDEO,
4290     .id             = AV_CODEC_ID_H263P,
4291     .priv_data_size = sizeof(MpegEncContext),
4292     .init           = ff_mpv_encode_init,
4293     .encode2        = ff_mpv_encode_picture,
4294     .close          = ff_mpv_encode_end,
4295     .capabilities   = CODEC_CAP_SLICE_THREADS,
4296     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4297     .priv_class     = &h263p_class,
4298 };
4299
4300 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4301
4302 AVCodec ff_msmpeg4v2_encoder = {
4303     .name           = "msmpeg4v2",
4304     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4305     .type           = AVMEDIA_TYPE_VIDEO,
4306     .id             = AV_CODEC_ID_MSMPEG4V2,
4307     .priv_data_size = sizeof(MpegEncContext),
4308     .init           = ff_mpv_encode_init,
4309     .encode2        = ff_mpv_encode_picture,
4310     .close          = ff_mpv_encode_end,
4311     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4312     .priv_class     = &msmpeg4v2_class,
4313 };
4314
4315 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4316
4317 AVCodec ff_msmpeg4v3_encoder = {
4318     .name           = "msmpeg4",
4319     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4320     .type           = AVMEDIA_TYPE_VIDEO,
4321     .id             = AV_CODEC_ID_MSMPEG4V3,
4322     .priv_data_size = sizeof(MpegEncContext),
4323     .init           = ff_mpv_encode_init,
4324     .encode2        = ff_mpv_encode_picture,
4325     .close          = ff_mpv_encode_end,
4326     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4327     .priv_class     = &msmpeg4v3_class,
4328 };
4329
4330 FF_MPV_GENERIC_CLASS(wmv1)
4331
4332 AVCodec ff_wmv1_encoder = {
4333     .name           = "wmv1",
4334     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4335     .type           = AVMEDIA_TYPE_VIDEO,
4336     .id             = AV_CODEC_ID_WMV1,
4337     .priv_data_size = sizeof(MpegEncContext),
4338     .init           = ff_mpv_encode_init,
4339     .encode2        = ff_mpv_encode_picture,
4340     .close          = ff_mpv_encode_end,
4341     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4342     .priv_class     = &wmv1_class,
4343 };