]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
mpegvideo: Move QUANT_BIAS_SHIFT define to the only place it is used
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "idctdsp.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "h261.h"
44 #include "h263.h"
45 #include "mjpegenc_common.h"
46 #include "mathops.h"
47 #include "mpegutils.h"
48 #include "mjpegenc.h"
49 #include "msmpeg4.h"
50 #include "pixblockdsp.h"
51 #include "qpeldsp.h"
52 #include "faandct.h"
53 #include "thread.h"
54 #include "aandcttab.h"
55 #include "flv.h"
56 #include "mpeg4video.h"
57 #include "internal.h"
58 #include "bytestream.h"
59 #include <limits.h>
60
/* Fixed-point precision of the quantizer bias values: ff_MPV_encode_init()
 * expresses the default biases as fractions scaled by 1 << QUANT_BIAS_SHIFT
 * (e.g. 3/8 is written as 3 << (QUANT_BIAS_SHIFT - 3)), and
 * ff_convert_matrix() rescales them by 16 - QUANT_BIAS_SHIFT bits. */
#define QUANT_BIAS_SHIFT 8

/* Forward declarations of file-local helpers; their definitions are not part
 * of this section of the file. */
static int encode_picture(MpegEncContext *s, int picture_number);
static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
static int sse_mb(MpegEncContext *s);
static void denoise_dct_c(MpegEncContext *s, int16_t *block);
static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);

/* Zero-initialized default tables that MPV_encode_defaults() installs as
 * s->me.mv_penalty and s->fcode_tab. */
static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
static uint8_t default_fcode_tab[MAX_MV * 2 + 1];

/* Option table consisting of the common FF_MPV_COMMON_OPTS entries followed
 * by the NULL terminator entry. */
const AVOption ff_mpv_generic_options[] = {
    FF_MPV_COMMON_OPTS
    { NULL },
};
76
77 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
78                        uint16_t (*qmat16)[2][64],
79                        const uint16_t *quant_matrix,
80                        int bias, int qmin, int qmax, int intra)
81 {
82     FDCTDSPContext *fdsp = &s->fdsp;
83     int qscale;
84     int shift = 0;
85
86     for (qscale = qmin; qscale <= qmax; qscale++) {
87         int i;
88         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
89             fdsp->fdct == ff_jpeg_fdct_islow_10 ||
90             fdsp->fdct == ff_faandct) {
91             for (i = 0; i < 64; i++) {
92                 const int j = s->idsp.idct_permutation[i];
93                 /* 16 <= qscale * quant_matrix[i] <= 7905
94                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
95                  *             19952 <=              x  <= 249205026
96                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
97                  *           3444240 >= (1 << 36) / (x) >= 275 */
98
99                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
100                                         (qscale * quant_matrix[j]));
101             }
102         } else if (fdsp->fdct == ff_fdct_ifast) {
103             for (i = 0; i < 64; i++) {
104                 const int j = s->idsp.idct_permutation[i];
105                 /* 16 <= qscale * quant_matrix[i] <= 7905
106                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
107                  *             19952 <=              x  <= 249205026
108                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
109                  *           3444240 >= (1 << 36) / (x) >= 275 */
110
111                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
112                                         (ff_aanscales[i] * qscale *
113                                          quant_matrix[j]));
114             }
115         } else {
116             for (i = 0; i < 64; i++) {
117                 const int j = s->idsp.idct_permutation[i];
118                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
119                  * Assume x = qscale * quant_matrix[i]
120                  * So             16 <=              x  <= 7905
121                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
122                  * so          32768 >= (1 << 19) / (x) >= 67 */
123                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
124                                         (qscale * quant_matrix[j]));
125                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
126                 //                    (qscale * quant_matrix[i]);
127                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
128                                        (qscale * quant_matrix[j]);
129
130                 if (qmat16[qscale][0][i] == 0 ||
131                     qmat16[qscale][0][i] == 128 * 256)
132                     qmat16[qscale][0][i] = 128 * 256 - 1;
133                 qmat16[qscale][1][i] =
134                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
135                                 qmat16[qscale][0][i]);
136             }
137         }
138
139         for (i = intra; i < 64; i++) {
140             int64_t max = 8191;
141             if (fdsp->fdct == ff_fdct_ifast) {
142                 max = (8191LL * ff_aanscales[i]) >> 14;
143             }
144             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
145                 shift++;
146             }
147         }
148     }
149     if (shift) {
150         av_log(NULL, AV_LOG_INFO,
151                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
152                QMAT_SHIFT - shift);
153     }
154 }
155
156 static inline void update_qscale(MpegEncContext *s)
157 {
158     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
159                 (FF_LAMBDA_SHIFT + 7);
160     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
161
162     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
163                  FF_LAMBDA_SHIFT;
164 }
165
166 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
167 {
168     int i;
169
170     if (matrix) {
171         put_bits(pb, 1, 1);
172         for (i = 0; i < 64; i++) {
173             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
174         }
175     } else
176         put_bits(pb, 1, 0);
177 }
178
179 /**
180  * init s->current_picture.qscale_table from s->lambda_table
181  */
182 void ff_init_qscale_tab(MpegEncContext *s)
183 {
184     int8_t * const qscale_table = s->current_picture.qscale_table;
185     int i;
186
187     for (i = 0; i < s->mb_num; i++) {
188         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
189         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
190         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
191                                                   s->avctx->qmax);
192     }
193 }
194
195 static void update_duplicate_context_after_me(MpegEncContext *dst,
196                                               MpegEncContext *src)
197 {
198 #define COPY(a) dst->a= src->a
199     COPY(pict_type);
200     COPY(current_picture);
201     COPY(f_code);
202     COPY(b_code);
203     COPY(qscale);
204     COPY(lambda);
205     COPY(lambda2);
206     COPY(picture_in_gop_number);
207     COPY(gop_picture_number);
208     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
209     COPY(progressive_frame);    // FIXME don't set in encode_header
210     COPY(partitioned_frame);    // FIXME don't set in encode_header
211 #undef COPY
212 }
213
214 /**
215  * Set the given MpegEncContext to defaults for encoding.
216  * the changed fields will not depend upon the prior state of the MpegEncContext.
217  */
218 static void MPV_encode_defaults(MpegEncContext *s)
219 {
220     int i;
221     ff_MPV_common_defaults(s);
222
223     for (i = -16; i < 16; i++) {
224         default_fcode_tab[i + MAX_MV] = 1;
225     }
226     s->me.mv_penalty = default_mv_penalty;
227     s->fcode_tab     = default_fcode_tab;
228
229     s->input_picture_number  = 0;
230     s->picture_in_gop_number = 0;
231 }
232
233 /* init video encoder */
234 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
235 {
236     MpegEncContext *s = avctx->priv_data;
237     int i, ret, format_supported;
238
239     MPV_encode_defaults(s);
240
241     switch (avctx->codec_id) {
242     case AV_CODEC_ID_MPEG2VIDEO:
243         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
244             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
245             av_log(avctx, AV_LOG_ERROR,
246                    "only YUV420 and YUV422 are supported\n");
247             return -1;
248         }
249         break;
250     case AV_CODEC_ID_MJPEG:
251         format_supported = 0;
252         /* JPEG color space */
253         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
254             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
255             (avctx->color_range == AVCOL_RANGE_JPEG &&
256              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
257               avctx->pix_fmt == AV_PIX_FMT_YUV422P)))
258             format_supported = 1;
259         /* MPEG color space */
260         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
261                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
262                   avctx->pix_fmt == AV_PIX_FMT_YUV422P))
263             format_supported = 1;
264
265         if (!format_supported) {
266             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
267             return -1;
268         }
269         break;
270     default:
271         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
272             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
273             return -1;
274         }
275     }
276
277     switch (avctx->pix_fmt) {
278     case AV_PIX_FMT_YUVJ422P:
279     case AV_PIX_FMT_YUV422P:
280         s->chroma_format = CHROMA_422;
281         break;
282     case AV_PIX_FMT_YUVJ420P:
283     case AV_PIX_FMT_YUV420P:
284     default:
285         s->chroma_format = CHROMA_420;
286         break;
287     }
288
289     s->bit_rate = avctx->bit_rate;
290     s->width    = avctx->width;
291     s->height   = avctx->height;
292     if (avctx->gop_size > 600 &&
293         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
294         av_log(avctx, AV_LOG_ERROR,
295                "Warning keyframe interval too large! reducing it ...\n");
296         avctx->gop_size = 600;
297     }
298     s->gop_size     = avctx->gop_size;
299     s->avctx        = avctx;
300     s->flags        = avctx->flags;
301     s->flags2       = avctx->flags2;
302     if (avctx->max_b_frames > MAX_B_FRAMES) {
303         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
304                "is %d.\n", MAX_B_FRAMES);
305     }
306     s->max_b_frames = avctx->max_b_frames;
307     s->codec_id     = avctx->codec->id;
308     s->strict_std_compliance = avctx->strict_std_compliance;
309     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
310     s->mpeg_quant         = avctx->mpeg_quant;
311     s->rtp_mode           = !!avctx->rtp_payload_size;
312     s->intra_dc_precision = avctx->intra_dc_precision;
313     s->user_specified_pts = AV_NOPTS_VALUE;
314
315     if (s->gop_size <= 1) {
316         s->intra_only = 1;
317         s->gop_size   = 12;
318     } else {
319         s->intra_only = 0;
320     }
321
322     s->me_method = avctx->me_method;
323
324     /* Fixed QSCALE */
325     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
326
327     s->adaptive_quant = (s->avctx->lumi_masking ||
328                          s->avctx->dark_masking ||
329                          s->avctx->temporal_cplx_masking ||
330                          s->avctx->spatial_cplx_masking  ||
331                          s->avctx->p_masking      ||
332                          s->avctx->border_masking ||
333                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
334                         !s->fixed_qscale;
335
336     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
337
338     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
339         av_log(avctx, AV_LOG_ERROR,
340                "a vbv buffer size is needed, "
341                "for encoding with a maximum bitrate\n");
342         return -1;
343     }
344
345     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
346         av_log(avctx, AV_LOG_INFO,
347                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
348     }
349
350     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
351         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
352         return -1;
353     }
354
355     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
356         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
357         return -1;
358     }
359
360     if (avctx->rc_max_rate &&
361         avctx->rc_max_rate == avctx->bit_rate &&
362         avctx->rc_max_rate != avctx->rc_min_rate) {
363         av_log(avctx, AV_LOG_INFO,
364                "impossible bitrate constraints, this will fail\n");
365     }
366
367     if (avctx->rc_buffer_size &&
368         avctx->bit_rate * (int64_t)avctx->time_base.num >
369             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
370         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
371         return -1;
372     }
373
374     if (!s->fixed_qscale &&
375         avctx->bit_rate * av_q2d(avctx->time_base) >
376             avctx->bit_rate_tolerance) {
377         av_log(avctx, AV_LOG_ERROR,
378                "bitrate tolerance too small for bitrate\n");
379         return -1;
380     }
381
382     if (s->avctx->rc_max_rate &&
383         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
384         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
385          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
386         90000LL * (avctx->rc_buffer_size - 1) >
387             s->avctx->rc_max_rate * 0xFFFFLL) {
388         av_log(avctx, AV_LOG_INFO,
389                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
390                "specified vbv buffer is too large for the given bitrate!\n");
391     }
392
393     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
394         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
395         s->codec_id != AV_CODEC_ID_FLV1) {
396         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
397         return -1;
398     }
399
400     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
401         av_log(avctx, AV_LOG_ERROR,
402                "OBMC is only supported with simple mb decision\n");
403         return -1;
404     }
405
406     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
407         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
408         return -1;
409     }
410
411     if (s->max_b_frames                    &&
412         s->codec_id != AV_CODEC_ID_MPEG4      &&
413         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
414         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
415         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
416         return -1;
417     }
418
419     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
420          s->codec_id == AV_CODEC_ID_H263  ||
421          s->codec_id == AV_CODEC_ID_H263P) &&
422         (avctx->sample_aspect_ratio.num > 255 ||
423          avctx->sample_aspect_ratio.den > 255)) {
424         av_log(avctx, AV_LOG_ERROR,
425                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
426                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
427         return -1;
428     }
429
430     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
431         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
432         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
433         return -1;
434     }
435
436     // FIXME mpeg2 uses that too
437     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
438         av_log(avctx, AV_LOG_ERROR,
439                "mpeg2 style quantization not supported by codec\n");
440         return -1;
441     }
442
443     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
444         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
445         return -1;
446     }
447
448     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
449         s->avctx->mb_decision != FF_MB_DECISION_RD) {
450         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
451         return -1;
452     }
453
454     if (s->avctx->scenechange_threshold < 1000000000 &&
455         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
456         av_log(avctx, AV_LOG_ERROR,
457                "closed gop with scene change detection are not supported yet, "
458                "set threshold to 1000000000\n");
459         return -1;
460     }
461
462     if (s->flags & CODEC_FLAG_LOW_DELAY) {
463         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
464             av_log(avctx, AV_LOG_ERROR,
465                   "low delay forcing is only available for mpeg2\n");
466             return -1;
467         }
468         if (s->max_b_frames != 0) {
469             av_log(avctx, AV_LOG_ERROR,
470                    "b frames cannot be used with low delay\n");
471             return -1;
472         }
473     }
474
475     if (s->q_scale_type == 1) {
476         if (avctx->qmax > 12) {
477             av_log(avctx, AV_LOG_ERROR,
478                    "non linear quant only supports qmax <= 12 currently\n");
479             return -1;
480         }
481     }
482
483     if (s->avctx->thread_count > 1         &&
484         s->codec_id != AV_CODEC_ID_MPEG4      &&
485         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
486         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
487         (s->codec_id != AV_CODEC_ID_H263P)) {
488         av_log(avctx, AV_LOG_ERROR,
489                "multi threaded encoding not supported by codec\n");
490         return -1;
491     }
492
493     if (s->avctx->thread_count < 1) {
494         av_log(avctx, AV_LOG_ERROR,
495                "automatic thread number detection not supported by codec,"
496                "patch welcome\n");
497         return -1;
498     }
499
500     if (s->avctx->thread_count > 1)
501         s->rtp_mode = 1;
502
503     if (!avctx->time_base.den || !avctx->time_base.num) {
504         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
505         return -1;
506     }
507
508     i = (INT_MAX / 2 + 128) >> 8;
509     if (avctx->mb_threshold >= i) {
510         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
511                i - 1);
512         return -1;
513     }
514
515     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
516         av_log(avctx, AV_LOG_INFO,
517                "notice: b_frame_strategy only affects the first pass\n");
518         avctx->b_frame_strategy = 0;
519     }
520
521     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
522     if (i > 1) {
523         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
524         avctx->time_base.den /= i;
525         avctx->time_base.num /= i;
526         //return -1;
527     }
528
529     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
530         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
531         // (a + x * 3 / 8) / x
532         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
533         s->inter_quant_bias = 0;
534     } else {
535         s->intra_quant_bias = 0;
536         // (a - x / 4) / x
537         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
538     }
539
540     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
541         s->intra_quant_bias = avctx->intra_quant_bias;
542     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
543         s->inter_quant_bias = avctx->inter_quant_bias;
544
545     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
546         s->avctx->time_base.den > (1 << 16) - 1) {
547         av_log(avctx, AV_LOG_ERROR,
548                "timebase %d/%d not supported by MPEG 4 standard, "
549                "the maximum admitted value for the timebase denominator "
550                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
551                (1 << 16) - 1);
552         return -1;
553     }
554     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
555
556     switch (avctx->codec->id) {
557     case AV_CODEC_ID_MPEG1VIDEO:
558         s->out_format = FMT_MPEG1;
559         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
560         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
561         break;
562     case AV_CODEC_ID_MPEG2VIDEO:
563         s->out_format = FMT_MPEG1;
564         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
565         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
566         s->rtp_mode   = 1;
567         break;
568     case AV_CODEC_ID_MJPEG:
569         s->out_format = FMT_MJPEG;
570         s->intra_only = 1; /* force intra only for jpeg */
571         if (!CONFIG_MJPEG_ENCODER ||
572             ff_mjpeg_encode_init(s) < 0)
573             return -1;
574         avctx->delay = 0;
575         s->low_delay = 1;
576         break;
577     case AV_CODEC_ID_H261:
578         if (!CONFIG_H261_ENCODER)
579             return -1;
580         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
581             av_log(avctx, AV_LOG_ERROR,
582                    "The specified picture size of %dx%d is not valid for the "
583                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
584                     s->width, s->height);
585             return -1;
586         }
587         s->out_format = FMT_H261;
588         avctx->delay  = 0;
589         s->low_delay  = 1;
590         break;
591     case AV_CODEC_ID_H263:
592         if (!CONFIG_H263_ENCODER)
593         return -1;
594         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
595                              s->width, s->height) == 8) {
596             av_log(avctx, AV_LOG_INFO,
597                    "The specified picture size of %dx%d is not valid for "
598                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
599                    "352x288, 704x576, and 1408x1152."
600                    "Try H.263+.\n", s->width, s->height);
601             return -1;
602         }
603         s->out_format = FMT_H263;
604         avctx->delay  = 0;
605         s->low_delay  = 1;
606         break;
607     case AV_CODEC_ID_H263P:
608         s->out_format = FMT_H263;
609         s->h263_plus  = 1;
610         /* Fx */
611         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
612         s->modified_quant  = s->h263_aic;
613         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
614         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
615
616         /* /Fx */
617         /* These are just to be sure */
618         avctx->delay = 0;
619         s->low_delay = 1;
620         break;
621     case AV_CODEC_ID_FLV1:
622         s->out_format      = FMT_H263;
623         s->h263_flv        = 2; /* format = 1; 11-bit codes */
624         s->unrestricted_mv = 1;
625         s->rtp_mode  = 0; /* don't allow GOB */
626         avctx->delay = 0;
627         s->low_delay = 1;
628         break;
629     case AV_CODEC_ID_RV10:
630         s->out_format = FMT_H263;
631         avctx->delay  = 0;
632         s->low_delay  = 1;
633         break;
634     case AV_CODEC_ID_RV20:
635         s->out_format      = FMT_H263;
636         avctx->delay       = 0;
637         s->low_delay       = 1;
638         s->modified_quant  = 1;
639         s->h263_aic        = 1;
640         s->h263_plus       = 1;
641         s->loop_filter     = 1;
642         s->unrestricted_mv = 0;
643         break;
644     case AV_CODEC_ID_MPEG4:
645         s->out_format      = FMT_H263;
646         s->h263_pred       = 1;
647         s->unrestricted_mv = 1;
648         s->low_delay       = s->max_b_frames ? 0 : 1;
649         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
650         break;
651     case AV_CODEC_ID_MSMPEG4V2:
652         s->out_format      = FMT_H263;
653         s->h263_pred       = 1;
654         s->unrestricted_mv = 1;
655         s->msmpeg4_version = 2;
656         avctx->delay       = 0;
657         s->low_delay       = 1;
658         break;
659     case AV_CODEC_ID_MSMPEG4V3:
660         s->out_format        = FMT_H263;
661         s->h263_pred         = 1;
662         s->unrestricted_mv   = 1;
663         s->msmpeg4_version   = 3;
664         s->flipflop_rounding = 1;
665         avctx->delay         = 0;
666         s->low_delay         = 1;
667         break;
668     case AV_CODEC_ID_WMV1:
669         s->out_format        = FMT_H263;
670         s->h263_pred         = 1;
671         s->unrestricted_mv   = 1;
672         s->msmpeg4_version   = 4;
673         s->flipflop_rounding = 1;
674         avctx->delay         = 0;
675         s->low_delay         = 1;
676         break;
677     case AV_CODEC_ID_WMV2:
678         s->out_format        = FMT_H263;
679         s->h263_pred         = 1;
680         s->unrestricted_mv   = 1;
681         s->msmpeg4_version   = 5;
682         s->flipflop_rounding = 1;
683         avctx->delay         = 0;
684         s->low_delay         = 1;
685         break;
686     default:
687         return -1;
688     }
689
690     avctx->has_b_frames = !s->low_delay;
691
692     s->encoding = 1;
693
694     s->progressive_frame    =
695     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
696                                                 CODEC_FLAG_INTERLACED_ME) ||
697                                 s->alternate_scan);
698
699     /* init */
700     if (ff_MPV_common_init(s) < 0)
701         return -1;
702
703     if (ARCH_X86)
704         ff_MPV_encode_init_x86(s);
705
706     ff_fdctdsp_init(&s->fdsp, avctx);
707     ff_me_cmp_init(&s->mecc, avctx);
708     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
709     ff_pixblockdsp_init(&s->pdsp, avctx);
710     ff_qpeldsp_init(&s->qdsp);
711
712     s->avctx->coded_frame = s->current_picture.f;
713
714     if (s->msmpeg4_version) {
715         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
716                           2 * 2 * (MAX_LEVEL + 1) *
717                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
718     }
719     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
720
721     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
722     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
723     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
724     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
725     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
726                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
727     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
728                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
729
730     if (s->avctx->noise_reduction) {
731         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
732                           2 * 64 * sizeof(uint16_t), fail);
733     }
734
735     if (CONFIG_H263_ENCODER)
736         ff_h263dsp_init(&s->h263dsp);
737     if (!s->dct_quantize)
738         s->dct_quantize = ff_dct_quantize_c;
739     if (!s->denoise_dct)
740         s->denoise_dct  = denoise_dct_c;
741     s->fast_dct_quantize = s->dct_quantize;
742     if (avctx->trellis)
743         s->dct_quantize  = dct_quantize_trellis_c;
744
745     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
746         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
747
748     s->quant_precision = 5;
749
750     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
751     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
752
753     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
754         ff_h261_encode_init(s);
755     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
756         ff_h263_encode_init(s);
757     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
758         ff_msmpeg4_encode_init(s);
759     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
760         && s->out_format == FMT_MPEG1)
761         ff_mpeg1_encode_init(s);
762
763     /* init q matrix */
764     for (i = 0; i < 64; i++) {
765         int j = s->idsp.idct_permutation[i];
766         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
767             s->mpeg_quant) {
768             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
769             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
770         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
771             s->intra_matrix[j] =
772             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
773         } else {
774             /* mpeg1/2 */
775             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
776             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
777         }
778         if (s->avctx->intra_matrix)
779             s->intra_matrix[j] = s->avctx->intra_matrix[i];
780         if (s->avctx->inter_matrix)
781             s->inter_matrix[j] = s->avctx->inter_matrix[i];
782     }
783
784     /* precompute matrix */
785     /* for mjpeg, we do include qscale in the matrix */
786     if (s->out_format != FMT_MJPEG) {
787         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
788                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
789                           31, 1);
790         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
791                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
792                           31, 0);
793     }
794
795     if (ff_rate_control_init(s) < 0)
796         return -1;
797
798 #if FF_API_ERROR_RATE
799     FF_DISABLE_DEPRECATION_WARNINGS
800     if (avctx->error_rate)
801         s->error_rate = avctx->error_rate;
802     FF_ENABLE_DEPRECATION_WARNINGS;
803 #endif
804
805 #if FF_API_NORMALIZE_AQP
806     FF_DISABLE_DEPRECATION_WARNINGS
807     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
808         s->mpv_flags |= FF_MPV_FLAG_NAQ;
809     FF_ENABLE_DEPRECATION_WARNINGS;
810 #endif
811
812 #if FF_API_MV0
813     FF_DISABLE_DEPRECATION_WARNINGS
814     if (avctx->flags & CODEC_FLAG_MV0)
815         s->mpv_flags |= FF_MPV_FLAG_MV0;
816     FF_ENABLE_DEPRECATION_WARNINGS
817 #endif
818
819     if (avctx->b_frame_strategy == 2) {
820         for (i = 0; i < s->max_b_frames + 2; i++) {
821             s->tmp_frames[i] = av_frame_alloc();
822             if (!s->tmp_frames[i])
823                 return AVERROR(ENOMEM);
824
825             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
826             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
827             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
828
829             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
830             if (ret < 0)
831                 return ret;
832         }
833     }
834
835     return 0;
836 fail:
837     ff_MPV_encode_end(avctx);
838     return AVERROR_UNKNOWN;
839 }
840
841 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
842 {
843     MpegEncContext *s = avctx->priv_data;
844     int i;
845
846     ff_rate_control_uninit(s);
847
848     ff_MPV_common_end(s);
849     if (CONFIG_MJPEG_ENCODER &&
850         s->out_format == FMT_MJPEG)
851         ff_mjpeg_encode_close(s);
852
853     av_freep(&avctx->extradata);
854
855     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
856         av_frame_free(&s->tmp_frames[i]);
857
858     ff_free_picture_tables(&s->new_picture);
859     ff_mpeg_unref_picture(s, &s->new_picture);
860
861     av_freep(&s->avctx->stats_out);
862     av_freep(&s->ac_stats);
863
864     av_freep(&s->q_intra_matrix);
865     av_freep(&s->q_inter_matrix);
866     av_freep(&s->q_intra_matrix16);
867     av_freep(&s->q_inter_matrix16);
868     av_freep(&s->input_picture);
869     av_freep(&s->reordered_input_picture);
870     av_freep(&s->dct_offset);
871
872     return 0;
873 }
874
/**
 * Sum of absolute differences between a 16x16 block and a constant value.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared against
 * @param stride distance in bytes between two consecutive rows of src
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
888
889 static int get_intra_count(MpegEncContext *s, uint8_t *src,
890                            uint8_t *ref, int stride)
891 {
892     int x, y, w, h;
893     int acc = 0;
894
895     w = s->width  & ~15;
896     h = s->height & ~15;
897
898     for (y = 0; y < h; y += 16) {
899         for (x = 0; x < w; x += 16) {
900             int offset = x + y * stride;
901             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
902                                       stride, 16);
903             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
904             int sae  = get_sae(src + offset, mean, stride);
905
906             acc += sae + 500 < sad;
907         }
908     }
909     return acc;
910 }
911
912
913 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
914 {
915     Picture *pic = NULL;
916     int64_t pts;
917     int i, display_picture_number = 0, ret;
918     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
919                                                  (s->low_delay ? 0 : 1);
920     int direct = 1;
921
922     if (pic_arg) {
923         pts = pic_arg->pts;
924         display_picture_number = s->input_picture_number++;
925
926         if (pts != AV_NOPTS_VALUE) {
927             if (s->user_specified_pts != AV_NOPTS_VALUE) {
928                 int64_t time = pts;
929                 int64_t last = s->user_specified_pts;
930
931                 if (time <= last) {
932                     av_log(s->avctx, AV_LOG_ERROR,
933                            "Error, Invalid timestamp=%"PRId64", "
934                            "last=%"PRId64"\n", pts, s->user_specified_pts);
935                     return -1;
936                 }
937
938                 if (!s->low_delay && display_picture_number == 1)
939                     s->dts_delta = time - last;
940             }
941             s->user_specified_pts = pts;
942         } else {
943             if (s->user_specified_pts != AV_NOPTS_VALUE) {
944                 s->user_specified_pts =
945                 pts = s->user_specified_pts + 1;
946                 av_log(s->avctx, AV_LOG_INFO,
947                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
948                        pts);
949             } else {
950                 pts = display_picture_number;
951             }
952         }
953     }
954
955     if (pic_arg) {
956         if (!pic_arg->buf[0]);
957             direct = 0;
958         if (pic_arg->linesize[0] != s->linesize)
959             direct = 0;
960         if (pic_arg->linesize[1] != s->uvlinesize)
961             direct = 0;
962         if (pic_arg->linesize[2] != s->uvlinesize)
963             direct = 0;
964
965         av_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
966                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
967
968         if (direct) {
969             i = ff_find_unused_picture(s, 1);
970             if (i < 0)
971                 return i;
972
973             pic = &s->picture[i];
974             pic->reference = 3;
975
976             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
977                 return ret;
978             if (ff_alloc_picture(s, pic, 1) < 0) {
979                 return -1;
980             }
981         } else {
982             i = ff_find_unused_picture(s, 0);
983             if (i < 0)
984                 return i;
985
986             pic = &s->picture[i];
987             pic->reference = 3;
988
989             if (ff_alloc_picture(s, pic, 0) < 0) {
990                 return -1;
991             }
992
993             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
994                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
995                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
996                 // empty
997             } else {
998                 int h_chroma_shift, v_chroma_shift;
999                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1000                                                  &h_chroma_shift,
1001                                                  &v_chroma_shift);
1002
1003                 for (i = 0; i < 3; i++) {
1004                     int src_stride = pic_arg->linesize[i];
1005                     int dst_stride = i ? s->uvlinesize : s->linesize;
1006                     int h_shift = i ? h_chroma_shift : 0;
1007                     int v_shift = i ? v_chroma_shift : 0;
1008                     int w = s->width  >> h_shift;
1009                     int h = s->height >> v_shift;
1010                     uint8_t *src = pic_arg->data[i];
1011                     uint8_t *dst = pic->f->data[i];
1012
1013                     if (!s->avctx->rc_buffer_size)
1014                         dst += INPLACE_OFFSET;
1015
1016                     if (src_stride == dst_stride)
1017                         memcpy(dst, src, src_stride * h);
1018                     else {
1019                         while (h--) {
1020                             memcpy(dst, src, w);
1021                             dst += dst_stride;
1022                             src += src_stride;
1023                         }
1024                     }
1025                 }
1026             }
1027         }
1028         ret = av_frame_copy_props(pic->f, pic_arg);
1029         if (ret < 0)
1030             return ret;
1031
1032         pic->f->display_picture_number = display_picture_number;
1033         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1034     }
1035
1036     /* shift buffer entries */
1037     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1038         s->input_picture[i - 1] = s->input_picture[i];
1039
1040     s->input_picture[encoding_delay] = (Picture*) pic;
1041
1042     return 0;
1043 }
1044
1045 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1046 {
1047     int x, y, plane;
1048     int score = 0;
1049     int64_t score64 = 0;
1050
1051     for (plane = 0; plane < 3; plane++) {
1052         const int stride = p->f->linesize[plane];
1053         const int bw = plane ? 1 : 2;
1054         for (y = 0; y < s->mb_height * bw; y++) {
1055             for (x = 0; x < s->mb_width * bw; x++) {
1056                 int off = p->shared ? 0 : 16;
1057                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1058                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1059                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1060
1061                 switch (s->avctx->frame_skip_exp) {
1062                 case 0: score    =  FFMAX(score, v);          break;
1063                 case 1: score   += FFABS(v);                  break;
1064                 case 2: score   += v * v;                     break;
1065                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1066                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1067                 }
1068             }
1069         }
1070     }
1071
1072     if (score)
1073         score64 = score;
1074
1075     if (score64 < s->avctx->frame_skip_threshold)
1076         return 1;
1077     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1078         return 1;
1079     return 0;
1080 }
1081
1082 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1083 {
1084     AVPacket pkt = { 0 };
1085     int ret, got_output;
1086
1087     av_init_packet(&pkt);
1088     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1089     if (ret < 0)
1090         return ret;
1091
1092     ret = pkt.size;
1093     av_free_packet(&pkt);
1094     return ret;
1095 }
1096
1097 static int estimate_best_b_count(MpegEncContext *s)
1098 {
1099     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1100     AVCodecContext *c = avcodec_alloc_context3(NULL);
1101     const int scale = s->avctx->brd_scale;
1102     int i, j, out_size, p_lambda, b_lambda, lambda2;
1103     int64_t best_rd  = INT64_MAX;
1104     int best_b_count = -1;
1105
1106     assert(scale >= 0 && scale <= 3);
1107
1108     //emms_c();
1109     //s->next_picture_ptr->quality;
1110     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1111     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1112     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1113     if (!b_lambda) // FIXME we should do this somewhere else
1114         b_lambda = p_lambda;
1115     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1116                FF_LAMBDA_SHIFT;
1117
1118     c->width        = s->width  >> scale;
1119     c->height       = s->height >> scale;
1120     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1121     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1122     c->mb_decision  = s->avctx->mb_decision;
1123     c->me_cmp       = s->avctx->me_cmp;
1124     c->mb_cmp       = s->avctx->mb_cmp;
1125     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1126     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1127     c->time_base    = s->avctx->time_base;
1128     c->max_b_frames = s->max_b_frames;
1129
1130     if (avcodec_open2(c, codec, NULL) < 0)
1131         return -1;
1132
1133     for (i = 0; i < s->max_b_frames + 2; i++) {
1134         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1135                                                 s->next_picture_ptr;
1136
1137         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1138             pre_input = *pre_input_ptr;
1139
1140             if (!pre_input.shared && i) {
1141                 pre_input.f->data[0] += INPLACE_OFFSET;
1142                 pre_input.f->data[1] += INPLACE_OFFSET;
1143                 pre_input.f->data[2] += INPLACE_OFFSET;
1144             }
1145
1146             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1147                                        s->tmp_frames[i]->linesize[0],
1148                                        pre_input.f->data[0],
1149                                        pre_input.f->linesize[0],
1150                                        c->width, c->height);
1151             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1152                                        s->tmp_frames[i]->linesize[1],
1153                                        pre_input.f->data[1],
1154                                        pre_input.f->linesize[1],
1155                                        c->width >> 1, c->height >> 1);
1156             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1157                                        s->tmp_frames[i]->linesize[2],
1158                                        pre_input.f->data[2],
1159                                        pre_input.f->linesize[2],
1160                                        c->width >> 1, c->height >> 1);
1161         }
1162     }
1163
1164     for (j = 0; j < s->max_b_frames + 1; j++) {
1165         int64_t rd = 0;
1166
1167         if (!s->input_picture[j])
1168             break;
1169
1170         c->error[0] = c->error[1] = c->error[2] = 0;
1171
1172         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1173         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1174
1175         out_size = encode_frame(c, s->tmp_frames[0]);
1176
1177         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1178
1179         for (i = 0; i < s->max_b_frames + 1; i++) {
1180             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1181
1182             s->tmp_frames[i + 1]->pict_type = is_p ?
1183                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1184             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1185
1186             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1187
1188             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1189         }
1190
1191         /* get the delayed frames */
1192         while (out_size) {
1193             out_size = encode_frame(c, NULL);
1194             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1195         }
1196
1197         rd += c->error[0] + c->error[1] + c->error[2];
1198
1199         if (rd < best_rd) {
1200             best_rd = rd;
1201             best_b_count = j;
1202         }
1203     }
1204
1205     avcodec_close(c);
1206     av_freep(&c);
1207
1208     return best_b_count;
1209 }
1210
1211 static int select_input_picture(MpegEncContext *s)
1212 {
1213     int i, ret;
1214
1215     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1216         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1217     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1218
1219     /* set next picture type & ordering */
1220     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1221         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1222             s->next_picture_ptr == NULL || s->intra_only) {
1223             s->reordered_input_picture[0] = s->input_picture[0];
1224             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1225             s->reordered_input_picture[0]->f->coded_picture_number =
1226                 s->coded_picture_number++;
1227         } else {
1228             int b_frames;
1229
1230             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1231                 if (s->picture_in_gop_number < s->gop_size &&
1232                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1233                     // FIXME check that te gop check above is +-1 correct
1234                     av_frame_unref(s->input_picture[0]->f);
1235
1236                     emms_c();
1237                     ff_vbv_update(s, 0);
1238
1239                     goto no_output_pic;
1240                 }
1241             }
1242
1243             if (s->flags & CODEC_FLAG_PASS2) {
1244                 for (i = 0; i < s->max_b_frames + 1; i++) {
1245                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1246
1247                     if (pict_num >= s->rc_context.num_entries)
1248                         break;
1249                     if (!s->input_picture[i]) {
1250                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1251                         break;
1252                     }
1253
1254                     s->input_picture[i]->f->pict_type =
1255                         s->rc_context.entry[pict_num].new_pict_type;
1256                 }
1257             }
1258
1259             if (s->avctx->b_frame_strategy == 0) {
1260                 b_frames = s->max_b_frames;
1261                 while (b_frames && !s->input_picture[b_frames])
1262                     b_frames--;
1263             } else if (s->avctx->b_frame_strategy == 1) {
1264                 for (i = 1; i < s->max_b_frames + 1; i++) {
1265                     if (s->input_picture[i] &&
1266                         s->input_picture[i]->b_frame_score == 0) {
1267                         s->input_picture[i]->b_frame_score =
1268                             get_intra_count(s,
1269                                             s->input_picture[i    ]->f->data[0],
1270                                             s->input_picture[i - 1]->f->data[0],
1271                                             s->linesize) + 1;
1272                     }
1273                 }
1274                 for (i = 0; i < s->max_b_frames + 1; i++) {
1275                     if (s->input_picture[i] == NULL ||
1276                         s->input_picture[i]->b_frame_score - 1 >
1277                             s->mb_num / s->avctx->b_sensitivity)
1278                         break;
1279                 }
1280
1281                 b_frames = FFMAX(0, i - 1);
1282
1283                 /* reset scores */
1284                 for (i = 0; i < b_frames + 1; i++) {
1285                     s->input_picture[i]->b_frame_score = 0;
1286                 }
1287             } else if (s->avctx->b_frame_strategy == 2) {
1288                 b_frames = estimate_best_b_count(s);
1289             } else {
1290                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1291                 b_frames = 0;
1292             }
1293
1294             emms_c();
1295
1296             for (i = b_frames - 1; i >= 0; i--) {
1297                 int type = s->input_picture[i]->f->pict_type;
1298                 if (type && type != AV_PICTURE_TYPE_B)
1299                     b_frames = i;
1300             }
1301             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1302                 b_frames == s->max_b_frames) {
1303                 av_log(s->avctx, AV_LOG_ERROR,
1304                        "warning, too many b frames in a row\n");
1305             }
1306
1307             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1308                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1309                     s->gop_size > s->picture_in_gop_number) {
1310                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1311                 } else {
1312                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1313                         b_frames = 0;
1314                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1315                 }
1316             }
1317
1318             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1319                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1320                 b_frames--;
1321
1322             s->reordered_input_picture[0] = s->input_picture[b_frames];
1323             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1324                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1325             s->reordered_input_picture[0]->f->coded_picture_number =
1326                 s->coded_picture_number++;
1327             for (i = 0; i < b_frames; i++) {
1328                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1329                 s->reordered_input_picture[i + 1]->f->pict_type =
1330                     AV_PICTURE_TYPE_B;
1331                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1332                     s->coded_picture_number++;
1333             }
1334         }
1335     }
1336 no_output_pic:
1337     if (s->reordered_input_picture[0]) {
1338         s->reordered_input_picture[0]->reference =
1339            s->reordered_input_picture[0]->f->pict_type !=
1340                AV_PICTURE_TYPE_B ? 3 : 0;
1341
1342         ff_mpeg_unref_picture(s, &s->new_picture);
1343         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1344             return ret;
1345
1346         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1347             // input is a shared pix, so we can't modifiy it -> alloc a new
1348             // one & ensure that the shared one is reuseable
1349
1350             Picture *pic;
1351             int i = ff_find_unused_picture(s, 0);
1352             if (i < 0)
1353                 return i;
1354             pic = &s->picture[i];
1355
1356             pic->reference = s->reordered_input_picture[0]->reference;
1357             if (ff_alloc_picture(s, pic, 0) < 0) {
1358                 return -1;
1359             }
1360
1361             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1362             if (ret < 0)
1363                 return ret;
1364
1365             /* mark us unused / free shared pic */
1366             av_frame_unref(s->reordered_input_picture[0]->f);
1367             s->reordered_input_picture[0]->shared = 0;
1368
1369             s->current_picture_ptr = pic;
1370         } else {
1371             // input is not a shared pix -> reuse buffer for current_pix
1372             s->current_picture_ptr = s->reordered_input_picture[0];
1373             for (i = 0; i < 4; i++) {
1374                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1375             }
1376         }
1377         ff_mpeg_unref_picture(s, &s->current_picture);
1378         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1379                                        s->current_picture_ptr)) < 0)
1380             return ret;
1381
1382         s->picture_number = s->new_picture.f->display_picture_number;
1383     } else {
1384         ff_mpeg_unref_picture(s, &s->new_picture);
1385     }
1386     return 0;
1387 }
1388
1389 static void frame_end(MpegEncContext *s)
1390 {
1391     int i;
1392
1393     if (s->unrestricted_mv &&
1394         s->current_picture.reference &&
1395         !s->intra_only) {
1396         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1397         int hshift = desc->log2_chroma_w;
1398         int vshift = desc->log2_chroma_h;
1399         s->mpvencdsp.draw_edges(s->current_picture.f->data[0], s->linesize,
1400                                 s->h_edge_pos, s->v_edge_pos,
1401                                 EDGE_WIDTH, EDGE_WIDTH,
1402                                 EDGE_TOP | EDGE_BOTTOM);
1403         s->mpvencdsp.draw_edges(s->current_picture.f->data[1], s->uvlinesize,
1404                                 s->h_edge_pos >> hshift,
1405                                 s->v_edge_pos >> vshift,
1406                                 EDGE_WIDTH >> hshift,
1407                                 EDGE_WIDTH >> vshift,
1408                                 EDGE_TOP | EDGE_BOTTOM);
1409         s->mpvencdsp.draw_edges(s->current_picture.f->data[2], s->uvlinesize,
1410                                 s->h_edge_pos >> hshift,
1411                                 s->v_edge_pos >> vshift,
1412                                 EDGE_WIDTH >> hshift,
1413                                 EDGE_WIDTH >> vshift,
1414                                 EDGE_TOP | EDGE_BOTTOM);
1415     }
1416
1417     emms_c();
1418
1419     s->last_pict_type                 = s->pict_type;
1420     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1421     if (s->pict_type!= AV_PICTURE_TYPE_B)
1422         s->last_non_b_pict_type = s->pict_type;
1423
1424     if (s->encoding) {
1425         /* release non-reference frames */
1426         for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1427             if (!s->picture[i].reference)
1428                 ff_mpeg_unref_picture(s, &s->picture[i]);
1429         }
1430     }
1431
1432     s->avctx->coded_frame = s->current_picture_ptr->f;
1433
1434 }
1435
1436 static void update_noise_reduction(MpegEncContext *s)
1437 {
1438     int intra, i;
1439
1440     for (intra = 0; intra < 2; intra++) {
1441         if (s->dct_count[intra] > (1 << 16)) {
1442             for (i = 0; i < 64; i++) {
1443                 s->dct_error_sum[intra][i] >>= 1;
1444             }
1445             s->dct_count[intra] >>= 1;
1446         }
1447
1448         for (i = 0; i < 64; i++) {
1449             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1450                                        s->dct_count[intra] +
1451                                        s->dct_error_sum[intra][i] / 2) /
1452                                       (s->dct_error_sum[intra][i] + 1);
1453         }
1454     }
1455 }
1456
1457 static int frame_start(MpegEncContext *s)
1458 {
1459     int ret;
1460
1461     /* mark & release old frames */
1462     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1463         s->last_picture_ptr != s->next_picture_ptr &&
1464         s->last_picture_ptr->f->buf[0]) {
1465         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1466     }
1467
1468     s->current_picture_ptr->f->pict_type = s->pict_type;
1469     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1470
1471     ff_mpeg_unref_picture(s, &s->current_picture);
1472     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1473                                    s->current_picture_ptr)) < 0)
1474         return ret;
1475
1476     if (s->pict_type != AV_PICTURE_TYPE_B) {
1477         s->last_picture_ptr = s->next_picture_ptr;
1478         if (!s->droppable)
1479             s->next_picture_ptr = s->current_picture_ptr;
1480     }
1481
1482     if (s->last_picture_ptr) {
1483         ff_mpeg_unref_picture(s, &s->last_picture);
1484         if (s->last_picture_ptr->f->buf[0] &&
1485             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1486                                        s->last_picture_ptr)) < 0)
1487             return ret;
1488     }
1489     if (s->next_picture_ptr) {
1490         ff_mpeg_unref_picture(s, &s->next_picture);
1491         if (s->next_picture_ptr->f->buf[0] &&
1492             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1493                                        s->next_picture_ptr)) < 0)
1494             return ret;
1495     }
1496
1497     if (s->picture_structure!= PICT_FRAME) {
1498         int i;
1499         for (i = 0; i < 4; i++) {
1500             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1501                 s->current_picture.f->data[i] +=
1502                     s->current_picture.f->linesize[i];
1503             }
1504             s->current_picture.f->linesize[i] *= 2;
1505             s->last_picture.f->linesize[i]    *= 2;
1506             s->next_picture.f->linesize[i]    *= 2;
1507         }
1508     }
1509
1510     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1511         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1512         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1513     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1514         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1515         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1516     } else {
1517         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1518         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1519     }
1520
1521     if (s->dct_error_sum) {
1522         assert(s->avctx->noise_reduction && s->encoding);
1523         update_noise_reduction(s);
1524     }
1525
1526     return 0;
1527 }
1528
1529 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1530                           const AVFrame *pic_arg, int *got_packet)
1531 {
1532     MpegEncContext *s = avctx->priv_data;
1533     int i, stuffing_count, ret;
1534     int context_count = s->slice_context_count;
1535
1536     s->picture_in_gop_number++;
1537
1538     if (load_input_picture(s, pic_arg) < 0)
1539         return -1;
1540
1541     if (select_input_picture(s) < 0) {
1542         return -1;
1543     }
1544
1545     /* output? */
1546     if (s->new_picture.f->data[0]) {
1547         if (!pkt->data &&
1548             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1549             return ret;
1550         if (s->mb_info) {
1551             s->mb_info_ptr = av_packet_new_side_data(pkt,
1552                                  AV_PKT_DATA_H263_MB_INFO,
1553                                  s->mb_width*s->mb_height*12);
1554             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1555         }
1556
1557         for (i = 0; i < context_count; i++) {
1558             int start_y = s->thread_context[i]->start_mb_y;
1559             int   end_y = s->thread_context[i]->  end_mb_y;
1560             int h       = s->mb_height;
1561             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1562             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1563
1564             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1565         }
1566
1567         s->pict_type = s->new_picture.f->pict_type;
1568         //emms_c();
1569         ret = frame_start(s);
1570         if (ret < 0)
1571             return ret;
1572 vbv_retry:
1573         if (encode_picture(s, s->picture_number) < 0)
1574             return -1;
1575
1576         avctx->header_bits = s->header_bits;
1577         avctx->mv_bits     = s->mv_bits;
1578         avctx->misc_bits   = s->misc_bits;
1579         avctx->i_tex_bits  = s->i_tex_bits;
1580         avctx->p_tex_bits  = s->p_tex_bits;
1581         avctx->i_count     = s->i_count;
1582         // FIXME f/b_count in avctx
1583         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1584         avctx->skip_count  = s->skip_count;
1585
1586         frame_end(s);
1587
1588         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1589             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1590
1591         if (avctx->rc_buffer_size) {
1592             RateControlContext *rcc = &s->rc_context;
1593             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1594
1595             if (put_bits_count(&s->pb) > max_size &&
1596                 s->lambda < s->avctx->lmax) {
1597                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1598                                        (s->qscale + 1) / s->qscale);
1599                 if (s->adaptive_quant) {
1600                     int i;
1601                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1602                         s->lambda_table[i] =
1603                             FFMAX(s->lambda_table[i] + 1,
1604                                   s->lambda_table[i] * (s->qscale + 1) /
1605                                   s->qscale);
1606                 }
1607                 s->mb_skipped = 0;        // done in frame_start()
1608                 // done in encode_picture() so we must undo it
1609                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1610                     if (s->flipflop_rounding          ||
1611                         s->codec_id == AV_CODEC_ID_H263P ||
1612                         s->codec_id == AV_CODEC_ID_MPEG4)
1613                         s->no_rounding ^= 1;
1614                 }
1615                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1616                     s->time_base       = s->last_time_base;
1617                     s->last_non_b_time = s->time - s->pp_time;
1618                 }
1619                 for (i = 0; i < context_count; i++) {
1620                     PutBitContext *pb = &s->thread_context[i]->pb;
1621                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1622                 }
1623                 goto vbv_retry;
1624             }
1625
1626             assert(s->avctx->rc_max_rate);
1627         }
1628
1629         if (s->flags & CODEC_FLAG_PASS1)
1630             ff_write_pass1_stats(s);
1631
1632         for (i = 0; i < 4; i++) {
1633             s->current_picture_ptr->f->error[i] = s->current_picture.f->error[i];
1634             avctx->error[i] += s->current_picture_ptr->f->error[i];
1635         }
1636
1637         if (s->flags & CODEC_FLAG_PASS1)
1638             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1639                    avctx->i_tex_bits + avctx->p_tex_bits ==
1640                        put_bits_count(&s->pb));
1641         flush_put_bits(&s->pb);
1642         s->frame_bits  = put_bits_count(&s->pb);
1643
1644         stuffing_count = ff_vbv_update(s, s->frame_bits);
1645         if (stuffing_count) {
1646             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1647                     stuffing_count + 50) {
1648                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1649                 return -1;
1650             }
1651
1652             switch (s->codec_id) {
1653             case AV_CODEC_ID_MPEG1VIDEO:
1654             case AV_CODEC_ID_MPEG2VIDEO:
1655                 while (stuffing_count--) {
1656                     put_bits(&s->pb, 8, 0);
1657                 }
1658             break;
1659             case AV_CODEC_ID_MPEG4:
1660                 put_bits(&s->pb, 16, 0);
1661                 put_bits(&s->pb, 16, 0x1C3);
1662                 stuffing_count -= 4;
1663                 while (stuffing_count--) {
1664                     put_bits(&s->pb, 8, 0xFF);
1665                 }
1666             break;
1667             default:
1668                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1669             }
1670             flush_put_bits(&s->pb);
1671             s->frame_bits  = put_bits_count(&s->pb);
1672         }
1673
1674         /* update mpeg1/2 vbv_delay for CBR */
1675         if (s->avctx->rc_max_rate                          &&
1676             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1677             s->out_format == FMT_MPEG1                     &&
1678             90000LL * (avctx->rc_buffer_size - 1) <=
1679                 s->avctx->rc_max_rate * 0xFFFFLL) {
1680             int vbv_delay, min_delay;
1681             double inbits  = s->avctx->rc_max_rate *
1682                              av_q2d(s->avctx->time_base);
1683             int    minbits = s->frame_bits - 8 *
1684                              (s->vbv_delay_ptr - s->pb.buf - 1);
1685             double bits    = s->rc_context.buffer_index + minbits - inbits;
1686
1687             if (bits < 0)
1688                 av_log(s->avctx, AV_LOG_ERROR,
1689                        "Internal error, negative bits\n");
1690
1691             assert(s->repeat_first_field == 0);
1692
1693             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1694             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1695                         s->avctx->rc_max_rate;
1696
1697             vbv_delay = FFMAX(vbv_delay, min_delay);
1698
1699             assert(vbv_delay < 0xFFFF);
1700
1701             s->vbv_delay_ptr[0] &= 0xF8;
1702             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1703             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1704             s->vbv_delay_ptr[2] &= 0x07;
1705             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1706             avctx->vbv_delay     = vbv_delay * 300;
1707         }
1708         s->total_bits     += s->frame_bits;
1709         avctx->frame_bits  = s->frame_bits;
1710
1711         pkt->pts = s->current_picture.f->pts;
1712         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1713             if (!s->current_picture.f->coded_picture_number)
1714                 pkt->dts = pkt->pts - s->dts_delta;
1715             else
1716                 pkt->dts = s->reordered_pts;
1717             s->reordered_pts = pkt->pts;
1718         } else
1719             pkt->dts = pkt->pts;
1720         if (s->current_picture.f->key_frame)
1721             pkt->flags |= AV_PKT_FLAG_KEY;
1722         if (s->mb_info)
1723             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1724     } else {
1725         s->frame_bits = 0;
1726     }
1727     assert((s->frame_bits & 7) == 0);
1728
1729     pkt->size = s->frame_bits / 8;
1730     *got_packet = !!pkt->size;
1731     return 0;
1732 }
1733
1734 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1735                                                 int n, int threshold)
1736 {
1737     static const char tab[64] = {
1738         3, 2, 2, 1, 1, 1, 1, 1,
1739         1, 1, 1, 1, 1, 1, 1, 1,
1740         1, 1, 1, 1, 1, 1, 1, 1,
1741         0, 0, 0, 0, 0, 0, 0, 0,
1742         0, 0, 0, 0, 0, 0, 0, 0,
1743         0, 0, 0, 0, 0, 0, 0, 0,
1744         0, 0, 0, 0, 0, 0, 0, 0,
1745         0, 0, 0, 0, 0, 0, 0, 0
1746     };
1747     int score = 0;
1748     int run = 0;
1749     int i;
1750     int16_t *block = s->block[n];
1751     const int last_index = s->block_last_index[n];
1752     int skip_dc;
1753
1754     if (threshold < 0) {
1755         skip_dc = 0;
1756         threshold = -threshold;
1757     } else
1758         skip_dc = 1;
1759
1760     /* Are all we could set to zero already zero? */
1761     if (last_index <= skip_dc - 1)
1762         return;
1763
1764     for (i = 0; i <= last_index; i++) {
1765         const int j = s->intra_scantable.permutated[i];
1766         const int level = FFABS(block[j]);
1767         if (level == 1) {
1768             if (skip_dc && i == 0)
1769                 continue;
1770             score += tab[run];
1771             run = 0;
1772         } else if (level > 1) {
1773             return;
1774         } else {
1775             run++;
1776         }
1777     }
1778     if (score >= threshold)
1779         return;
1780     for (i = skip_dc; i <= last_index; i++) {
1781         const int j = s->intra_scantable.permutated[i];
1782         block[j] = 0;
1783     }
1784     if (block[0])
1785         s->block_last_index[n] = 0;
1786     else
1787         s->block_last_index[n] = -1;
1788 }
1789
1790 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1791                                int last_index)
1792 {
1793     int i;
1794     const int maxlevel = s->max_qcoeff;
1795     const int minlevel = s->min_qcoeff;
1796     int overflow = 0;
1797
1798     if (s->mb_intra) {
1799         i = 1; // skip clipping of intra dc
1800     } else
1801         i = 0;
1802
1803     for (; i <= last_index; i++) {
1804         const int j = s->intra_scantable.permutated[i];
1805         int level = block[j];
1806
1807         if (level > maxlevel) {
1808             level = maxlevel;
1809             overflow++;
1810         } else if (level < minlevel) {
1811             level = minlevel;
1812             overflow++;
1813         }
1814
1815         block[j] = level;
1816     }
1817
1818     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1819         av_log(s->avctx, AV_LOG_INFO,
1820                "warning, clipping %d dct coefficients to %d..%d\n",
1821                overflow, minlevel, maxlevel);
1822 }
1823
/**
 * Fill an 8x8 weight table from the local activity of an 8x8 pixel block.
 *
 * For every pixel, the sum and the sum of squares are gathered over its
 * neighbourhood (up to 3x3, cropped at the block border); the weight is
 * 36 * ff_sqrt(count * sqr - sum * sum) / count.
 *
 * @param weight output table of 64 entries, row-major with a row length of 8
 * @param ptr    top-left pixel of the block
 * @param stride distance between two pixel rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            int total   = 0;
            int squares = 0;
            int samples = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                const uint8_t *line = ptr + ny * stride;

                for (nx = left; nx < right; nx++) {
                    const int v = line[nx];

                    total   += v;
                    squares += v * v;
                    samples++;
                }
            }
            weight[col + 8 * row] =
                (36 * ff_sqrt(samples * squares - total * total)) / samples;
        }
    }
}
1847
/**
 * Encode the macroblock at (s->mb_x, s->mb_y).
 *
 * In order, this function: updates the quantizer (adaptive quantization or
 * the QP_RD dquant), locates the source pixels (going through the edge
 * emulation buffer when the macroblock crosses the right/bottom picture
 * border), builds either the intra pixel blocks or the motion-compensated
 * residual, chooses progressive or interlaced DCT when
 * CODEC_FLAG_INTERLACED_DCT is set, runs DCT + quantization (with optional
 * noise shaping, coefficient clipping and single-coefficient elimination)
 * and finally calls the codec-specific macroblock writer.
 *
 * @param s               encoder context
 * @param motion_x        forwarded unchanged to the codec-specific writer
 * @param motion_y        forwarded unchanged to the codec-specific writer
 * @param mb_block_height multiplier used for the chroma row offset and the
 *                        chroma block height (encode_mb() passes 8 or 16)
 * @param mb_block_count  number of 8x8 blocks handled per macroblock
 *                        (encode_mb() passes 6 or 8)
 */
1848 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1849                                                 int motion_x, int motion_y,
1850                                                 int mb_block_height,
1851                                                 int mb_block_count)
1852 {
1853     int16_t weight[8][64];
1854     int16_t orig[8][64];
1855     const int mb_x = s->mb_x;
1856     const int mb_y = s->mb_y;
1857     int i;
1858     int skip_dct[8];
1859     int dct_offset = s->linesize * 8; // default for progressive frames
1860     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1861     ptrdiff_t wrap_y, wrap_c;
1862
         /* every block starts out with the global skipdct setting */
1863     for (i = 0; i < mb_block_count; i++)
1864         skip_dct[i] = s->skipdct;
1865
         /* with adaptive quantization the lambda (and from it the qscale)
          * is taken per macroblock from lambda_table */
1866     if (s->adaptive_quant) {
1867         const int last_qp = s->qscale;
1868         const int mb_xy = mb_x + mb_y * s->mb_stride;
1869
1870         s->lambda = s->lambda_table[mb_xy];
1871         update_qscale(s);
1872
1873         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1874             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1875             s->dquant = s->qscale - last_qp;
1876
                 /* for FMT_H263 output the quantizer step is limited to
                  * [-2, 2]; MPEG-4 non-intra macroblocks drop it entirely
                  * in the cases tested below */
1877             if (s->out_format == FMT_H263) {
1878                 s->dquant = av_clip(s->dquant, -2, 2);
1879
1880                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1881                     if (!s->mb_intra) {
1882                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1883                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1884                                 s->dquant = 0;
1885                         }
1886                         if (s->mv_type == MV_TYPE_8X8)
1887                             s->dquant = 0;
1888                     }
1889                 }
1890             }
1891         }
1892         ff_set_qscale(s, last_qp + s->dquant);
1893     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1894         ff_set_qscale(s, s->qscale + s->dquant);
1895
         /* source pixel pointers of this macroblock in the picture to encode */
1896     wrap_y = s->linesize;
1897     wrap_c = s->uvlinesize;
1898     ptr_y  = s->new_picture.f->data[0] +
1899              (mb_y * 16 * wrap_y)              + mb_x * 16;
1900     ptr_cb = s->new_picture.f->data[1] +
1901              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1902     ptr_cr = s->new_picture.f->data[2] +
1903              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1904
         /* the macroblock sticks out of the picture on the right or at the
          * bottom: read the source through the edge emulation buffer instead.
          * NOTE(review): the chroma calls pass mb_y * 8 and s->height >> 1
          * although ptr_cb/ptr_cr were offset with mb_block_height; confirm
          * this is intended when mb_block_height is 16. */
1905     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1906         uint8_t *ebuf = s->edge_emu_buffer + 32;
1907         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
1908                                  wrap_y, wrap_y,
1909                                  16, 16, mb_x * 16, mb_y * 16,
1910                                  s->width, s->height);
1911         ptr_y = ebuf;
1912         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
1913                                  wrap_c, wrap_c,
1914                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1915                                  s->width >> 1, s->height >> 1);
1916         ptr_cb = ebuf + 18 * wrap_y;
1917         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
1918                                  wrap_c, wrap_c,
1919                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1920                                  s->width >> 1, s->height >> 1);
1921         ptr_cr = ebuf + 18 * wrap_y + 8;
1922     }
1923
1924     if (s->mb_intra) {
1925         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1926             int progressive_score, interlaced_score;
1927
                 /* the progressive score gets a bonus of 400, so interlaced
                  * DCT is only selected when it is clearly cheaper */
1928             s->interlaced_dct = 0;
1929             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
1930                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1931                                                      NULL, wrap_y, 8) - 400;
1932
1933             if (progressive_score > 0) {
1934                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
1935                                                         NULL, wrap_y * 2, 8) +
1936                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
1937                                                         NULL, wrap_y * 2, 8);
1938                 if (progressive_score > interlaced_score) {
1939                     s->interlaced_dct = 1;
1940
                         /* interlaced: the lower blocks start one line down
                          * and every block reads every second line */
1941                     dct_offset = wrap_y;
1942                     wrap_y <<= 1;
1943                     if (s->chroma_format == CHROMA_422)
1944                         wrap_c <<= 1;
1945                 }
1946             }
1947         }
1948
             /* intra: the blocks are the source pixels themselves */
1949         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
1950         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
1951         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
1952         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
1953
1954         if (s->flags & CODEC_FLAG_GRAY) {
1955             skip_dct[4] = 1;
1956             skip_dct[5] = 1;
1957         } else {
1958             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1959             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1960             if (!s->chroma_y_shift) { /* 422 */
1961                 s->pdsp.get_pixels(s->block[6],
1962                                    ptr_cb + (dct_offset >> 1), wrap_c);
1963                 s->pdsp.get_pixels(s->block[7],
1964                                    ptr_cr + (dct_offset >> 1), wrap_c);
1965             }
1966         }
1967     } else {
1968         op_pixels_func (*op_pix)[4];
1969         qpel_mc_func (*op_qpix)[16];
1970         uint8_t *dest_y, *dest_cb, *dest_cr;
1971
1972         dest_y  = s->dest[0];
1973         dest_cb = s->dest[1];
1974         dest_cr = s->dest[2];
1975
1976         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1977             op_pix  = s->hdsp.put_pixels_tab;
1978             op_qpix = s->qdsp.put_qpel_pixels_tab;
1979         } else {
1980             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
1981             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
1982         }
1983
             /* build the prediction in s->dest; once a forward prediction
              * was written, the backward one uses the averaging functions */
1984         if (s->mv_dir & MV_DIR_FORWARD) {
1985             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
1986                           s->last_picture.f->data,
1987                           op_pix, op_qpix);
1988             op_pix  = s->hdsp.avg_pixels_tab;
1989             op_qpix = s->qdsp.avg_qpel_pixels_tab;
1990         }
1991         if (s->mv_dir & MV_DIR_BACKWARD) {
1992             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
1993                           s->next_picture.f->data,
1994                           op_pix, op_qpix);
1995         }
1996
1997         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1998             int progressive_score, interlaced_score;
1999
2000             s->interlaced_dct = 0;
2001             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2002                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2003                                                      ptr_y + wrap_y * 8,
2004                                                      wrap_y, 8) - 400;
2005
2006             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2007                 progressive_score -= 400;
2008
2009             if (progressive_score > 0) {
2010                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2011                                                         wrap_y * 2, 8) +
2012                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2013                                                         ptr_y + wrap_y,
2014                                                         wrap_y * 2, 8);
2015
2016                 if (progressive_score > interlaced_score) {
2017                     s->interlaced_dct = 1;
2018
2019                     dct_offset = wrap_y;
2020                     wrap_y <<= 1;
2021                     if (s->chroma_format == CHROMA_422)
2022                         wrap_c <<= 1;
2023                 }
2024             }
2025         }
2026
             /* inter: the blocks are source minus prediction */
2027         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2028         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2029         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2030                             dest_y + dct_offset, wrap_y);
2031         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2032                             dest_y + dct_offset + 8, wrap_y);
2033
2034         if (s->flags & CODEC_FLAG_GRAY) {
2035             skip_dct[4] = 1;
2036             skip_dct[5] = 1;
2037         } else {
2038             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2039             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2040             if (!s->chroma_y_shift) { /* 422 */
2041                 s->pdsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
2042                                     dest_cb + (dct_offset >> 1), wrap_c);
2043                 s->pdsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
2044                                     dest_cr + (dct_offset >> 1), wrap_c);
2045             }
2046         }
2047         /* pre quantization */
             /* when the stored motion-compensated variance of this macroblock
              * is below 2 * qscale^2, each block whose SAD against the
              * prediction is below 20 * qscale skips DCT and quantization */
2048         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2049                 2 * s->qscale * s->qscale) {
2050             // FIXME optimize
2051             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2052                 skip_dct[0] = 1;
2053             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2054                 skip_dct[1] = 1;
2055             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2056                                wrap_y, 8) < 20 * s->qscale)
2057                 skip_dct[2] = 1;
2058             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2059                                wrap_y, 8) < 20 * s->qscale)
2060                 skip_dct[3] = 1;
2061             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2062                 skip_dct[4] = 1;
2063             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2064                 skip_dct[5] = 1;
2065             if (!s->chroma_y_shift) { /* 422 */
2066                 if (s->mecc.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2067                                    dest_cb + (dct_offset >> 1),
2068                                    wrap_c, 8) < 20 * s->qscale)
2069                     skip_dct[6] = 1;
2070                 if (s->mecc.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2071                                    dest_cr + (dct_offset >> 1),
2072                                    wrap_c, 8) < 20 * s->qscale)
2073                     skip_dct[7] = 1;
2074             }
2075         }
2076     }
2077
         /* noise shaping needs a visual weight table per transformed block
          * and a copy of the blocks taken before they are quantized */
2078     if (s->quantizer_noise_shaping) {
2079         if (!skip_dct[0])
2080             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2081         if (!skip_dct[1])
2082             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2083         if (!skip_dct[2])
2084             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2085         if (!skip_dct[3])
2086             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2087         if (!skip_dct[4])
2088             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2089         if (!skip_dct[5])
2090             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2091         if (!s->chroma_y_shift) { /* 422 */
2092             if (!skip_dct[6])
2093                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2094                                   wrap_c);
2095             if (!skip_dct[7])
2096                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2097                                   wrap_c);
2098         }
             /* one copy for all blocks; presumably s->block[0..count-1] are
              * stored back to back - verify against the allocation */
2099         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2100     }
2101
2102     /* DCT & quantize */
2103     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
2104     {
2105         for (i = 0; i < mb_block_count; i++) {
2106             if (!skip_dct[i]) {
2107                 int overflow;
2108                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2109                 // FIXME we could decide to change to quantizer instead of
2110                 // clipping
2111                 // JS: I don't think that would be a good idea it could lower
2112                 //     quality instead of improve it. Just INTRADC clipping
2113                 //     deserves changes in quantizer
2114                 if (overflow)
2115                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2116             } else
2117                 s->block_last_index[i] = -1;
2118         }
2119         if (s->quantizer_noise_shaping) {
2120             for (i = 0; i < mb_block_count; i++) {
2121                 if (!skip_dct[i]) {
2122                     s->block_last_index[i] =
2123                         dct_quantize_refine(s, s->block[i], weight[i],
2124                                             orig[i], i, s->qscale);
2125                 }
2126             }
2127         }
2128
             /* coefficient elimination is only applied to non-intra blocks */
2129         if (s->luma_elim_threshold && !s->mb_intra)
2130             for (i = 0; i < 4; i++)
2131                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2132         if (s->chroma_elim_threshold && !s->mb_intra)
2133             for (i = 4; i < mb_block_count; i++)
2134                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2135
2136         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2137             for (i = 0; i < mb_block_count; i++) {
2138                 if (s->block_last_index[i] == -1)
2139                     s->coded_score[i] = INT_MAX / 256;
2140             }
2141         }
2142     }
2143
         /* gray-only intra macroblock: both chroma blocks become DC-only
          * with the value (1024 + c_dc_scale / 2) / c_dc_scale */
2144     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2145         s->block_last_index[4] =
2146         s->block_last_index[5] = 0;
2147         s->block[4][0] =
2148         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2149     }
2150
2151     // non c quantize code returns incorrect block_last_index FIXME
         /* so for alternate scan the last nonzero position is searched again
          * from the end of the scan order */
2152     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2153         for (i = 0; i < mb_block_count; i++) {
2154             int j;
2155             if (s->block_last_index[i] > 0) {
2156                 for (j = 63; j > 0; j--) {
2157                     if (s->block[i][s->intra_scantable.permutated[j]])
2158                         break;
2159                 }
2160                 s->block_last_index[i] = j;
2161             }
2162         }
2163     }
2164
2165     /* huffman encode */
         /* each call is guarded by its CONFIG_*_ENCODER constant */
2166     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2167     case AV_CODEC_ID_MPEG1VIDEO:
2168     case AV_CODEC_ID_MPEG2VIDEO:
2169         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2170             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2171         break;
2172     case AV_CODEC_ID_MPEG4:
2173         if (CONFIG_MPEG4_ENCODER)
2174             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2175         break;
2176     case AV_CODEC_ID_MSMPEG4V2:
2177     case AV_CODEC_ID_MSMPEG4V3:
2178     case AV_CODEC_ID_WMV1:
2179         if (CONFIG_MSMPEG4_ENCODER)
2180             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2181         break;
2182     case AV_CODEC_ID_WMV2:
2183         if (CONFIG_WMV2_ENCODER)
2184             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2185         break;
2186     case AV_CODEC_ID_H261:
2187         if (CONFIG_H261_ENCODER)
2188             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2189         break;
2190     case AV_CODEC_ID_H263:
2191     case AV_CODEC_ID_H263P:
2192     case AV_CODEC_ID_FLV1:
2193     case AV_CODEC_ID_RV10:
2194     case AV_CODEC_ID_RV20:
2195         if (CONFIG_H263_ENCODER)
2196             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2197         break;
2198     case AV_CODEC_ID_MJPEG:
2199         if (CONFIG_MJPEG_ENCODER)
2200             ff_mjpeg_encode_mb(s, s->block);
2201         break;
2202     default:
2203         assert(0);
2204     }
2205 }
2206
2207 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2208 {
2209     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2210     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2211 }
2212
2213 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2214     int i;
2215
2216     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2217
2218     /* mpeg1 */
2219     d->mb_skip_run= s->mb_skip_run;
2220     for(i=0; i<3; i++)
2221         d->last_dc[i] = s->last_dc[i];
2222
2223     /* statistics */
2224     d->mv_bits= s->mv_bits;
2225     d->i_tex_bits= s->i_tex_bits;
2226     d->p_tex_bits= s->p_tex_bits;
2227     d->i_count= s->i_count;
2228     d->f_count= s->f_count;
2229     d->b_count= s->b_count;
2230     d->skip_count= s->skip_count;
2231     d->misc_bits= s->misc_bits;
2232     d->last_bits= 0;
2233
2234     d->mb_skipped= 0;
2235     d->qscale= s->qscale;
2236     d->dquant= s->dquant;
2237
2238     d->esc3_level_length= s->esc3_level_length;
2239 }
2240
2241 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2242     int i;
2243
2244     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2245     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2246
2247     /* mpeg1 */
2248     d->mb_skip_run= s->mb_skip_run;
2249     for(i=0; i<3; i++)
2250         d->last_dc[i] = s->last_dc[i];
2251
2252     /* statistics */
2253     d->mv_bits= s->mv_bits;
2254     d->i_tex_bits= s->i_tex_bits;
2255     d->p_tex_bits= s->p_tex_bits;
2256     d->i_count= s->i_count;
2257     d->f_count= s->f_count;
2258     d->b_count= s->b_count;
2259     d->skip_count= s->skip_count;
2260     d->misc_bits= s->misc_bits;
2261
2262     d->mb_intra= s->mb_intra;
2263     d->mb_skipped= s->mb_skipped;
2264     d->mv_type= s->mv_type;
2265     d->mv_dir= s->mv_dir;
2266     d->pb= s->pb;
2267     if(s->data_partitioning){
2268         d->pb2= s->pb2;
2269         d->tex_pb= s->tex_pb;
2270     }
2271     d->block= s->block;
2272     for(i=0; i<8; i++)
2273         d->block_last_index[i]= s->block_last_index[i];
2274     d->interlaced_dct= s->interlaced_dct;
2275     d->qscale= s->qscale;
2276
2277     d->esc3_level_length= s->esc3_level_length;
2278 }
2279
2280 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2281                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2282                            int *dmin, int *next_block, int motion_x, int motion_y)
2283 {
2284     int score;
2285     uint8_t *dest_backup[3];
2286
2287     copy_context_before_encode(s, backup, type);
2288
2289     s->block= s->blocks[*next_block];
2290     s->pb= pb[*next_block];
2291     if(s->data_partitioning){
2292         s->pb2   = pb2   [*next_block];
2293         s->tex_pb= tex_pb[*next_block];
2294     }
2295
2296     if(*next_block){
2297         memcpy(dest_backup, s->dest, sizeof(s->dest));
2298         s->dest[0] = s->rd_scratchpad;
2299         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2300         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2301         assert(s->linesize >= 32); //FIXME
2302     }
2303
2304     encode_mb(s, motion_x, motion_y);
2305
2306     score= put_bits_count(&s->pb);
2307     if(s->data_partitioning){
2308         score+= put_bits_count(&s->pb2);
2309         score+= put_bits_count(&s->tex_pb);
2310     }
2311
2312     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2313         ff_MPV_decode_mb(s, s->block);
2314
2315         score *= s->lambda2;
2316         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2317     }
2318
2319     if(*next_block){
2320         memcpy(s->dest, dest_backup, sizeof(s->dest));
2321     }
2322
2323     if(score<*dmin){
2324         *dmin= score;
2325         *next_block^=1;
2326
2327         copy_context_after_encode(best, s, type);
2328     }
2329 }
2330
2331 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2332     uint32_t *sq = ff_square_tab + 256;
2333     int acc=0;
2334     int x,y;
2335
2336     if(w==16 && h==16)
2337         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2338     else if(w==8 && h==8)
2339         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2340
2341     for(y=0; y<h; y++){
2342         for(x=0; x<w; x++){
2343             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2344         }
2345     }
2346
2347     assert(acc>=0);
2348
2349     return acc;
2350 }
2351
2352 static int sse_mb(MpegEncContext *s){
2353     int w= 16;
2354     int h= 16;
2355
2356     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2357     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2358
2359     if(w==16 && h==16)
2360       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2361         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2362                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2363                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2364       }else{
2365         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2366                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2367                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2368       }
2369     else
2370         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2371                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2372                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2373 }
2374
2375 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2376     MpegEncContext *s= *(void**)arg;
2377
2378
2379     s->me.pre_pass=1;
2380     s->me.dia_size= s->avctx->pre_dia_size;
2381     s->first_slice_line=1;
2382     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2383         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2384             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2385         }
2386         s->first_slice_line=0;
2387     }
2388
2389     s->me.pre_pass=0;
2390
2391     return 0;
2392 }
2393
2394 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2395     MpegEncContext *s= *(void**)arg;
2396
2397     s->me.dia_size= s->avctx->dia_size;
2398     s->first_slice_line=1;
2399     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2400         s->mb_x=0; //for block init below
2401         ff_init_block_index(s);
2402         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2403             s->block_index[0]+=2;
2404             s->block_index[1]+=2;
2405             s->block_index[2]+=2;
2406             s->block_index[3]+=2;
2407
2408             /* compute motion vector & mb_type and store in context */
2409             if(s->pict_type==AV_PICTURE_TYPE_B)
2410                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2411             else
2412                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2413         }
2414         s->first_slice_line=0;
2415     }
2416     return 0;
2417 }
2418
2419 static int mb_var_thread(AVCodecContext *c, void *arg){
2420     MpegEncContext *s= *(void**)arg;
2421     int mb_x, mb_y;
2422
2423     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2424         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2425             int xx = mb_x * 16;
2426             int yy = mb_y * 16;
2427             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2428             int varc;
2429             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2430
2431             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2432                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2433
2434             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2435             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2436             s->me.mb_var_sum_temp    += varc;
2437         }
2438     }
2439     return 0;
2440 }
2441
2442 static void write_slice_end(MpegEncContext *s){
2443     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2444         if(s->partitioned_frame){
2445             ff_mpeg4_merge_partitions(s);
2446         }
2447
2448         ff_mpeg4_stuffing(&s->pb);
2449     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2450         ff_mjpeg_encode_stuffing(&s->pb);
2451     }
2452
2453     avpriv_align_put_bits(&s->pb);
2454     flush_put_bits(&s->pb);
2455
2456     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2457         s->misc_bits+= get_bits_diff(s);
2458 }
2459
2460 static void write_mb_info(MpegEncContext *s)
2461 {
2462     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2463     int offset = put_bits_count(&s->pb);
2464     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2465     int gobn = s->mb_y / s->gob_index;
2466     int pred_x, pred_y;
2467     if (CONFIG_H263_ENCODER)
2468         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2469     bytestream_put_le32(&ptr, offset);
2470     bytestream_put_byte(&ptr, s->qscale);
2471     bytestream_put_byte(&ptr, gobn);
2472     bytestream_put_le16(&ptr, mba);
2473     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2474     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2475     /* 4MV not implemented */
2476     bytestream_put_byte(&ptr, 0); /* hmv2 */
2477     bytestream_put_byte(&ptr, 0); /* vmv2 */
2478 }
2479
2480 static void update_mb_info(MpegEncContext *s, int startcode)
2481 {
2482     if (!s->mb_info)
2483         return;
2484     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2485         s->mb_info_size += 12;
2486         s->prev_mb_info = s->last_mb_info;
2487     }
2488     if (startcode) {
2489         s->prev_mb_info = put_bits_count(&s->pb)/8;
2490         /* This might have incremented mb_info_size above, and we return without
2491          * actually writing any info into that slot yet. But in that case,
2492          * this will be called again at the start of the after writing the
2493          * start code, actually writing the mb info. */
2494         return;
2495     }
2496
2497     s->last_mb_info = put_bits_count(&s->pb)/8;
2498     if (!s->mb_info_size)
2499         s->mb_info_size += 12;
2500     write_mb_info(s);
2501 }
2502
2503 static int encode_thread(AVCodecContext *c, void *arg){
2504     MpegEncContext *s= *(void**)arg;
2505     int mb_x, mb_y, pdif = 0;
2506     int chr_h= 16>>s->chroma_y_shift;
2507     int i, j;
2508     MpegEncContext best_s, backup_s;
2509     uint8_t bit_buf[2][MAX_MB_BYTES];
2510     uint8_t bit_buf2[2][MAX_MB_BYTES];
2511     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2512     PutBitContext pb[2], pb2[2], tex_pb[2];
2513
2514     for(i=0; i<2; i++){
2515         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2516         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2517         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2518     }
2519
2520     s->last_bits= put_bits_count(&s->pb);
2521     s->mv_bits=0;
2522     s->misc_bits=0;
2523     s->i_tex_bits=0;
2524     s->p_tex_bits=0;
2525     s->i_count=0;
2526     s->f_count=0;
2527     s->b_count=0;
2528     s->skip_count=0;
2529
2530     for(i=0; i<3; i++){
2531         /* init last dc values */
2532         /* note: quant matrix value (8) is implied here */
2533         s->last_dc[i] = 128 << s->intra_dc_precision;
2534
2535         s->current_picture.f->error[i] = 0;
2536     }
2537     s->mb_skip_run = 0;
2538     memset(s->last_mv, 0, sizeof(s->last_mv));
2539
2540     s->last_mv_dir = 0;
2541
2542     switch(s->codec_id){
2543     case AV_CODEC_ID_H263:
2544     case AV_CODEC_ID_H263P:
2545     case AV_CODEC_ID_FLV1:
2546         if (CONFIG_H263_ENCODER)
2547             s->gob_index = ff_h263_get_gob_height(s);
2548         break;
2549     case AV_CODEC_ID_MPEG4:
2550         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2551             ff_mpeg4_init_partitions(s);
2552         break;
2553     }
2554
2555     s->resync_mb_x=0;
2556     s->resync_mb_y=0;
2557     s->first_slice_line = 1;
2558     s->ptr_lastgob = s->pb.buf;
2559     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2560         s->mb_x=0;
2561         s->mb_y= mb_y;
2562
2563         ff_set_qscale(s, s->qscale);
2564         ff_init_block_index(s);
2565
2566         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2567             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2568             int mb_type= s->mb_type[xy];
2569 //            int d;
2570             int dmin= INT_MAX;
2571             int dir;
2572
2573             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2574                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2575                 return -1;
2576             }
2577             if(s->data_partitioning){
2578                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2579                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2580                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2581                     return -1;
2582                 }
2583             }
2584
2585             s->mb_x = mb_x;
2586             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2587             ff_update_block_index(s);
2588
2589             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2590                 ff_h261_reorder_mb_index(s);
2591                 xy= s->mb_y*s->mb_stride + s->mb_x;
2592                 mb_type= s->mb_type[xy];
2593             }
2594
2595             /* write gob / video packet header  */
2596             if(s->rtp_mode){
2597                 int current_packet_size, is_gob_start;
2598
2599                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2600
2601                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2602
2603                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2604
2605                 switch(s->codec_id){
2606                 case AV_CODEC_ID_H263:
2607                 case AV_CODEC_ID_H263P:
2608                     if(!s->h263_slice_structured)
2609                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2610                     break;
2611                 case AV_CODEC_ID_MPEG2VIDEO:
2612                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2613                 case AV_CODEC_ID_MPEG1VIDEO:
2614                     if(s->mb_skip_run) is_gob_start=0;
2615                     break;
2616                 }
2617
2618                 if(is_gob_start){
2619                     if(s->start_mb_y != mb_y || mb_x!=0){
2620                         write_slice_end(s);
2621
2622                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2623                             ff_mpeg4_init_partitions(s);
2624                         }
2625                     }
2626
2627                     assert((put_bits_count(&s->pb)&7) == 0);
2628                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2629
2630                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2631                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2632                         int d = 100 / s->error_rate;
2633                         if(r % d == 0){
2634                             current_packet_size=0;
2635                             s->pb.buf_ptr= s->ptr_lastgob;
2636                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2637                         }
2638                     }
2639
2640                     if (s->avctx->rtp_callback){
2641                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2642                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2643                     }
2644                     update_mb_info(s, 1);
2645
2646                     switch(s->codec_id){
2647                     case AV_CODEC_ID_MPEG4:
2648                         if (CONFIG_MPEG4_ENCODER) {
2649                             ff_mpeg4_encode_video_packet_header(s);
2650                             ff_mpeg4_clean_buffers(s);
2651                         }
2652                     break;
2653                     case AV_CODEC_ID_MPEG1VIDEO:
2654                     case AV_CODEC_ID_MPEG2VIDEO:
2655                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2656                             ff_mpeg1_encode_slice_header(s);
2657                             ff_mpeg1_clean_buffers(s);
2658                         }
2659                     break;
2660                     case AV_CODEC_ID_H263:
2661                     case AV_CODEC_ID_H263P:
2662                         if (CONFIG_H263_ENCODER)
2663                             ff_h263_encode_gob_header(s, mb_y);
2664                     break;
2665                     }
2666
2667                     if(s->flags&CODEC_FLAG_PASS1){
2668                         int bits= put_bits_count(&s->pb);
2669                         s->misc_bits+= bits - s->last_bits;
2670                         s->last_bits= bits;
2671                     }
2672
2673                     s->ptr_lastgob += current_packet_size;
2674                     s->first_slice_line=1;
2675                     s->resync_mb_x=mb_x;
2676                     s->resync_mb_y=mb_y;
2677                 }
2678             }
2679
2680             if(  (s->resync_mb_x   == s->mb_x)
2681                && s->resync_mb_y+1 == s->mb_y){
2682                 s->first_slice_line=0;
2683             }
2684
2685             s->mb_skipped=0;
2686             s->dquant=0; //only for QP_RD
2687
2688             update_mb_info(s, 0);
2689
2690             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2691                 int next_block=0;
2692                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2693
2694                 copy_context_before_encode(&backup_s, s, -1);
2695                 backup_s.pb= s->pb;
2696                 best_s.data_partitioning= s->data_partitioning;
2697                 best_s.partitioned_frame= s->partitioned_frame;
2698                 if(s->data_partitioning){
2699                     backup_s.pb2= s->pb2;
2700                     backup_s.tex_pb= s->tex_pb;
2701                 }
2702
2703                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2704                     s->mv_dir = MV_DIR_FORWARD;
2705                     s->mv_type = MV_TYPE_16X16;
2706                     s->mb_intra= 0;
2707                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2708                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2709                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2710                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2711                 }
2712                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2713                     s->mv_dir = MV_DIR_FORWARD;
2714                     s->mv_type = MV_TYPE_FIELD;
2715                     s->mb_intra= 0;
2716                     for(i=0; i<2; i++){
2717                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2718                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2719                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2720                     }
2721                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2722                                  &dmin, &next_block, 0, 0);
2723                 }
2724                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2725                     s->mv_dir = MV_DIR_FORWARD;
2726                     s->mv_type = MV_TYPE_16X16;
2727                     s->mb_intra= 0;
2728                     s->mv[0][0][0] = 0;
2729                     s->mv[0][0][1] = 0;
2730                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2731                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2732                 }
2733                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2734                     s->mv_dir = MV_DIR_FORWARD;
2735                     s->mv_type = MV_TYPE_8X8;
2736                     s->mb_intra= 0;
2737                     for(i=0; i<4; i++){
2738                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2739                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2740                     }
2741                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2742                                  &dmin, &next_block, 0, 0);
2743                 }
2744                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2745                     s->mv_dir = MV_DIR_FORWARD;
2746                     s->mv_type = MV_TYPE_16X16;
2747                     s->mb_intra= 0;
2748                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2749                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2750                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2751                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2752                 }
2753                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2754                     s->mv_dir = MV_DIR_BACKWARD;
2755                     s->mv_type = MV_TYPE_16X16;
2756                     s->mb_intra= 0;
2757                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2758                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2759                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2760                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2761                 }
2762                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2763                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2764                     s->mv_type = MV_TYPE_16X16;
2765                     s->mb_intra= 0;
2766                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2767                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2768                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2769                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2770                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2771                                  &dmin, &next_block, 0, 0);
2772                 }
2773                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2774                     s->mv_dir = MV_DIR_FORWARD;
2775                     s->mv_type = MV_TYPE_FIELD;
2776                     s->mb_intra= 0;
2777                     for(i=0; i<2; i++){
2778                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2779                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2780                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2781                     }
2782                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2783                                  &dmin, &next_block, 0, 0);
2784                 }
2785                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2786                     s->mv_dir = MV_DIR_BACKWARD;
2787                     s->mv_type = MV_TYPE_FIELD;
2788                     s->mb_intra= 0;
2789                     for(i=0; i<2; i++){
2790                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2791                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2792                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2793                     }
2794                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2795                                  &dmin, &next_block, 0, 0);
2796                 }
2797                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2798                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2799                     s->mv_type = MV_TYPE_FIELD;
2800                     s->mb_intra= 0;
2801                     for(dir=0; dir<2; dir++){
2802                         for(i=0; i<2; i++){
2803                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2804                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2805                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2806                         }
2807                     }
2808                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2809                                  &dmin, &next_block, 0, 0);
2810                 }
2811                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2812                     s->mv_dir = 0;
2813                     s->mv_type = MV_TYPE_16X16;
2814                     s->mb_intra= 1;
2815                     s->mv[0][0][0] = 0;
2816                     s->mv[0][0][1] = 0;
2817                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2818                                  &dmin, &next_block, 0, 0);
2819                     if(s->h263_pred || s->h263_aic){
2820                         if(best_s.mb_intra)
2821                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2822                         else
2823                             ff_clean_intra_table_entries(s); //old mode?
2824                     }
2825                 }
2826
2827                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2828                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2829                         const int last_qp= backup_s.qscale;
2830                         int qpi, qp, dc[6];
2831                         int16_t ac[6][16];
2832                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2833                         static const int dquant_tab[4]={-1,1,-2,2};
2834
2835                         assert(backup_s.dquant == 0);
2836
2837                         //FIXME intra
2838                         s->mv_dir= best_s.mv_dir;
2839                         s->mv_type = MV_TYPE_16X16;
2840                         s->mb_intra= best_s.mb_intra;
2841                         s->mv[0][0][0] = best_s.mv[0][0][0];
2842                         s->mv[0][0][1] = best_s.mv[0][0][1];
2843                         s->mv[1][0][0] = best_s.mv[1][0][0];
2844                         s->mv[1][0][1] = best_s.mv[1][0][1];
2845
2846                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2847                         for(; qpi<4; qpi++){
2848                             int dquant= dquant_tab[qpi];
2849                             qp= last_qp + dquant;
2850                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2851                                 continue;
2852                             backup_s.dquant= dquant;
2853                             if(s->mb_intra && s->dc_val[0]){
2854                                 for(i=0; i<6; i++){
2855                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2856                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2857                                 }
2858                             }
2859
2860                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2861                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2862                             if(best_s.qscale != qp){
2863                                 if(s->mb_intra && s->dc_val[0]){
2864                                     for(i=0; i<6; i++){
2865                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2866                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2867                                     }
2868                                 }
2869                             }
2870                         }
2871                     }
2872                 }
2873                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2874                     int mx= s->b_direct_mv_table[xy][0];
2875                     int my= s->b_direct_mv_table[xy][1];
2876
2877                     backup_s.dquant = 0;
2878                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2879                     s->mb_intra= 0;
2880                     ff_mpeg4_set_direct_mv(s, mx, my);
2881                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2882                                  &dmin, &next_block, mx, my);
2883                 }
2884                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2885                     backup_s.dquant = 0;
2886                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2887                     s->mb_intra= 0;
2888                     ff_mpeg4_set_direct_mv(s, 0, 0);
2889                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2890                                  &dmin, &next_block, 0, 0);
2891                 }
2892                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2893                     int coded=0;
2894                     for(i=0; i<6; i++)
2895                         coded |= s->block_last_index[i];
2896                     if(coded){
2897                         int mx,my;
2898                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2899                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2900                             mx=my=0; //FIXME find the one we actually used
2901                             ff_mpeg4_set_direct_mv(s, mx, my);
2902                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2903                             mx= s->mv[1][0][0];
2904                             my= s->mv[1][0][1];
2905                         }else{
2906                             mx= s->mv[0][0][0];
2907                             my= s->mv[0][0][1];
2908                         }
2909
2910                         s->mv_dir= best_s.mv_dir;
2911                         s->mv_type = best_s.mv_type;
2912                         s->mb_intra= 0;
2913 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2914                         s->mv[0][0][1] = best_s.mv[0][0][1];
2915                         s->mv[1][0][0] = best_s.mv[1][0][0];
2916                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2917                         backup_s.dquant= 0;
2918                         s->skipdct=1;
2919                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2920                                         &dmin, &next_block, mx, my);
2921                         s->skipdct=0;
2922                     }
2923                 }
2924
2925                 s->current_picture.qscale_table[xy] = best_s.qscale;
2926
2927                 copy_context_after_encode(s, &best_s, -1);
2928
2929                 pb_bits_count= put_bits_count(&s->pb);
2930                 flush_put_bits(&s->pb);
2931                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2932                 s->pb= backup_s.pb;
2933
2934                 if(s->data_partitioning){
2935                     pb2_bits_count= put_bits_count(&s->pb2);
2936                     flush_put_bits(&s->pb2);
2937                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2938                     s->pb2= backup_s.pb2;
2939
2940                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2941                     flush_put_bits(&s->tex_pb);
2942                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2943                     s->tex_pb= backup_s.tex_pb;
2944                 }
2945                 s->last_bits= put_bits_count(&s->pb);
2946
2947                 if (CONFIG_H263_ENCODER &&
2948                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2949                     ff_h263_update_motion_val(s);
2950
2951                 if(next_block==0){ //FIXME 16 vs linesize16
2952                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2953                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2954                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2955                 }
2956
2957                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2958                     ff_MPV_decode_mb(s, s->block);
2959             } else {
2960                 int motion_x = 0, motion_y = 0;
2961                 s->mv_type=MV_TYPE_16X16;
2962                 // only one MB-Type possible
2963
2964                 switch(mb_type){
2965                 case CANDIDATE_MB_TYPE_INTRA:
2966                     s->mv_dir = 0;
2967                     s->mb_intra= 1;
2968                     motion_x= s->mv[0][0][0] = 0;
2969                     motion_y= s->mv[0][0][1] = 0;
2970                     break;
2971                 case CANDIDATE_MB_TYPE_INTER:
2972                     s->mv_dir = MV_DIR_FORWARD;
2973                     s->mb_intra= 0;
2974                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2975                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2976                     break;
2977                 case CANDIDATE_MB_TYPE_INTER_I:
2978                     s->mv_dir = MV_DIR_FORWARD;
2979                     s->mv_type = MV_TYPE_FIELD;
2980                     s->mb_intra= 0;
2981                     for(i=0; i<2; i++){
2982                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2983                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2984                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2985                     }
2986                     break;
2987                 case CANDIDATE_MB_TYPE_INTER4V:
2988                     s->mv_dir = MV_DIR_FORWARD;
2989                     s->mv_type = MV_TYPE_8X8;
2990                     s->mb_intra= 0;
2991                     for(i=0; i<4; i++){
2992                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2993                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2994                     }
2995                     break;
2996                 case CANDIDATE_MB_TYPE_DIRECT:
2997                     if (CONFIG_MPEG4_ENCODER) {
2998                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2999                         s->mb_intra= 0;
3000                         motion_x=s->b_direct_mv_table[xy][0];
3001                         motion_y=s->b_direct_mv_table[xy][1];
3002                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3003                     }
3004                     break;
3005                 case CANDIDATE_MB_TYPE_DIRECT0:
3006                     if (CONFIG_MPEG4_ENCODER) {
3007                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3008                         s->mb_intra= 0;
3009                         ff_mpeg4_set_direct_mv(s, 0, 0);
3010                     }
3011                     break;
3012                 case CANDIDATE_MB_TYPE_BIDIR:
3013                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3014                     s->mb_intra= 0;
3015                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3016                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3017                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3018                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3019                     break;
3020                 case CANDIDATE_MB_TYPE_BACKWARD:
3021                     s->mv_dir = MV_DIR_BACKWARD;
3022                     s->mb_intra= 0;
3023                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3024                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3025                     break;
3026                 case CANDIDATE_MB_TYPE_FORWARD:
3027                     s->mv_dir = MV_DIR_FORWARD;
3028                     s->mb_intra= 0;
3029                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3030                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3031                     break;
3032                 case CANDIDATE_MB_TYPE_FORWARD_I:
3033                     s->mv_dir = MV_DIR_FORWARD;
3034                     s->mv_type = MV_TYPE_FIELD;
3035                     s->mb_intra= 0;
3036                     for(i=0; i<2; i++){
3037                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3038                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3039                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3040                     }
3041                     break;
3042                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3043                     s->mv_dir = MV_DIR_BACKWARD;
3044                     s->mv_type = MV_TYPE_FIELD;
3045                     s->mb_intra= 0;
3046                     for(i=0; i<2; i++){
3047                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3048                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3049                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3050                     }
3051                     break;
3052                 case CANDIDATE_MB_TYPE_BIDIR_I:
3053                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3054                     s->mv_type = MV_TYPE_FIELD;
3055                     s->mb_intra= 0;
3056                     for(dir=0; dir<2; dir++){
3057                         for(i=0; i<2; i++){
3058                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3059                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3060                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3061                         }
3062                     }
3063                     break;
3064                 default:
3065                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3066                 }
3067
3068                 encode_mb(s, motion_x, motion_y);
3069
3070                 // RAL: Update last macroblock type
3071                 s->last_mv_dir = s->mv_dir;
3072
3073                 if (CONFIG_H263_ENCODER &&
3074                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3075                     ff_h263_update_motion_val(s);
3076
3077                 ff_MPV_decode_mb(s, s->block);
3078             }
3079
3080             /* clean the MV table in IPS frames for direct mode in B frames */
3081             if(s->mb_intra /* && I,P,S_TYPE */){
3082                 s->p_mv_table[xy][0]=0;
3083                 s->p_mv_table[xy][1]=0;
3084             }
3085
3086             if(s->flags&CODEC_FLAG_PSNR){
3087                 int w= 16;
3088                 int h= 16;
3089
3090                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3091                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3092
3093                 s->current_picture.f->error[0] += sse(
3094                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3095                     s->dest[0], w, h, s->linesize);
3096                 s->current_picture.f->error[1] += sse(
3097                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3098                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3099                 s->current_picture.f->error[2] += sse(
3100                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3101                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3102             }
3103             if(s->loop_filter){
3104                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3105                     ff_h263_loop_filter(s);
3106             }
3107             av_dlog(s->avctx, "MB %d %d bits\n",
3108                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3109         }
3110     }
3111
3112     //not beautiful here but we must write it before flushing so it has to be here
3113     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3114         ff_msmpeg4_encode_ext_header(s);
3115
3116     write_slice_end(s);
3117
3118     /* Send the last GOB if RTP */
3119     if (s->avctx->rtp_callback) {
3120         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3121         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3122         /* Call the RTP callback to send the last GOB */
3123         emms_c();
3124         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3125     }
3126
3127     return 0;
3128 }
3129
/**
 * Add src->field to dst->field, then reset src->field to 0.
 *
 * Pointers named `dst` and `src` must be in scope where the macro is used.
 * The body is wrapped in do { } while (0) so that the two assignments always
 * behave as one statement, including when MERGE() is the unbraced body of an
 * if/else or of a loop.
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
3131 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3132     MERGE(me.scene_change_score);
3133     MERGE(me.mc_mb_var_sum_temp);
3134     MERGE(me.mb_var_sum_temp);
3135 }
3136
3137 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3138     int i;
3139
3140     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3141     MERGE(dct_count[1]);
3142     MERGE(mv_bits);
3143     MERGE(i_tex_bits);
3144     MERGE(p_tex_bits);
3145     MERGE(i_count);
3146     MERGE(f_count);
3147     MERGE(b_count);
3148     MERGE(skip_count);
3149     MERGE(misc_bits);
3150     MERGE(er.error_count);
3151     MERGE(padding_bug_score);
3152     MERGE(current_picture.f->error[0]);
3153     MERGE(current_picture.f->error[1]);
3154     MERGE(current_picture.f->error[2]);
3155
3156     if(dst->avctx->noise_reduction){
3157         for(i=0; i<64; i++){
3158             MERGE(dct_error_sum[0][i]);
3159             MERGE(dct_error_sum[1][i]);
3160         }
3161     }
3162
3163     assert(put_bits_count(&src->pb) % 8 ==0);
3164     assert(put_bits_count(&dst->pb) % 8 ==0);
3165     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3166     flush_put_bits(&dst->pb);
3167 }
3168
3169 static int estimate_qp(MpegEncContext *s, int dry_run){
3170     if (s->next_lambda){
3171         s->current_picture_ptr->f->quality =
3172         s->current_picture.f->quality = s->next_lambda;
3173         if(!dry_run) s->next_lambda= 0;
3174     } else if (!s->fixed_qscale) {
3175         s->current_picture_ptr->f->quality =
3176         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3177         if (s->current_picture.f->quality < 0)
3178             return -1;
3179     }
3180
3181     if(s->adaptive_quant){
3182         switch(s->codec_id){
3183         case AV_CODEC_ID_MPEG4:
3184             if (CONFIG_MPEG4_ENCODER)
3185                 ff_clean_mpeg4_qscales(s);
3186             break;
3187         case AV_CODEC_ID_H263:
3188         case AV_CODEC_ID_H263P:
3189         case AV_CODEC_ID_FLV1:
3190             if (CONFIG_H263_ENCODER)
3191                 ff_clean_h263_qscales(s);
3192             break;
3193         default:
3194             ff_init_qscale_tab(s);
3195         }
3196
3197         s->lambda= s->lambda_table[0];
3198         //FIXME broken
3199     }else
3200         s->lambda = s->current_picture.f->quality;
3201     update_qscale(s);
3202     return 0;
3203 }
3204
3205 /* must be called before writing the header */
3206 static void set_frame_distances(MpegEncContext * s){
3207     assert(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3208     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3209
3210     if(s->pict_type==AV_PICTURE_TYPE_B){
3211         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3212         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3213     }else{
3214         s->pp_time= s->time - s->last_non_b_time;
3215         s->last_non_b_time= s->time;
3216         assert(s->picture_number==0 || s->pp_time > 0);
3217     }
3218 }
3219
/**
 * Encode one picture.
 *
 * In order: set up timing and rounding state, run motion estimation (or the
 * macroblock variance pass for I pictures), optionally re-type a P picture
 * as I on a scene change, choose f_code/b_code, pick the quantizer with
 * estimate_qp(), write the format-specific picture header and finally run
 * encode_thread over all slice contexts and merge their results into s.
 *
 * @param s              encoder context
 * @param picture_number stored in s->picture_number and handed to the
 *                       picture header writers
 * @return 0 on success; -1, or the negative value returned by
 *         ff_update_duplicate_context(), on failure
 */
3220 static int encode_picture(MpegEncContext *s, int picture_number)
3221 {
3222     int i, ret;
3223     int bits;
3224     int context_count = s->slice_context_count;
3225
3226     s->picture_number = picture_number;
3227
3228     /* Reset the average MB variance */
3229     s->me.mb_var_sum_temp    =
3230     s->me.mc_mb_var_sum_temp = 0;
3231
3232     /* we need to initialize some time vars before we can encode b-frames */
3233     // RAL: Condition added for MPEG1VIDEO
3234     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3235         set_frame_distances(s);
3236     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3237         ff_set_mpeg4_time(s);
3238
3239     s->me.scene_change_score=0;
3240
3241 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3242
         /* Rounding control: I pictures set no_rounding to 1 only for
          * msmpeg4_version >= 3 (0 otherwise); other non-B pictures toggle it
          * when flipflop_rounding is set or the codec is H263P or MPEG4.
          * B pictures leave it unchanged. */
3243     if(s->pict_type==AV_PICTURE_TYPE_I){
3244         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3245         else                        s->no_rounding=0;
3246     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3247         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3248             s->no_rounding ^= 1;
3249     }
3250
         /* Lambda used while estimating motion: with CODEC_FLAG_PASS2 it
          * comes from a dry-run estimate_qp(); otherwise, unless
          * CODEC_FLAG_QSCALE is set, it is taken from s->last_lambda_for[]. */
3251     if(s->flags & CODEC_FLAG_PASS2){
3252         if (estimate_qp(s,1) < 0)
3253             return -1;
3254         ff_get_2pass_fcode(s);
3255     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3256         if(s->pict_type==AV_PICTURE_TYPE_B)
3257             s->lambda= s->last_lambda_for[s->pict_type];
3258         else
3259             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3260         update_qscale(s);
3261     }
3262
3263     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* Call ff_update_duplicate_context() for every additional slice
          * context (index 0 is skipped) and abort on the first error. */
3264     for(i=1; i<context_count; i++){
3265         ret = ff_update_duplicate_context(s->thread_context[i], s);
3266         if (ret < 0)
3267             return ret;
3268     }
3269
3270     if(ff_init_me(s)<0)
3271         return -1;
3272
3273     /* Estimate motion for every MB */
3274     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda and lambda2 by me_penalty_compensation/256, rounded */
3275         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3276         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3277         if (s->pict_type != AV_PICTURE_TYPE_B) {
                 /* the pre-pass runs when pre_me is 2, or when pre_me is
                  * nonzero and the previous non-B picture was an I picture */
3278             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3279                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3280             }
3281         }
3282
3283         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3284     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3285         /* I-Frame */
3286         for(i=0; i<s->mb_stride*s->mb_height; i++)
3287             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3288
3289         if(!s->fixed_qscale){
3290             /* finding spatial complexity for I-frame rate control */
3291             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3292         }
3293     }
         /* add the ME counters of the additional contexts into s */
3294     for(i=1; i<context_count; i++){
3295         merge_context_after_me(s, s->thread_context[i]);
3296     }
3297     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3298     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3299     emms_c();
3300
         /* A P picture whose scene change score exceeds the threshold is
          * re-typed as I and every macroblock becomes an intra candidate. */
3301     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3302         s->pict_type= AV_PICTURE_TYPE_I;
3303         for(i=0; i<s->mb_stride*s->mb_height; i++)
3304             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3305         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3306                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3307     }
3308
         /* Unless umvplus is set, choose f_code (and b_code for B pictures)
          * with ff_get_best_fcode() and pass every motion vector table to
          * ff_fix_long_mvs() together with the chosen code. */
3309     if(!s->umvplus){
3310         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3311             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3312
3313             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3314                 int a,b;
3315                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3316                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3317                 s->f_code= FFMAX3(s->f_code, a, b);
3318             }
3319
3320             ff_fix_long_p_mvs(s);
3321             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3322             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3323                 int j;
3324                 for(i=0; i<2; i++){
3325                     for(j=0; j<2; j++)
3326                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3327                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3328                 }
3329             }
3330         }
3331
3332         if(s->pict_type==AV_PICTURE_TYPE_B){
3333             int a, b;
3334
                 /* f_code covers the forward tables, b_code the backward ones */
3335             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3336             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3337             s->f_code = FFMAX(a, b);
3338
3339             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3340             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3341             s->b_code = FFMAX(a, b);
3342
3343             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3344             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3345             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3346             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3347             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3348                 int dir, j;
3349                 for(dir=0; dir<2; dir++){
3350                     for(i=0; i<2; i++){
3351                         for(j=0; j<2; j++){
3352                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3353                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3354                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3355                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3356                         }
3357                     }
3358                 }
3359             }
3360         }
3361     }
3362
         /* final quantizer selection (not a dry run) */
3363     if (estimate_qp(s, 0) < 0)
3364         return -1;
3365
3366     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3367         s->qscale= 3; //reduce clipping problems
3368
3369     if (s->out_format == FMT_MJPEG) {
3370         /* for mjpeg, we do include qscale in the matrix */
3371         for(i=1;i<64;i++){
3372             int j = s->idsp.idct_permutation[i];
3373
3374             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3375         }
3376         s->y_dc_scale_table=
3377         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3378         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3379         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3380                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
             /* qscale is now folded into intra_matrix, so it is pinned to 8 */
3381         s->qscale= 8;
3382     }
3383
3384     //FIXME var duplication
3385     s->current_picture_ptr->f->key_frame =
3386     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3387     s->current_picture_ptr->f->pict_type =
3388     s->current_picture.f->pict_type = s->pict_type;
3389
3390     if (s->current_picture.f->key_frame)
3391         s->picture_in_gop_number=0;
3392
         /* Write the picture header for the output format; the bit position
          * before and after gives s->header_bits. */
3393     s->last_bits= put_bits_count(&s->pb);
3394     switch(s->out_format) {
3395     case FMT_MJPEG:
3396         if (CONFIG_MJPEG_ENCODER)
3397             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3398                                            s->intra_matrix);
3399         break;
3400     case FMT_H261:
3401         if (CONFIG_H261_ENCODER)
3402             ff_h261_encode_picture_header(s, picture_number);
3403         break;
3404     case FMT_H263:
             /* FMT_H263 is shared by several codecs; the first matching
              * header writer in this chain is used */
3405         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3406             ff_wmv2_encode_picture_header(s, picture_number);
3407         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3408             ff_msmpeg4_encode_picture_header(s, picture_number);
3409         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3410             ff_mpeg4_encode_picture_header(s, picture_number);
3411         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3412             ff_rv10_encode_picture_header(s, picture_number);
3413         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3414             ff_rv20_encode_picture_header(s, picture_number);
3415         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3416             ff_flv_encode_picture_header(s, picture_number);
3417         else if (CONFIG_H263_ENCODER)
3418             ff_h263_encode_picture_header(s, picture_number);
3419         break;
3420     case FMT_MPEG1:
3421         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3422             ff_mpeg1_encode_picture_header(s, picture_number);
3423         break;
3424     default:
3425         assert(0);
3426     }
3427     bits= put_bits_count(&s->pb);
3428     s->header_bits= bits - s->last_bits;
3429
         /* Call update_duplicate_context_after_me() for the additional
          * contexts, run encode_thread on all contexts, then merge the
          * counters and bitstreams of the additional contexts into s. */
3430     for(i=1; i<context_count; i++){
3431         update_duplicate_context_after_me(s->thread_context[i], s);
3432     }
3433     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3434     for(i=1; i<context_count; i++){
3435         merge_context_after_encode(s, s->thread_context[i]);
3436     }
3437     emms_c();
3438     return 0;
3439 }
3440
3441 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3442     const int intra= s->mb_intra;
3443     int i;
3444
3445     s->dct_count[intra]++;
3446
3447     for(i=0; i<64; i++){
3448         int level= block[i];
3449
3450         if(level){
3451             if(level>0){
3452                 s->dct_error_sum[intra][i] += level;
3453                 level -= s->dct_offset[intra][i];
3454                 if(level<0) level=0;
3455             }else{
3456                 s->dct_error_sum[intra][i] -= level;
3457                 level += s->dct_offset[intra][i];
3458                 if(level>0) level=0;
3459             }
3460             block[i]= level;
3461         }
3462     }
3463 }
3464
/**
 * Transform and quantize one block using a rate-distortion search.
 *
 * The block is passed through s->fdsp.fdct() (and s->denoise_dct() when
 * s->dct_error_sum is set). For every coefficient up to the last one above
 * the quantization threshold, one or two candidate levels are generated.
 * A dynamic-programming pass then selects the run/level sequence with the
 * lowest score, where a score is the squared reconstruction error relative
 * to coding zero plus lambda times the VLC length table entry (or
 * esc_length for levels outside [-64, 63]). The selected levels are written
 * back into block.
 *
 * @param s        encoder context; s->coded_score[n] is written
 * @param block    64 values, transformed and quantized in place
 * @param n        block number; for intra blocks n < 4 selects y_dc_scale,
 *                 otherwise c_dc_scale
 * @param qscale   quantizer; selects the row of q_intra_matrix/q_inter_matrix
 * @param overflow set to s->max_qcoeff < max, where max is the bitwise OR of
 *                 the candidate magnitudes
 * @return scan position of the last nonzero coefficient; a value below the
 *         first coded position (0 for intra, -1 otherwise) when nothing is
 *         coded
 */
3465 static int dct_quantize_trellis_c(MpegEncContext *s,
3466                                   int16_t *block, int n,
3467                                   int qscale, int *overflow){
3468     const int *qmat;
3469     const uint8_t *scantable= s->intra_scantable.scantable;
3470     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3471     int max=0;
3472     unsigned int threshold1, threshold2;
3473     int bias=0;
3474     int run_tab[65];
3475     int level_tab[65];
3476     int score_tab[65];
3477     int survivor[65];
3478     int survivor_count;
3479     int last_run=0;
3480     int last_level=0;
3481     int last_score= 0;
3482     int last_i;
3483     int coeff[2][64];
3484     int coeff_count[64];
3485     int qmul, qadd, start_i, last_non_zero, i, dc;
3486     const int esc_length= s->ac_esc_length;
3487     uint8_t * length;
3488     uint8_t * last_length;
3489     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3490
3491     s->fdsp.fdct(block);
3492
3493     if(s->dct_error_sum)
3494         s->denoise_dct(s, block);
         /* used as alevel*qmul + qadd in the FMT_H263 branches below */
3495     qmul= qscale*16;
3496     qadd= ((qscale-1)|1)*8;
3497
3498     if (s->mb_intra) {
3499         int q;
3500         if (!s->h263_aic) {
3501             if (n < 4)
3502                 q = s->y_dc_scale;
3503             else
3504                 q = s->c_dc_scale;
3505             q = q << 3;
3506         } else{
3507             /* For AIC we skip quant/dequant of INTRADC */
3508             q = 1 << 3;
3509             qadd=0;
3510         }
3511
3512         /* note: block[0] is assumed to be positive */
3513         block[0] = (block[0] + (q >> 1)) / q;
             /* the DC coefficient is done; the search covers positions 1..63 */
3514         start_i = 1;
3515         last_non_zero = 0;
3516         qmat = s->q_intra_matrix[qscale];
3517         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3518             bias= 1<<(QMAT_SHIFT-1);
3519         length     = s->intra_ac_vlc_length;
3520         last_length= s->intra_ac_vlc_last_length;
3521     } else {
3522         start_i = 0;
3523         last_non_zero = -1;
3524         qmat = s->q_inter_matrix[qscale];
3525         length     = s->inter_ac_vlc_length;
3526         last_length= s->inter_ac_vlc_last_length;
3527     }
3528     last_i= start_i;
3529
         /* (unsigned)(level+threshold1) > threshold2 is true exactly when
          * level lies outside [-threshold1, threshold1] */
3530     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3531     threshold2= (threshold1<<1);
3532
         /* scan backwards for the last coefficient above the threshold */
3533     for(i=63; i>=start_i; i--) {
3534         const int j = scantable[i];
3535         int level = block[j] * qmat[j];
3536
3537         if(((unsigned)(level+threshold1))>threshold2){
3538             last_non_zero = i;
3539             break;
3540         }
3541     }
3542
         /* Build the candidate levels. Above the threshold: the quantized
          * level and, when its magnitude is at least 2, the level one step
          * closer to zero. Otherwise: a single candidate of +1 or -1 with
          * the sign of the coefficient. */
3543     for(i=start_i; i<=last_non_zero; i++) {
3544         const int j = scantable[i];
3545         int level = block[j] * qmat[j];
3546
3547 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3548 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3549         if(((unsigned)(level+threshold1))>threshold2){
3550             if(level>0){
3551                 level= (bias + level)>>QMAT_SHIFT;
3552                 coeff[0][i]= level;
3553                 coeff[1][i]= level-1;
3554 //                coeff[2][k]= level-2;
3555             }else{
3556                 level= (bias - level)>>QMAT_SHIFT;
3557                 coeff[0][i]= -level;
3558                 coeff[1][i]= -level+1;
3559 //                coeff[2][k]= -level+2;
3560             }
3561             coeff_count[i]= FFMIN(level, 2);
3562             assert(coeff_count[i]);
3563             max |=level;
3564         }else{
3565             coeff[0][i]= (level>>31)|1;
3566             coeff_count[i]= 1;
3567         }
3568     }
3569
3570     *overflow= s->max_qcoeff < max; //overflow might have happened
3571
         /* nothing above the threshold: clear the block and return */
3572     if(last_non_zero < start_i){
3573         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3574         return last_non_zero;
3575     }
3576
         /* score_tab[i+1] receives the best score found for coding a nonzero
          * level at scan position i; survivor[] holds the score_tab indices
          * a run is still allowed to start from. */
3577     score_tab[start_i]= 0;
3578     survivor[0]= start_i;
3579     survivor_count= 1;
3580
3581     for(i=start_i; i<=last_non_zero; i++){
3582         int level_index, j, zero_distortion;
3583         int dct_coeff= FFABS(block[ scantable[i] ]);
3584         int best_score=256*256*256*120;
3585
3586         if (s->fdsp.fdct == ff_fdct_ifast)
3587             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3588         zero_distortion= dct_coeff*dct_coeff;
3589
3590         for(level_index=0; level_index < coeff_count[i]; level_index++){
3591             int distortion;
3592             int level= coeff[level_index][i];
3593             const int alevel= FFABS(level);
3594             int unquant_coeff;
3595
3596             assert(level);
3597
                 /* reconstruct the magnitude this candidate would produce */
3598             if(s->out_format == FMT_H263){
3599                 unquant_coeff= alevel*qmul + qadd;
3600             }else{ //MPEG1
3601                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3602                 if(s->mb_intra){
3603                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3604                         unquant_coeff =   (unquant_coeff - 1) | 1;
3605                 }else{
3606                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3607                         unquant_coeff =   (unquant_coeff - 1) | 1;
3608                 }
3609                 unquant_coeff<<= 3;
3610             }
3611
                 /* squared error of this candidate relative to coding zero */
3612             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* levels in [-64, 63] use the length tables; all others are
                  * charged esc_length */
3613             level+=64;
3614             if((level&(~127)) == 0){
3615                 for(j=survivor_count-1; j>=0; j--){
3616                     int run= i - survivor[j];
3617                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3618                     score += score_tab[i-run];
3619
3620                     if(score < best_score){
3621                         best_score= score;
3622                         run_tab[i+1]= run;
3623                         level_tab[i+1]= level-64;
3624                     }
3625                 }
3626
                     /* for FMT_H263 also track the best way to end the block
                      * at this position, using last_length */
3627                 if(s->out_format == FMT_H263){
3628                     for(j=survivor_count-1; j>=0; j--){
3629                         int run= i - survivor[j];
3630                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3631                         score += score_tab[i-run];
3632                         if(score < last_score){
3633                             last_score= score;
3634                             last_run= run;
3635                             last_level= level-64;
3636                             last_i= i+1;
3637                         }
3638                     }
3639                 }
3640             }else{
3641                 distortion += esc_length*lambda;
3642                 for(j=survivor_count-1; j>=0; j--){
3643                     int run= i - survivor[j];
3644                     int score= distortion + score_tab[i-run];
3645
3646                     if(score < best_score){
3647                         best_score= score;
3648                         run_tab[i+1]= run;
3649                         level_tab[i+1]= level-64;
3650                     }
3651                 }
3652
3653                 if(s->out_format == FMT_H263){
3654                   for(j=survivor_count-1; j>=0; j--){
3655                         int run= i - survivor[j];
3656                         int score= distortion + score_tab[i-run];
3657                         if(score < last_score){
3658                             last_score= score;
3659                             last_run= run;
3660                             last_level= level-64;
3661                             last_i= i+1;
3662                         }
3663                     }
3664                 }
3665             }
3666         }
3667
3668         score_tab[i+1]= best_score;
3669
             /* drop trailing survivors whose score exceeds the new best score
              * (plus lambda when last_non_zero > 27) */
3670         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3671         if(last_non_zero <= 27){
3672             for(; survivor_count; survivor_count--){
3673                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3674                     break;
3675             }
3676         }else{
3677             for(; survivor_count; survivor_count--){
3678                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3679                     break;
3680             }
3681         }
3682
3683         survivor[ survivor_count++ ]= i+1;
3684     }
3685
         /* For formats other than FMT_H263 the end position is chosen here:
          * every candidate end except index 0 is charged 2*lambda. */
3686     if(s->out_format != FMT_H263){
3687         last_score= 256*256*256*120;
3688         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3689             int score= score_tab[i];
3690             if(i) score += lambda*2; //FIXME exacter?
3691
3692             if(score < last_score){
3693                 last_score= score;
3694                 last_i= i;
3695                 last_level= level_tab[i];
3696                 last_run= run_tab[i];
3697             }
3698         }
3699     }
3700
3701     s->coded_score[n] = last_score;
3702
         /* dc is sampled before the coefficients from start_i on are cleared */
3703     dc= FFABS(block[0]);
3704     last_non_zero= last_i - 1;
3705     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3706
3707     if(last_non_zero < start_i)
3708         return last_non_zero;
3709
         /* Only position 0 of a non-intra block survived: compare each
          * candidate level against coding nothing (score dc*dc) and return
          * -1 when coding nothing wins. */
3710     if(last_non_zero == 0 && start_i == 0){
3711         int best_level= 0;
3712         int best_score= dc * dc;
3713
3714         for(i=0; i<coeff_count[0]; i++){
3715             int level= coeff[i][0];
3716             int alevel= FFABS(level);
3717             int unquant_coeff, score, distortion;
3718
3719             if(s->out_format == FMT_H263){
3720                     unquant_coeff= (alevel*qmul + qadd)>>3;
3721             }else{ //MPEG1
3722                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3723                     unquant_coeff =   (unquant_coeff - 1) | 1;
3724             }
3725             unquant_coeff = (unquant_coeff + 4) >> 3;
3726             unquant_coeff<<= 3 + 3;
3727
3728             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3729             level+=64;
3730             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3731             else                    score= distortion + esc_length*lambda;
3732
3733             if(score < best_score){
3734                 best_score= score;
3735                 best_level= level - 64;
3736             }
3737         }
3738         block[0]= best_level;
3739         s->coded_score[n] = best_score - dc*dc;
3740         if(best_level == 0) return -1;
3741         else                return last_non_zero;
3742     }
3743
         /* Write the chosen levels into block in permuted scan order: first
          * the last coefficient, then walk backwards through run_tab and
          * level_tab. */
3744     i= last_i;
3745     assert(last_level);
3746
3747     block[ perm_scantable[last_non_zero] ]= last_level;
3748     i -= last_run + 1;
3749
3750     for(; i>start_i; i -= run_tab[i] + 1){
3751         block[ perm_scantable[i-1] ]= level_tab[i];
3752     }
3753
3754     return last_non_zero;
3755 }
3756
/* Uncomment to compile in the timers and static counters that
 * dct_quantize_refine() guards with #ifdef REFINE_STATS. */
3757 //#define REFINE_STATS 1
/* 64 rows of 64 samples, filled by build_basis(). dct_quantize_refine()
 * builds the table on first use, i.e. while basis[0][0] is still 0. */
3758 static int16_t basis[64][64];
3759
3760 static void build_basis(uint8_t *perm){
3761     int i, j, x, y;
3762     emms_c();
3763     for(i=0; i<8; i++){
3764         for(j=0; j<8; j++){
3765             for(y=0; y<8; y++){
3766                 for(x=0; x<8; x++){
3767                     double s= 0.25*(1<<BASIS_SHIFT);
3768                     int index= 8*i + j;
3769                     int perm_index= perm[index];
3770                     if(i==0) s*= sqrt(0.5);
3771                     if(j==0) s*= sqrt(0.5);
3772                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3773                 }
3774             }
3775         }
3776     }
3777 }
3778
3779 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3780                         int16_t *block, int16_t *weight, int16_t *orig,
3781                         int n, int qscale){
3782     int16_t rem[64];
3783     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3784     const uint8_t *scantable= s->intra_scantable.scantable;
3785     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3786 //    unsigned int threshold1, threshold2;
3787 //    int bias=0;
3788     int run_tab[65];
3789     int prev_run=0;
3790     int prev_level=0;
3791     int qmul, qadd, start_i, last_non_zero, i, dc;
3792     uint8_t * length;
3793     uint8_t * last_length;
3794     int lambda;
3795     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3796 #ifdef REFINE_STATS
3797 static int count=0;
3798 static int after_last=0;
3799 static int to_zero=0;
3800 static int from_zero=0;
3801 static int raise=0;
3802 static int lower=0;
3803 static int messed_sign=0;
3804 #endif
3805
3806     if(basis[0][0] == 0)
3807         build_basis(s->idsp.idct_permutation);
3808
3809     qmul= qscale*2;
3810     qadd= (qscale-1)|1;
3811     if (s->mb_intra) {
3812         if (!s->h263_aic) {
3813             if (n < 4)
3814                 q = s->y_dc_scale;
3815             else
3816                 q = s->c_dc_scale;
3817         } else{
3818             /* For AIC we skip quant/dequant of INTRADC */
3819             q = 1;
3820             qadd=0;
3821         }
3822         q <<= RECON_SHIFT-3;
3823         /* note: block[0] is assumed to be positive */
3824         dc= block[0]*q;
3825 //        block[0] = (block[0] + (q >> 1)) / q;
3826         start_i = 1;
3827 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3828 //            bias= 1<<(QMAT_SHIFT-1);
3829         length     = s->intra_ac_vlc_length;
3830         last_length= s->intra_ac_vlc_last_length;
3831     } else {
3832         dc= 0;
3833         start_i = 0;
3834         length     = s->inter_ac_vlc_length;
3835         last_length= s->inter_ac_vlc_last_length;
3836     }
3837     last_non_zero = s->block_last_index[n];
3838
3839 #ifdef REFINE_STATS
3840 {START_TIMER
3841 #endif
3842     dc += (1<<(RECON_SHIFT-1));
3843     for(i=0; i<64; i++){
3844         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3845     }
3846 #ifdef REFINE_STATS
3847 STOP_TIMER("memset rem[]")}
3848 #endif
3849     sum=0;
3850     for(i=0; i<64; i++){
3851         int one= 36;
3852         int qns=4;
3853         int w;
3854
3855         w= FFABS(weight[i]) + qns*one;
3856         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3857
3858         weight[i] = w;
3859 //        w=weight[i] = (63*qns + (w/2)) / w;
3860
3861         assert(w>0);
3862         assert(w<(1<<6));
3863         sum += w*w;
3864     }
3865     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3866 #ifdef REFINE_STATS
3867 {START_TIMER
3868 #endif
3869     run=0;
3870     rle_index=0;
3871     for(i=start_i; i<=last_non_zero; i++){
3872         int j= perm_scantable[i];
3873         const int level= block[j];
3874         int coeff;
3875
3876         if(level){
3877             if(level<0) coeff= qmul*level - qadd;
3878             else        coeff= qmul*level + qadd;
3879             run_tab[rle_index++]=run;
3880             run=0;
3881
3882             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
3883         }else{
3884             run++;
3885         }
3886     }
3887 #ifdef REFINE_STATS
3888 if(last_non_zero>0){
3889 STOP_TIMER("init rem[]")
3890 }
3891 }
3892
3893 {START_TIMER
3894 #endif
3895     for(;;){
3896         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
3897         int best_coeff=0;
3898         int best_change=0;
3899         int run2, best_unquant_change=0, analyze_gradient;
3900 #ifdef REFINE_STATS
3901 {START_TIMER
3902 #endif
3903         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3904
3905         if(analyze_gradient){
3906 #ifdef REFINE_STATS
3907 {START_TIMER
3908 #endif
3909             for(i=0; i<64; i++){
3910                 int w= weight[i];
3911
3912                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3913             }
3914 #ifdef REFINE_STATS
3915 STOP_TIMER("rem*w*w")}
3916 {START_TIMER
3917 #endif
3918             s->fdsp.fdct(d1);
3919 #ifdef REFINE_STATS
3920 STOP_TIMER("dct")}
3921 #endif
3922         }
3923
3924         if(start_i){
3925             const int level= block[0];
3926             int change, old_coeff;
3927
3928             assert(s->mb_intra);
3929
3930             old_coeff= q*level;
3931
3932             for(change=-1; change<=1; change+=2){
3933                 int new_level= level + change;
3934                 int score, new_coeff;
3935
3936                 new_coeff= q*new_level;
3937                 if(new_coeff >= 2048 || new_coeff < 0)
3938                     continue;
3939
3940                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
3941                                                   new_coeff - old_coeff);
3942                 if(score<best_score){
3943                     best_score= score;
3944                     best_coeff= 0;
3945                     best_change= change;
3946                     best_unquant_change= new_coeff - old_coeff;
3947                 }
3948             }
3949         }
3950
3951         run=0;
3952         rle_index=0;
3953         run2= run_tab[rle_index++];
3954         prev_level=0;
3955         prev_run=0;
3956
3957         for(i=start_i; i<64; i++){
3958             int j= perm_scantable[i];
3959             const int level= block[j];
3960             int change, old_coeff;
3961
3962             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3963                 break;
3964
3965             if(level){
3966                 if(level<0) old_coeff= qmul*level - qadd;
3967                 else        old_coeff= qmul*level + qadd;
3968                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3969             }else{
3970                 old_coeff=0;
3971                 run2--;
3972                 assert(run2>=0 || i >= last_non_zero );
3973             }
3974
3975             for(change=-1; change<=1; change+=2){
3976                 int new_level= level + change;
3977                 int score, new_coeff, unquant_change;
3978
3979                 score=0;
3980                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3981                    continue;
3982
3983                 if(new_level){
3984                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3985                     else            new_coeff= qmul*new_level + qadd;
3986                     if(new_coeff >= 2048 || new_coeff <= -2048)
3987                         continue;
3988                     //FIXME check for overflow
3989
3990                     if(level){
3991                         if(level < 63 && level > -63){
3992                             if(i < last_non_zero)
3993                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3994                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3995                             else
3996                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3997                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3998                         }
3999                     }else{
4000                         assert(FFABS(new_level)==1);
4001
4002                         if(analyze_gradient){
4003                             int g= d1[ scantable[i] ];
4004                             if(g && (g^new_level) >= 0)
4005                                 continue;
4006                         }
4007
4008                         if(i < last_non_zero){
4009                             int next_i= i + run2 + 1;
4010                             int next_level= block[ perm_scantable[next_i] ] + 64;
4011
4012                             if(next_level&(~127))
4013                                 next_level= 0;
4014
4015                             if(next_i < last_non_zero)
4016                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4017                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4018                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4019                             else
4020                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4021                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4022                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4023                         }else{
4024                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4025                             if(prev_level){
4026                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4027                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4028                             }
4029                         }
4030                     }
4031                 }else{
4032                     new_coeff=0;
4033                     assert(FFABS(level)==1);
4034
4035                     if(i < last_non_zero){
4036                         int next_i= i + run2 + 1;
4037                         int next_level= block[ perm_scantable[next_i] ] + 64;
4038
4039                         if(next_level&(~127))
4040                             next_level= 0;
4041
4042                         if(next_i < last_non_zero)
4043                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4044                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4045                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4046                         else
4047                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4048                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4049                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4050                     }else{
4051                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4052                         if(prev_level){
4053                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4054                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4055                         }
4056                     }
4057                 }
4058
4059                 score *= lambda;
4060
4061                 unquant_change= new_coeff - old_coeff;
4062                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
4063
4064                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4065                                                    unquant_change);
4066                 if(score<best_score){
4067                     best_score= score;
4068                     best_coeff= i;
4069                     best_change= change;
4070                     best_unquant_change= unquant_change;
4071                 }
4072             }
4073             if(level){
4074                 prev_level= level + 64;
4075                 if(prev_level&(~127))
4076                     prev_level= 0;
4077                 prev_run= run;
4078                 run=0;
4079             }else{
4080                 run++;
4081             }
4082         }
4083 #ifdef REFINE_STATS
4084 STOP_TIMER("iterative step")}
4085 #endif
4086
4087         if(best_change){
4088             int j= perm_scantable[ best_coeff ];
4089
4090             block[j] += best_change;
4091
4092             if(best_coeff > last_non_zero){
4093                 last_non_zero= best_coeff;
4094                 assert(block[j]);
4095 #ifdef REFINE_STATS
4096 after_last++;
4097 #endif
4098             }else{
4099 #ifdef REFINE_STATS
4100 if(block[j]){
4101     if(block[j] - best_change){
4102         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4103             raise++;
4104         }else{
4105             lower++;
4106         }
4107     }else{
4108         from_zero++;
4109     }
4110 }else{
4111     to_zero++;
4112 }
4113 #endif
4114                 for(; last_non_zero>=start_i; last_non_zero--){
4115                     if(block[perm_scantable[last_non_zero]])
4116                         break;
4117                 }
4118             }
4119 #ifdef REFINE_STATS
4120 count++;
4121 if(256*256*256*64 % count == 0){
4122     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4123 }
4124 #endif
4125             run=0;
4126             rle_index=0;
4127             for(i=start_i; i<=last_non_zero; i++){
4128                 int j= perm_scantable[i];
4129                 const int level= block[j];
4130
4131                  if(level){
4132                      run_tab[rle_index++]=run;
4133                      run=0;
4134                  }else{
4135                      run++;
4136                  }
4137             }
4138
4139             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4140         }else{
4141             break;
4142         }
4143     }
4144 #ifdef REFINE_STATS
4145 if(last_non_zero>0){
4146 STOP_TIMER("iterative search")
4147 }
4148 }
4149 #endif
4150
4151     return last_non_zero;
4152 }
4153
4154 int ff_dct_quantize_c(MpegEncContext *s,
4155                         int16_t *block, int n,
4156                         int qscale, int *overflow)
4157 {
4158     int i, j, level, last_non_zero, q, start_i;
4159     const int *qmat;
4160     const uint8_t *scantable= s->intra_scantable.scantable;
4161     int bias;
4162     int max=0;
4163     unsigned int threshold1, threshold2;
4164
4165     s->fdsp.fdct(block);
4166
4167     if(s->dct_error_sum)
4168         s->denoise_dct(s, block);
4169
4170     if (s->mb_intra) {
4171         if (!s->h263_aic) {
4172             if (n < 4)
4173                 q = s->y_dc_scale;
4174             else
4175                 q = s->c_dc_scale;
4176             q = q << 3;
4177         } else
4178             /* For AIC we skip quant/dequant of INTRADC */
4179             q = 1 << 3;
4180
4181         /* note: block[0] is assumed to be positive */
4182         block[0] = (block[0] + (q >> 1)) / q;
4183         start_i = 1;
4184         last_non_zero = 0;
4185         qmat = s->q_intra_matrix[qscale];
4186         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4187     } else {
4188         start_i = 0;
4189         last_non_zero = -1;
4190         qmat = s->q_inter_matrix[qscale];
4191         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4192     }
4193     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4194     threshold2= (threshold1<<1);
4195     for(i=63;i>=start_i;i--) {
4196         j = scantable[i];
4197         level = block[j] * qmat[j];
4198
4199         if(((unsigned)(level+threshold1))>threshold2){
4200             last_non_zero = i;
4201             break;
4202         }else{
4203             block[j]=0;
4204         }
4205     }
4206     for(i=start_i; i<=last_non_zero; i++) {
4207         j = scantable[i];
4208         level = block[j] * qmat[j];
4209
4210 //        if(   bias+level >= (1<<QMAT_SHIFT)
4211 //           || bias-level >= (1<<QMAT_SHIFT)){
4212         if(((unsigned)(level+threshold1))>threshold2){
4213             if(level>0){
4214                 level= (bias + level)>>QMAT_SHIFT;
4215                 block[j]= level;
4216             }else{
4217                 level= (bias - level)>>QMAT_SHIFT;
4218                 block[j]= -level;
4219             }
4220             max |=level;
4221         }else{
4222             block[j]=0;
4223         }
4224     }
4225     *overflow= s->max_qcoeff < max; //overflow might have happened
4226
4227     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4228     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4229         ff_block_permute(block, s->idsp.idct_permutation,
4230                          scantable, last_non_zero);
4231
4232     return last_non_zero;
4233 }
4234
4235 #define OFFSET(x) offsetof(MpegEncContext, x)
4236 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4237 static const AVOption h263_options[] = {
4238     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4239     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4240     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4241     FF_MPV_COMMON_OPTS
4242     { NULL },
4243 };
4244
4245 static const AVClass h263_class = {
4246     .class_name = "H.263 encoder",
4247     .item_name  = av_default_item_name,
4248     .option     = h263_options,
4249     .version    = LIBAVUTIL_VERSION_INT,
4250 };
4251
4252 AVCodec ff_h263_encoder = {
4253     .name           = "h263",
4254     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4255     .type           = AVMEDIA_TYPE_VIDEO,
4256     .id             = AV_CODEC_ID_H263,
4257     .priv_data_size = sizeof(MpegEncContext),
4258     .init           = ff_MPV_encode_init,
4259     .encode2        = ff_MPV_encode_picture,
4260     .close          = ff_MPV_encode_end,
4261     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4262     .priv_class     = &h263_class,
4263 };
4264
4265 static const AVOption h263p_options[] = {
4266     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4267     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4268     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4269     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4270     FF_MPV_COMMON_OPTS
4271     { NULL },
4272 };
4273 static const AVClass h263p_class = {
4274     .class_name = "H.263p encoder",
4275     .item_name  = av_default_item_name,
4276     .option     = h263p_options,
4277     .version    = LIBAVUTIL_VERSION_INT,
4278 };
4279
4280 AVCodec ff_h263p_encoder = {
4281     .name           = "h263p",
4282     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4283     .type           = AVMEDIA_TYPE_VIDEO,
4284     .id             = AV_CODEC_ID_H263P,
4285     .priv_data_size = sizeof(MpegEncContext),
4286     .init           = ff_MPV_encode_init,
4287     .encode2        = ff_MPV_encode_picture,
4288     .close          = ff_MPV_encode_end,
4289     .capabilities   = CODEC_CAP_SLICE_THREADS,
4290     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4291     .priv_class     = &h263p_class,
4292 };
4293
4294 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4295
4296 AVCodec ff_msmpeg4v2_encoder = {
4297     .name           = "msmpeg4v2",
4298     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4299     .type           = AVMEDIA_TYPE_VIDEO,
4300     .id             = AV_CODEC_ID_MSMPEG4V2,
4301     .priv_data_size = sizeof(MpegEncContext),
4302     .init           = ff_MPV_encode_init,
4303     .encode2        = ff_MPV_encode_picture,
4304     .close          = ff_MPV_encode_end,
4305     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4306     .priv_class     = &msmpeg4v2_class,
4307 };
4308
4309 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4310
4311 AVCodec ff_msmpeg4v3_encoder = {
4312     .name           = "msmpeg4",
4313     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4314     .type           = AVMEDIA_TYPE_VIDEO,
4315     .id             = AV_CODEC_ID_MSMPEG4V3,
4316     .priv_data_size = sizeof(MpegEncContext),
4317     .init           = ff_MPV_encode_init,
4318     .encode2        = ff_MPV_encode_picture,
4319     .close          = ff_MPV_encode_end,
4320     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4321     .priv_class     = &msmpeg4v3_class,
4322 };
4323
4324 FF_MPV_GENERIC_CLASS(wmv1)
4325
4326 AVCodec ff_wmv1_encoder = {
4327     .name           = "wmv1",
4328     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4329     .type           = AVMEDIA_TYPE_VIDEO,
4330     .id             = AV_CODEC_ID_WMV1,
4331     .priv_data_size = sizeof(MpegEncContext),
4332     .init           = ff_MPV_encode_init,
4333     .encode2        = ff_MPV_encode_picture,
4334     .close          = ff_MPV_encode_end,
4335     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4336     .priv_class     = &wmv1_class,
4337 };