1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "dsputil.h"
41 #include "idctdsp.h"
42 #include "mpeg12.h"
43 #include "mpegvideo.h"
44 #include "h261.h"
45 #include "h263.h"
46 #include "mjpegenc_common.h"
47 #include "mathops.h"
48 #include "mpegutils.h"
49 #include "mjpegenc.h"
50 #include "msmpeg4.h"
51 #include "qpeldsp.h"
52 #include "faandct.h"
53 #include "thread.h"
54 #include "aandcttab.h"
55 #include "flv.h"
56 #include "mpeg4video.h"
57 #include "internal.h"
58 #include "bytestream.h"
59 #include <limits.h>
60
61 static int encode_picture(MpegEncContext *s, int picture_number);
62 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
63 static int sse_mb(MpegEncContext *s);
64 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
65 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
66
67 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
68 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
69
70 const AVOption ff_mpv_generic_options[] = {
71     FF_MPV_COMMON_OPTS
72     { NULL },
73 };
74
75 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
76                        uint16_t (*qmat16)[2][64],
77                        const uint16_t *quant_matrix,
78                        int bias, int qmin, int qmax, int intra)
79 {
80     FDCTDSPContext *fdsp = &s->fdsp;
81     int qscale;
82     int shift = 0;
83
84     for (qscale = qmin; qscale <= qmax; qscale++) {
85         int i;
86         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
87             fdsp->fdct == ff_jpeg_fdct_islow_10 ||
88             fdsp->fdct == ff_faandct) {
89             for (i = 0; i < 64; i++) {
90                 const int j = s->idsp.idct_permutation[i];
91                 /* 16 <= qscale * quant_matrix[i] <= 7905
92                  * Assume x = qscale * quant_matrix[i]
93                  * so (1 << QMAT_SHIFT) / 16 >= (1 << QMAT_SHIFT) / (x)
94                  *                          >= (1 << QMAT_SHIFT) / 7905
95                  * (the ff_aanscales factor only matters for the ifast FDCT below) */
96
97                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
98                                         (qscale * quant_matrix[j]));
99             }
100         } else if (fdsp->fdct == ff_fdct_ifast) {
101             for (i = 0; i < 64; i++) {
102                 const int j = s->idsp.idct_permutation[i];
103                 /* 16 <= qscale * quant_matrix[i] <= 7905
104                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
105                  *             19952 <=              x  <= 249205026
106                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
107                  *           3444240 >= (1 << 36) / (x) >= 275 */
108
109                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
110                                         (ff_aanscales[i] * qscale *
111                                          quant_matrix[j]));
112             }
113         } else {
114             for (i = 0; i < 64; i++) {
115                 const int j = s->idsp.idct_permutation[i];
116                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
117                  * Assume x = qscale * quant_matrix[i]
118                  * So             16 <=              x  <= 7905
119                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
120                  * so          32768 >= (1 << 19) / (x) >= 67 */
121                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
122                                         (qscale * quant_matrix[j]));
123                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
124                 //                    (qscale * quant_matrix[i]);
125                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
126                                        (qscale * quant_matrix[j]);
127
128                 if (qmat16[qscale][0][i] == 0 ||
129                     qmat16[qscale][0][i] == 128 * 256)
130                     qmat16[qscale][0][i] = 128 * 256 - 1;
131                 qmat16[qscale][1][i] =
132                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
133                                 qmat16[qscale][0][i]);
134             }
135         }
136
137         for (i = intra; i < 64; i++) {
138             int64_t max = 8191;
139             if (fdsp->fdct == ff_fdct_ifast) {
140                 max = (8191LL * ff_aanscales[i]) >> 14;
141             }
142             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
143                 shift++;
144             }
145         }
146     }
147     if (shift) {
148         av_log(NULL, AV_LOG_INFO,
149                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
150                QMAT_SHIFT - shift);
151     }
152 }
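/* Editorial note (not from the original source): the tables built above turn
 * the per-coefficient division by (qscale * quant_matrix[i]) into a multiply
 * and shift.  For example, with qscale = 2 and quant_matrix[i] = 16 the
 * divisor is 32, so qmat[2][i] = (1 << QMAT_SHIFT) / 32 and dct_quantize can
 * later approximate level / 32 as (level * qmat[2][i]) >> QMAT_SHIFT without
 * dividing; the shift/overflow loop above guards the worst-case product. */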
153
154 static inline void update_qscale(MpegEncContext *s)
155 {
156     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
157                 (FF_LAMBDA_SHIFT + 7);
158     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
159
160     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
161                  FF_LAMBDA_SHIFT;
162 }
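/* Editorial note: 139 / (1 << (FF_LAMBDA_SHIFT + 7)) = 139 / 16384 is roughly
 * 1 / 118 = 1 / FF_QP2LAMBDA, so update_qscale() is effectively the rounded
 * inverse of the usual qscale -> lambda mapping, clipped to [qmin, qmax]. */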
163
164 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
165 {
166     int i;
167
168     if (matrix) {
169         put_bits(pb, 1, 1);
170         for (i = 0; i < 64; i++) {
171             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
172         }
173     } else
174         put_bits(pb, 1, 0);
175 }
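/* Editorial note: this writes the MPEG-1/2 style "load custom matrix" flag
 * followed, if set, by the 64 matrix entries as 8-bit values in zigzag scan
 * order, which is the layout expected in the sequence header. */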
176
177 /**
178  * init s->current_picture.qscale_table from s->lambda_table
179  */
180 void ff_init_qscale_tab(MpegEncContext *s)
181 {
182     int8_t * const qscale_table = s->current_picture.qscale_table;
183     int i;
184
185     for (i = 0; i < s->mb_num; i++) {
186         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
187         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
188         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
189                                                   s->avctx->qmax);
190     }
191 }
192
193 static void update_duplicate_context_after_me(MpegEncContext *dst,
194                                               MpegEncContext *src)
195 {
196 #define COPY(a) dst->a= src->a
197     COPY(pict_type);
198     COPY(current_picture);
199     COPY(f_code);
200     COPY(b_code);
201     COPY(qscale);
202     COPY(lambda);
203     COPY(lambda2);
204     COPY(picture_in_gop_number);
205     COPY(gop_picture_number);
206     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
207     COPY(progressive_frame);    // FIXME don't set in encode_header
208     COPY(partitioned_frame);    // FIXME don't set in encode_header
209 #undef COPY
210 }
211
212 /**
213  * Set the given MpegEncContext to defaults for encoding.
214  * The changed fields will not depend upon the prior state of the MpegEncContext.
215  */
216 static void MPV_encode_defaults(MpegEncContext *s)
217 {
218     int i;
219     ff_MPV_common_defaults(s);
220
221     for (i = -16; i < 16; i++) {
222         default_fcode_tab[i + MAX_MV] = 1;
223     }
224     s->me.mv_penalty = default_mv_penalty;
225     s->fcode_tab     = default_fcode_tab;
226
227     s->input_picture_number  = 0;
228     s->picture_in_gop_number = 0;
229 }
230
231 /* init video encoder */
232 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
233 {
234     MpegEncContext *s = avctx->priv_data;
235     int i, ret, format_supported;
236
237     MPV_encode_defaults(s);
238
239     switch (avctx->codec_id) {
240     case AV_CODEC_ID_MPEG2VIDEO:
241         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
242             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
243             av_log(avctx, AV_LOG_ERROR,
244                    "only YUV420 and YUV422 are supported\n");
245             return -1;
246         }
247         break;
248     case AV_CODEC_ID_MJPEG:
249         format_supported = 0;
250         /* JPEG color space */
251         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
252             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
253             (avctx->color_range == AVCOL_RANGE_JPEG &&
254              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
255               avctx->pix_fmt == AV_PIX_FMT_YUV422P)))
256             format_supported = 1;
257         /* MPEG color space */
258         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
259                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
260                   avctx->pix_fmt == AV_PIX_FMT_YUV422P))
261             format_supported = 1;
262
263         if (!format_supported) {
264             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
265             return -1;
266         }
267         break;
268     default:
269         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
270             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
271             return -1;
272         }
273     }
274
275     switch (avctx->pix_fmt) {
276     case AV_PIX_FMT_YUVJ422P:
277     case AV_PIX_FMT_YUV422P:
278         s->chroma_format = CHROMA_422;
279         break;
280     case AV_PIX_FMT_YUVJ420P:
281     case AV_PIX_FMT_YUV420P:
282     default:
283         s->chroma_format = CHROMA_420;
284         break;
285     }
286
287     s->bit_rate = avctx->bit_rate;
288     s->width    = avctx->width;
289     s->height   = avctx->height;
290     if (avctx->gop_size > 600 &&
291         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
292         av_log(avctx, AV_LOG_ERROR,
293                "Warning: keyframe interval too large, reducing it ...\n");
294         avctx->gop_size = 600;
295     }
296     s->gop_size     = avctx->gop_size;
297     s->avctx        = avctx;
298     s->flags        = avctx->flags;
299     s->flags2       = avctx->flags2;
300     if (avctx->max_b_frames > MAX_B_FRAMES) {
301         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
302                "is %d.\n", MAX_B_FRAMES);
303     }
304     s->max_b_frames = avctx->max_b_frames;
305     s->codec_id     = avctx->codec->id;
306     s->strict_std_compliance = avctx->strict_std_compliance;
307     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
308     s->mpeg_quant         = avctx->mpeg_quant;
309     s->rtp_mode           = !!avctx->rtp_payload_size;
310     s->intra_dc_precision = avctx->intra_dc_precision;
311     s->user_specified_pts = AV_NOPTS_VALUE;
312
313     if (s->gop_size <= 1) {
314         s->intra_only = 1;
315         s->gop_size   = 12;
316     } else {
317         s->intra_only = 0;
318     }
319
320     s->me_method = avctx->me_method;
321
322     /* Fixed QSCALE */
323     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
324
325     s->adaptive_quant = (s->avctx->lumi_masking ||
326                          s->avctx->dark_masking ||
327                          s->avctx->temporal_cplx_masking ||
328                          s->avctx->spatial_cplx_masking  ||
329                          s->avctx->p_masking      ||
330                          s->avctx->border_masking ||
331                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
332                         !s->fixed_qscale;
333
334     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
335
336     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
337         av_log(avctx, AV_LOG_ERROR,
338                "a vbv buffer size is needed "
339                "for encoding with a maximum bitrate\n");
340         return -1;
341     }
342
343     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
344         av_log(avctx, AV_LOG_INFO,
345                "Warning: setting min_rate > 0 while min_rate != max_rate is not recommended!\n");
346     }
347
348     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
349         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
350         return -1;
351     }
352
353     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
354         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
355         return -1;
356     }
357
358     if (avctx->rc_max_rate &&
359         avctx->rc_max_rate == avctx->bit_rate &&
360         avctx->rc_max_rate != avctx->rc_min_rate) {
361         av_log(avctx, AV_LOG_INFO,
362                "impossible bitrate constraints, this will fail\n");
363     }
364
365     if (avctx->rc_buffer_size &&
366         avctx->bit_rate * (int64_t)avctx->time_base.num >
367             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
368         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
369         return -1;
370     }
371
372     if (!s->fixed_qscale &&
373         avctx->bit_rate * av_q2d(avctx->time_base) >
374             avctx->bit_rate_tolerance) {
375         av_log(avctx, AV_LOG_ERROR,
376                "bitrate tolerance too small for bitrate\n");
377         return -1;
378     }
379
380     if (s->avctx->rc_max_rate &&
381         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
382         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
383          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
384         90000LL * (avctx->rc_buffer_size - 1) >
385             s->avctx->rc_max_rate * 0xFFFFLL) {
386         av_log(avctx, AV_LOG_INFO,
387                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
388                "specified vbv buffer is too large for the given bitrate!\n");
389     }
390
391     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
392         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
393         s->codec_id != AV_CODEC_ID_FLV1) {
394         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
395         return -1;
396     }
397
398     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
399         av_log(avctx, AV_LOG_ERROR,
400                "OBMC is only supported with simple mb decision\n");
401         return -1;
402     }
403
404     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
405         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
406         return -1;
407     }
408
409     if (s->max_b_frames                    &&
410         s->codec_id != AV_CODEC_ID_MPEG4      &&
411         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
412         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
413         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
414         return -1;
415     }
416
417     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
418          s->codec_id == AV_CODEC_ID_H263  ||
419          s->codec_id == AV_CODEC_ID_H263P) &&
420         (avctx->sample_aspect_ratio.num > 255 ||
421          avctx->sample_aspect_ratio.den > 255)) {
422         av_log(avctx, AV_LOG_ERROR,
423                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
424                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
425         return -1;
426     }
427
428     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
429         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
430         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
431         return -1;
432     }
433
434     // FIXME mpeg2 uses that too
435     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
436         av_log(avctx, AV_LOG_ERROR,
437                "mpeg2 style quantization not supported by codec\n");
438         return -1;
439     }
440
441     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
442         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
443         return -1;
444     }
445
446     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
447         s->avctx->mb_decision != FF_MB_DECISION_RD) {
448         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
449         return -1;
450     }
451
452     if (s->avctx->scenechange_threshold < 1000000000 &&
453         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
454         av_log(avctx, AV_LOG_ERROR,
455                "closed GOP with scene change detection is not supported yet, "
456                "set threshold to 1000000000\n");
457         return -1;
458     }
459
460     if (s->flags & CODEC_FLAG_LOW_DELAY) {
461         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
462             av_log(avctx, AV_LOG_ERROR,
463                   "low delay forcing is only available for mpeg2\n");
464             return -1;
465         }
466         if (s->max_b_frames != 0) {
467             av_log(avctx, AV_LOG_ERROR,
468                    "b frames cannot be used with low delay\n");
469             return -1;
470         }
471     }
472
473     if (s->q_scale_type == 1) {
474         if (avctx->qmax > 12) {
475             av_log(avctx, AV_LOG_ERROR,
476                    "non linear quant only supports qmax <= 12 currently\n");
477             return -1;
478         }
479     }
480
481     if (s->avctx->thread_count > 1         &&
482         s->codec_id != AV_CODEC_ID_MPEG4      &&
483         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
484         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
485         (s->codec_id != AV_CODEC_ID_H263P)) {
486         av_log(avctx, AV_LOG_ERROR,
487                "multi threaded encoding not supported by codec\n");
488         return -1;
489     }
490
491     if (s->avctx->thread_count < 1) {
492         av_log(avctx, AV_LOG_ERROR,
493                "automatic thread number detection not supported by codec, "
494                "patch welcome\n");
495         return -1;
496     }
497
498     if (s->avctx->thread_count > 1)
499         s->rtp_mode = 1;
500
501     if (!avctx->time_base.den || !avctx->time_base.num) {
502         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
503         return -1;
504     }
505
506     i = (INT_MAX / 2 + 128) >> 8;
507     if (avctx->mb_threshold >= i) {
508         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
509                i - 1);
510         return -1;
511     }
512
513     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
514         av_log(avctx, AV_LOG_INFO,
515                "notice: b_frame_strategy only affects the first pass\n");
516         avctx->b_frame_strategy = 0;
517     }
518
519     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
520     if (i > 1) {
521         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
522         avctx->time_base.den /= i;
523         avctx->time_base.num /= i;
524         //return -1;
525     }
526
527     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
528         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
529         // (a + x * 3 / 8) / x
530         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
531         s->inter_quant_bias = 0;
532     } else {
533         s->intra_quant_bias = 0;
534         // (a - x / 4) / x
535         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
536     }
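    /* Editorial note: the bias is expressed in units of 1 / (1 << QUANT_BIAS_SHIFT)
     * of a quantization step.  Assuming QUANT_BIAS_SHIFT == 8 as in mpegvideo.h,
     * 3 << (QUANT_BIAS_SHIFT - 3) rounds intra coefficients up by 3/8 of a step
     * (the MPEG-1/2 and MJPEG convention), while -(1 << (QUANT_BIAS_SHIFT - 2))
     * gives inter coefficients a 1/4-step dead zone. */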
537
538     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
539         s->intra_quant_bias = avctx->intra_quant_bias;
540     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
541         s->inter_quant_bias = avctx->inter_quant_bias;
542
543     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
544         s->avctx->time_base.den > (1 << 16) - 1) {
545         av_log(avctx, AV_LOG_ERROR,
546                "timebase %d/%d not supported by MPEG 4 standard, "
547                "the maximum admitted value for the timebase denominator "
548                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
549                (1 << 16) - 1);
550         return -1;
551     }
552     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
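    /* Editorial note: this is just the number of bits needed to code the values
     * 0 .. time_base.den - 1; e.g. a 30000/1001 time base (29.97 fps) needs
     * av_log2(29999) + 1 = 15 bits for the MPEG-4 time increment. */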
553
554     switch (avctx->codec->id) {
555     case AV_CODEC_ID_MPEG1VIDEO:
556         s->out_format = FMT_MPEG1;
557         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
558         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
559         break;
560     case AV_CODEC_ID_MPEG2VIDEO:
561         s->out_format = FMT_MPEG1;
562         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
563         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
564         s->rtp_mode   = 1;
565         break;
566     case AV_CODEC_ID_MJPEG:
567         s->out_format = FMT_MJPEG;
568         s->intra_only = 1; /* force intra only for jpeg */
569         if (!CONFIG_MJPEG_ENCODER ||
570             ff_mjpeg_encode_init(s) < 0)
571             return -1;
572         avctx->delay = 0;
573         s->low_delay = 1;
574         break;
575     case AV_CODEC_ID_H261:
576         if (!CONFIG_H261_ENCODER)
577             return -1;
578         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
579             av_log(avctx, AV_LOG_ERROR,
580                    "The specified picture size of %dx%d is not valid for the "
581                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
582                     s->width, s->height);
583             return -1;
584         }
585         s->out_format = FMT_H261;
586         avctx->delay  = 0;
587         s->low_delay  = 1;
588         break;
589     case AV_CODEC_ID_H263:
590         if (!CONFIG_H263_ENCODER)
591             return -1;
592         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
593                              s->width, s->height) == 8) {
594             av_log(avctx, AV_LOG_INFO,
595                    "The specified picture size of %dx%d is not valid for "
596                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
597                    "352x288, 704x576, and 1408x1152. "
598                    "Try H.263+.\n", s->width, s->height);
599             return -1;
600         }
601         s->out_format = FMT_H263;
602         avctx->delay  = 0;
603         s->low_delay  = 1;
604         break;
605     case AV_CODEC_ID_H263P:
606         s->out_format = FMT_H263;
607         s->h263_plus  = 1;
608         /* Fx */
609         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
610         s->modified_quant  = s->h263_aic;
611         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
612         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
613
614         /* /Fx */
615         /* These are just to be sure */
616         avctx->delay = 0;
617         s->low_delay = 1;
618         break;
619     case AV_CODEC_ID_FLV1:
620         s->out_format      = FMT_H263;
621         s->h263_flv        = 2; /* format = 1; 11-bit codes */
622         s->unrestricted_mv = 1;
623         s->rtp_mode  = 0; /* don't allow GOB */
624         avctx->delay = 0;
625         s->low_delay = 1;
626         break;
627     case AV_CODEC_ID_RV10:
628         s->out_format = FMT_H263;
629         avctx->delay  = 0;
630         s->low_delay  = 1;
631         break;
632     case AV_CODEC_ID_RV20:
633         s->out_format      = FMT_H263;
634         avctx->delay       = 0;
635         s->low_delay       = 1;
636         s->modified_quant  = 1;
637         s->h263_aic        = 1;
638         s->h263_plus       = 1;
639         s->loop_filter     = 1;
640         s->unrestricted_mv = 0;
641         break;
642     case AV_CODEC_ID_MPEG4:
643         s->out_format      = FMT_H263;
644         s->h263_pred       = 1;
645         s->unrestricted_mv = 1;
646         s->low_delay       = s->max_b_frames ? 0 : 1;
647         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
648         break;
649     case AV_CODEC_ID_MSMPEG4V2:
650         s->out_format      = FMT_H263;
651         s->h263_pred       = 1;
652         s->unrestricted_mv = 1;
653         s->msmpeg4_version = 2;
654         avctx->delay       = 0;
655         s->low_delay       = 1;
656         break;
657     case AV_CODEC_ID_MSMPEG4V3:
658         s->out_format        = FMT_H263;
659         s->h263_pred         = 1;
660         s->unrestricted_mv   = 1;
661         s->msmpeg4_version   = 3;
662         s->flipflop_rounding = 1;
663         avctx->delay         = 0;
664         s->low_delay         = 1;
665         break;
666     case AV_CODEC_ID_WMV1:
667         s->out_format        = FMT_H263;
668         s->h263_pred         = 1;
669         s->unrestricted_mv   = 1;
670         s->msmpeg4_version   = 4;
671         s->flipflop_rounding = 1;
672         avctx->delay         = 0;
673         s->low_delay         = 1;
674         break;
675     case AV_CODEC_ID_WMV2:
676         s->out_format        = FMT_H263;
677         s->h263_pred         = 1;
678         s->unrestricted_mv   = 1;
679         s->msmpeg4_version   = 5;
680         s->flipflop_rounding = 1;
681         avctx->delay         = 0;
682         s->low_delay         = 1;
683         break;
684     default:
685         return -1;
686     }
687
688     avctx->has_b_frames = !s->low_delay;
689
690     s->encoding = 1;
691
692     s->progressive_frame    =
693     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
694                                                 CODEC_FLAG_INTERLACED_ME) ||
695                                 s->alternate_scan);
696
697     /* init */
698     if (ff_MPV_common_init(s) < 0)
699         return -1;
700
701     if (ARCH_X86)
702         ff_MPV_encode_init_x86(s);
703
704     ff_fdctdsp_init(&s->fdsp, avctx);
705     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
706     ff_qpeldsp_init(&s->qdsp);
707
708     s->avctx->coded_frame = s->current_picture.f;
709
710     if (s->msmpeg4_version) {
711         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
712                           2 * 2 * (MAX_LEVEL + 1) *
713                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
714     }
715     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
716
717     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
718     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
719     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
720     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
721     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
722                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
723     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
724                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
725
726     if (s->avctx->noise_reduction) {
727         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
728                           2 * 64 * sizeof(uint16_t), fail);
729     }
730
731     if (CONFIG_H263_ENCODER)
732         ff_h263dsp_init(&s->h263dsp);
733     if (!s->dct_quantize)
734         s->dct_quantize = ff_dct_quantize_c;
735     if (!s->denoise_dct)
736         s->denoise_dct  = denoise_dct_c;
737     s->fast_dct_quantize = s->dct_quantize;
738     if (avctx->trellis)
739         s->dct_quantize  = dct_quantize_trellis_c;
740
741     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
742         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
743
744     s->quant_precision = 5;
745
746     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
747     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
748
749     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
750         ff_h261_encode_init(s);
751     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
752         ff_h263_encode_init(s);
753     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
754         ff_msmpeg4_encode_init(s);
755     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
756         && s->out_format == FMT_MPEG1)
757         ff_mpeg1_encode_init(s);
758
759     /* init q matrix */
760     for (i = 0; i < 64; i++) {
761         int j = s->idsp.idct_permutation[i];
762         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
763             s->mpeg_quant) {
764             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
765             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
766         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
767             s->intra_matrix[j] =
768             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
769         } else {
770             /* mpeg1/2 */
771             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
772             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
773         }
774         if (s->avctx->intra_matrix)
775             s->intra_matrix[j] = s->avctx->intra_matrix[i];
776         if (s->avctx->inter_matrix)
777             s->inter_matrix[j] = s->avctx->inter_matrix[i];
778     }
779
780     /* precompute matrix */
781     /* for mjpeg, we do include qscale in the matrix */
782     if (s->out_format != FMT_MJPEG) {
783         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
784                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
785                           31, 1);
786         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
787                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
788                           31, 0);
789     }
790
791     if (ff_rate_control_init(s) < 0)
792         return -1;
793
794 #if FF_API_ERROR_RATE
795     FF_DISABLE_DEPRECATION_WARNINGS
796     if (avctx->error_rate)
797         s->error_rate = avctx->error_rate;
798     FF_ENABLE_DEPRECATION_WARNINGS;
799 #endif
800
801 #if FF_API_NORMALIZE_AQP
802     FF_DISABLE_DEPRECATION_WARNINGS
803     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
804         s->mpv_flags |= FF_MPV_FLAG_NAQ;
805     FF_ENABLE_DEPRECATION_WARNINGS;
806 #endif
807
808 #if FF_API_MV0
809     FF_DISABLE_DEPRECATION_WARNINGS
810     if (avctx->flags & CODEC_FLAG_MV0)
811         s->mpv_flags |= FF_MPV_FLAG_MV0;
812     FF_ENABLE_DEPRECATION_WARNINGS
813 #endif
814
815     if (avctx->b_frame_strategy == 2) {
816         for (i = 0; i < s->max_b_frames + 2; i++) {
817             s->tmp_frames[i] = av_frame_alloc();
818             if (!s->tmp_frames[i])
819                 return AVERROR(ENOMEM);
820
821             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
822             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
823             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
824
825             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
826             if (ret < 0)
827                 return ret;
828         }
829     }
830
831     return 0;
832 fail:
833     ff_MPV_encode_end(avctx);
834     return AVERROR_UNKNOWN;
835 }
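/* Editorial sketch (not part of the original file): a minimal example, with
 * error handling omitted and codec/parameters chosen only for illustration,
 * of how a client of the public API of this era would end up calling
 * ff_MPV_encode_init() above. */
#if 0
#include <libavcodec/avcodec.h>

static AVCodecContext *open_example_encoder(int width, int height)
{
    AVCodec *codec    = avcodec_find_encoder(AV_CODEC_ID_MPEG2VIDEO);
    AVCodecContext *c = avcodec_alloc_context3(codec);

    c->width        = width;
    c->height       = height;
    c->pix_fmt      = AV_PIX_FMT_YUV420P;      /* checked in ff_MPV_encode_init() */
    c->time_base    = (AVRational){ 1, 25 };   /* the framerate must be set */
    c->bit_rate     = 4000000;
    c->gop_size     = 12;
    c->max_b_frames = 2;

    if (avcodec_open2(c, codec, NULL) < 0)     /* calls ff_MPV_encode_init() */
        return NULL;
    return c;
}
#endif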
836
837 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
838 {
839     MpegEncContext *s = avctx->priv_data;
840     int i;
841
842     ff_rate_control_uninit(s);
843
844     ff_MPV_common_end(s);
845     if (CONFIG_MJPEG_ENCODER &&
846         s->out_format == FMT_MJPEG)
847         ff_mjpeg_encode_close(s);
848
849     av_freep(&avctx->extradata);
850
851     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
852         av_frame_free(&s->tmp_frames[i]);
853
854     ff_free_picture_tables(&s->new_picture);
855     ff_mpeg_unref_picture(s, &s->new_picture);
856
857     av_freep(&s->avctx->stats_out);
858     av_freep(&s->ac_stats);
859
860     av_freep(&s->q_intra_matrix);
861     av_freep(&s->q_inter_matrix);
862     av_freep(&s->q_intra_matrix16);
863     av_freep(&s->q_inter_matrix16);
864     av_freep(&s->input_picture);
865     av_freep(&s->reordered_input_picture);
866     av_freep(&s->dct_offset);
867
868     return 0;
869 }
870
871 static int get_sae(uint8_t *src, int ref, int stride)
872 {
873     int x,y;
874     int acc = 0;
875
876     for (y = 0; y < 16; y++) {
877         for (x = 0; x < 16; x++) {
878             acc += FFABS(src[x + y * stride] - ref);
879         }
880     }
881
882     return acc;
883 }
884
885 static int get_intra_count(MpegEncContext *s, uint8_t *src,
886                            uint8_t *ref, int stride)
887 {
888     int x, y, w, h;
889     int acc = 0;
890
891     w = s->width  & ~15;
892     h = s->height & ~15;
893
894     for (y = 0; y < h; y += 16) {
895         for (x = 0; x < w; x += 16) {
896             int offset = x + y * stride;
897             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
898                                      16);
899             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
900             int sae  = get_sae(src + offset, mean, stride);
901
902             acc += sae + 500 < sad;
903         }
904     }
905     return acc;
906 }
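/* Editorial note: get_sae() measures how far a 16x16 block deviates from a
 * flat block at its own mean, so get_intra_count() counts macroblocks whose
 * "DC only" approximation (plus a margin of 500) beats the SAD against the
 * previous frame, i.e. blocks that look cheaper to code as intra than as
 * inter.  b_frame_strategy == 1 uses this count as its scene-change score. */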
907
908
909 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
910 {
911     Picture *pic = NULL;
912     int64_t pts;
913     int i, display_picture_number = 0, ret;
914     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
915                                                  (s->low_delay ? 0 : 1);
916     int direct = 1;
917
918     if (pic_arg) {
919         pts = pic_arg->pts;
920         display_picture_number = s->input_picture_number++;
921
922         if (pts != AV_NOPTS_VALUE) {
923             if (s->user_specified_pts != AV_NOPTS_VALUE) {
924                 int64_t time = pts;
925                 int64_t last = s->user_specified_pts;
926
927                 if (time <= last) {
928                     av_log(s->avctx, AV_LOG_ERROR,
929                            "Error, invalid timestamp=%"PRId64", "
930                            "last=%"PRId64"\n", pts, s->user_specified_pts);
931                     return -1;
932                 }
933
934                 if (!s->low_delay && display_picture_number == 1)
935                     s->dts_delta = time - last;
936             }
937             s->user_specified_pts = pts;
938         } else {
939             if (s->user_specified_pts != AV_NOPTS_VALUE) {
940                 s->user_specified_pts =
941                 pts = s->user_specified_pts + 1;
942                 av_log(s->avctx, AV_LOG_INFO,
943                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
944                        pts);
945             } else {
946                 pts = display_picture_number;
947             }
948         }
949     }
950
951     if (pic_arg) {
952         if (!pic_arg->buf[0])
953             direct = 0;
954         if (pic_arg->linesize[0] != s->linesize)
955             direct = 0;
956         if (pic_arg->linesize[1] != s->uvlinesize)
957             direct = 0;
958         if (pic_arg->linesize[2] != s->uvlinesize)
959             direct = 0;
960
961         av_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
962                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
963
964         if (direct) {
965             i = ff_find_unused_picture(s, 1);
966             if (i < 0)
967                 return i;
968
969             pic = &s->picture[i];
970             pic->reference = 3;
971
972             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
973                 return ret;
974             if (ff_alloc_picture(s, pic, 1) < 0) {
975                 return -1;
976             }
977         } else {
978             i = ff_find_unused_picture(s, 0);
979             if (i < 0)
980                 return i;
981
982             pic = &s->picture[i];
983             pic->reference = 3;
984
985             if (ff_alloc_picture(s, pic, 0) < 0) {
986                 return -1;
987             }
988
989             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
990                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
991                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
992                 // empty
993             } else {
994                 int h_chroma_shift, v_chroma_shift;
995                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
996                                                  &h_chroma_shift,
997                                                  &v_chroma_shift);
998
999                 for (i = 0; i < 3; i++) {
1000                     int src_stride = pic_arg->linesize[i];
1001                     int dst_stride = i ? s->uvlinesize : s->linesize;
1002                     int h_shift = i ? h_chroma_shift : 0;
1003                     int v_shift = i ? v_chroma_shift : 0;
1004                     int w = s->width  >> h_shift;
1005                     int h = s->height >> v_shift;
1006                     uint8_t *src = pic_arg->data[i];
1007                     uint8_t *dst = pic->f->data[i];
1008
1009                     if (!s->avctx->rc_buffer_size)
1010                         dst += INPLACE_OFFSET;
1011
1012                     if (src_stride == dst_stride)
1013                         memcpy(dst, src, src_stride * h);
1014                     else {
1015                         while (h--) {
1016                             memcpy(dst, src, w);
1017                             dst += dst_stride;
1018                             src += src_stride;
1019                         }
1020                     }
1021                 }
1022             }
1023         }
1024         ret = av_frame_copy_props(pic->f, pic_arg);
1025         if (ret < 0)
1026             return ret;
1027
1028         pic->f->display_picture_number = display_picture_number;
1029         pic->f->pts = pts; // we set this here to avoid modifying pic_arg
1030     }
1031
1032     /* shift buffer entries */
1033     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1034         s->input_picture[i - 1] = s->input_picture[i];
1035
1036     s->input_picture[encoding_delay] = (Picture*) pic;
1037
1038     return 0;
1039 }
1040
1041 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1042 {
1043     int x, y, plane;
1044     int score = 0;
1045     int64_t score64 = 0;
1046
1047     for (plane = 0; plane < 3; plane++) {
1048         const int stride = p->f->linesize[plane];
1049         const int bw = plane ? 1 : 2;
1050         for (y = 0; y < s->mb_height * bw; y++) {
1051             for (x = 0; x < s->mb_width * bw; x++) {
1052                 int off = p->shared ? 0 : 16;
1053                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1054                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1055                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1056
1057                 switch (s->avctx->frame_skip_exp) {
1058                 case 0: score    =  FFMAX(score, v);          break;
1059                 case 1: score   += FFABS(v);                  break;
1060                 case 2: score   += v * v;                     break;
1061                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1062                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1063                 }
1064             }
1065         }
1066     }
1067
1068     if (score)
1069         score64 = score;
1070
1071     if (score64 < s->avctx->frame_skip_threshold)
1072         return 1;
1073     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1074         return 1;
1075     return 0;
1076 }
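/* Editorial note: frame_skip_exp selects the norm accumulated above (0: maximum
 * block difference, 1: sum of absolute differences, 2: sum of squares, 3 and 4:
 * higher powers collected in 64 bits).  A frame is skipped when the score stays
 * below frame_skip_threshold or below the lambda-scaled frame_skip_factor, so
 * skipping becomes more likely at low quality. */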
1077
1078 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1079 {
1080     AVPacket pkt = { 0 };
1081     int ret, got_output;
1082
1083     av_init_packet(&pkt);
1084     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1085     if (ret < 0)
1086         return ret;
1087
1088     ret = pkt.size;
1089     av_free_packet(&pkt);
1090     return ret;
1091 }
1092
1093 static int estimate_best_b_count(MpegEncContext *s)
1094 {
1095     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1096     AVCodecContext *c = avcodec_alloc_context3(NULL);
1097     const int scale = s->avctx->brd_scale;
1098     int i, j, out_size, p_lambda, b_lambda, lambda2;
1099     int64_t best_rd  = INT64_MAX;
1100     int best_b_count = -1;
1101
1102     assert(scale >= 0 && scale <= 3);
1103
1104     //emms_c();
1105     //s->next_picture_ptr->quality;
1106     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1107     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1108     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1109     if (!b_lambda) // FIXME we should do this somewhere else
1110         b_lambda = p_lambda;
1111     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1112                FF_LAMBDA_SHIFT;
1113
1114     c->width        = s->width  >> scale;
1115     c->height       = s->height >> scale;
1116     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1117     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1118     c->mb_decision  = s->avctx->mb_decision;
1119     c->me_cmp       = s->avctx->me_cmp;
1120     c->mb_cmp       = s->avctx->mb_cmp;
1121     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1122     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1123     c->time_base    = s->avctx->time_base;
1124     c->max_b_frames = s->max_b_frames;
1125
1126     if (avcodec_open2(c, codec, NULL) < 0)
1127         return -1;
1128
1129     for (i = 0; i < s->max_b_frames + 2; i++) {
1130         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1131                                                 s->next_picture_ptr;
1132
1133         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1134             pre_input = *pre_input_ptr;
1135
1136             if (!pre_input.shared && i) {
1137                 pre_input.f->data[0] += INPLACE_OFFSET;
1138                 pre_input.f->data[1] += INPLACE_OFFSET;
1139                 pre_input.f->data[2] += INPLACE_OFFSET;
1140             }
1141
1142             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1143                                        s->tmp_frames[i]->linesize[0],
1144                                        pre_input.f->data[0],
1145                                        pre_input.f->linesize[0],
1146                                        c->width, c->height);
1147             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1148                                        s->tmp_frames[i]->linesize[1],
1149                                        pre_input.f->data[1],
1150                                        pre_input.f->linesize[1],
1151                                        c->width >> 1, c->height >> 1);
1152             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1153                                        s->tmp_frames[i]->linesize[2],
1154                                        pre_input.f->data[2],
1155                                        pre_input.f->linesize[2],
1156                                        c->width >> 1, c->height >> 1);
1157         }
1158     }
1159
1160     for (j = 0; j < s->max_b_frames + 1; j++) {
1161         int64_t rd = 0;
1162
1163         if (!s->input_picture[j])
1164             break;
1165
1166         c->error[0] = c->error[1] = c->error[2] = 0;
1167
1168         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1169         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1170
1171         out_size = encode_frame(c, s->tmp_frames[0]);
1172
1173         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1174
1175         for (i = 0; i < s->max_b_frames + 1; i++) {
1176             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1177
1178             s->tmp_frames[i + 1]->pict_type = is_p ?
1179                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1180             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1181
1182             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1183
1184             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1185         }
1186
1187         /* get the delayed frames */
1188         while (out_size) {
1189             out_size = encode_frame(c, NULL);
1190             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1191         }
1192
1193         rd += c->error[0] + c->error[1] + c->error[2];
1194
1195         if (rd < best_rd) {
1196             best_rd = rd;
1197             best_b_count = j;
1198         }
1199     }
1200
1201     avcodec_close(c);
1202     av_freep(&c);
1203
1204     return best_b_count;
1205 }
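/* Editorial note: for b_frame_strategy == 2 the buffered input frames are
 * downscaled by brd_scale and re-encoded with a temporary encoder once per
 * candidate B-frame count j; the candidate with the smallest combined cost
 * (output bits weighted by lambda2 plus the SSE reported in c->error[]) wins.
 * This brute-force rate-distortion search is much slower than strategies 0
 * and 1. */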
1206
1207 static int select_input_picture(MpegEncContext *s)
1208 {
1209     int i, ret;
1210
1211     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1212         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1213     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1214
1215     /* set next picture type & ordering */
1216     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1217         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1218             s->next_picture_ptr == NULL || s->intra_only) {
1219             s->reordered_input_picture[0] = s->input_picture[0];
1220             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1221             s->reordered_input_picture[0]->f->coded_picture_number =
1222                 s->coded_picture_number++;
1223         } else {
1224             int b_frames;
1225
1226             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1227                 if (s->picture_in_gop_number < s->gop_size &&
1228                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1229                     // FIXME check that the gop check above is +-1 correct
1230                     av_frame_unref(s->input_picture[0]->f);
1231
1232                     emms_c();
1233                     ff_vbv_update(s, 0);
1234
1235                     goto no_output_pic;
1236                 }
1237             }
1238
1239             if (s->flags & CODEC_FLAG_PASS2) {
1240                 for (i = 0; i < s->max_b_frames + 1; i++) {
1241                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1242
1243                     if (pict_num >= s->rc_context.num_entries)
1244                         break;
1245                     if (!s->input_picture[i]) {
1246                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1247                         break;
1248                     }
1249
1250                     s->input_picture[i]->f->pict_type =
1251                         s->rc_context.entry[pict_num].new_pict_type;
1252                 }
1253             }
1254
1255             if (s->avctx->b_frame_strategy == 0) {
1256                 b_frames = s->max_b_frames;
1257                 while (b_frames && !s->input_picture[b_frames])
1258                     b_frames--;
1259             } else if (s->avctx->b_frame_strategy == 1) {
1260                 for (i = 1; i < s->max_b_frames + 1; i++) {
1261                     if (s->input_picture[i] &&
1262                         s->input_picture[i]->b_frame_score == 0) {
1263                         s->input_picture[i]->b_frame_score =
1264                             get_intra_count(s,
1265                                             s->input_picture[i    ]->f->data[0],
1266                                             s->input_picture[i - 1]->f->data[0],
1267                                             s->linesize) + 1;
1268                     }
1269                 }
1270                 for (i = 0; i < s->max_b_frames + 1; i++) {
1271                     if (s->input_picture[i] == NULL ||
1272                         s->input_picture[i]->b_frame_score - 1 >
1273                             s->mb_num / s->avctx->b_sensitivity)
1274                         break;
1275                 }
1276
1277                 b_frames = FFMAX(0, i - 1);
1278
1279                 /* reset scores */
1280                 for (i = 0; i < b_frames + 1; i++) {
1281                     s->input_picture[i]->b_frame_score = 0;
1282                 }
1283             } else if (s->avctx->b_frame_strategy == 2) {
1284                 b_frames = estimate_best_b_count(s);
1285             } else {
1286                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1287                 b_frames = 0;
1288             }
1289
1290             emms_c();
1291
1292             for (i = b_frames - 1; i >= 0; i--) {
1293                 int type = s->input_picture[i]->f->pict_type;
1294                 if (type && type != AV_PICTURE_TYPE_B)
1295                     b_frames = i;
1296             }
1297             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1298                 b_frames == s->max_b_frames) {
1299                 av_log(s->avctx, AV_LOG_ERROR,
1300                        "warning, too many b frames in a row\n");
1301             }
1302
1303             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1304                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1305                     s->gop_size > s->picture_in_gop_number) {
1306                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1307                 } else {
1308                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1309                         b_frames = 0;
1310                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1311                 }
1312             }
1313
1314             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1315                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1316                 b_frames--;
1317
1318             s->reordered_input_picture[0] = s->input_picture[b_frames];
1319             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1320                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1321             s->reordered_input_picture[0]->f->coded_picture_number =
1322                 s->coded_picture_number++;
1323             for (i = 0; i < b_frames; i++) {
1324                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1325                 s->reordered_input_picture[i + 1]->f->pict_type =
1326                     AV_PICTURE_TYPE_B;
1327                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1328                     s->coded_picture_number++;
1329             }
1330         }
1331     }
1332 no_output_pic:
1333     if (s->reordered_input_picture[0]) {
1334         s->reordered_input_picture[0]->reference =
1335            s->reordered_input_picture[0]->f->pict_type !=
1336                AV_PICTURE_TYPE_B ? 3 : 0;
1337
1338         ff_mpeg_unref_picture(s, &s->new_picture);
1339         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1340             return ret;
1341
1342         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1343             // input is a shared pix, so we can't modify it -> alloc a new
1344             // one & ensure that the shared one is reusable
1345
1346             Picture *pic;
1347             int i = ff_find_unused_picture(s, 0);
1348             if (i < 0)
1349                 return i;
1350             pic = &s->picture[i];
1351
1352             pic->reference = s->reordered_input_picture[0]->reference;
1353             if (ff_alloc_picture(s, pic, 0) < 0) {
1354                 return -1;
1355             }
1356
1357             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1358             if (ret < 0)
1359                 return ret;
1360
1361             /* mark us unused / free shared pic */
1362             av_frame_unref(s->reordered_input_picture[0]->f);
1363             s->reordered_input_picture[0]->shared = 0;
1364
1365             s->current_picture_ptr = pic;
1366         } else {
1367             // input is not a shared pix -> reuse buffer for current_pix
1368             s->current_picture_ptr = s->reordered_input_picture[0];
1369             for (i = 0; i < 4; i++) {
1370                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1371             }
1372         }
1373         ff_mpeg_unref_picture(s, &s->current_picture);
1374         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1375                                        s->current_picture_ptr)) < 0)
1376             return ret;
1377
1378         s->picture_number = s->new_picture.f->display_picture_number;
1379     } else {
1380         ff_mpeg_unref_picture(s, &s->new_picture);
1381     }
1382     return 0;
1383 }
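/* Editorial note: input_picture[] holds frames in display order while
 * reordered_input_picture[] holds them in coded order: the frame that becomes
 * the next reference (I or P) is moved to the front and the B-frames that
 * precede it in display order follow it, each receiving its
 * coded_picture_number above. */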
1384
1385 static void frame_end(MpegEncContext *s)
1386 {
1387     int i;
1388
1389     if (s->unrestricted_mv &&
1390         s->current_picture.reference &&
1391         !s->intra_only) {
1392         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1393         int hshift = desc->log2_chroma_w;
1394         int vshift = desc->log2_chroma_h;
1395         s->mpvencdsp.draw_edges(s->current_picture.f->data[0], s->linesize,
1396                                 s->h_edge_pos, s->v_edge_pos,
1397                                 EDGE_WIDTH, EDGE_WIDTH,
1398                                 EDGE_TOP | EDGE_BOTTOM);
1399         s->mpvencdsp.draw_edges(s->current_picture.f->data[1], s->uvlinesize,
1400                                 s->h_edge_pos >> hshift,
1401                                 s->v_edge_pos >> vshift,
1402                                 EDGE_WIDTH >> hshift,
1403                                 EDGE_WIDTH >> vshift,
1404                                 EDGE_TOP | EDGE_BOTTOM);
1405         s->mpvencdsp.draw_edges(s->current_picture.f->data[2], s->uvlinesize,
1406                                 s->h_edge_pos >> hshift,
1407                                 s->v_edge_pos >> vshift,
1408                                 EDGE_WIDTH >> hshift,
1409                                 EDGE_WIDTH >> vshift,
1410                                 EDGE_TOP | EDGE_BOTTOM);
1411     }
1412
1413     emms_c();
1414
1415     s->last_pict_type                 = s->pict_type;
1416     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1417     if (s->pict_type!= AV_PICTURE_TYPE_B)
1418         s->last_non_b_pict_type = s->pict_type;
1419
1420     if (s->encoding) {
1421         /* release non-reference frames */
1422         for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1423             if (!s->picture[i].reference)
1424                 ff_mpeg_unref_picture(s, &s->picture[i]);
1425         }
1426     }
1427
1428     s->avctx->coded_frame = s->current_picture_ptr->f;
1429
1430 }
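/* Editorial note: the draw_edges() calls replicate the picture borders into an
 * EDGE_WIDTH-wide margin so that motion estimation and compensation with
 * unrestricted motion vectors can read slightly outside the visible area
 * without extra clipping. */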
1431
1432 static void update_noise_reduction(MpegEncContext *s)
1433 {
1434     int intra, i;
1435
1436     for (intra = 0; intra < 2; intra++) {
1437         if (s->dct_count[intra] > (1 << 16)) {
1438             for (i = 0; i < 64; i++) {
1439                 s->dct_error_sum[intra][i] >>= 1;
1440             }
1441             s->dct_count[intra] >>= 1;
1442         }
1443
1444         for (i = 0; i < 64; i++) {
1445             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1446                                        s->dct_count[intra] +
1447                                        s->dct_error_sum[intra][i] / 2) /
1448                                       (s->dct_error_sum[intra][i] + 1);
1449         }
1450     }
1451 }
1452
1453 static int frame_start(MpegEncContext *s)
1454 {
1455     int ret;
1456
1457     /* mark & release old frames */
1458     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1459         s->last_picture_ptr != s->next_picture_ptr &&
1460         s->last_picture_ptr->f->buf[0]) {
1461         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1462     }
1463
1464     s->current_picture_ptr->f->pict_type = s->pict_type;
1465     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1466
1467     ff_mpeg_unref_picture(s, &s->current_picture);
1468     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1469                                    s->current_picture_ptr)) < 0)
1470         return ret;
1471
1472     if (s->pict_type != AV_PICTURE_TYPE_B) {
1473         s->last_picture_ptr = s->next_picture_ptr;
1474         if (!s->droppable)
1475             s->next_picture_ptr = s->current_picture_ptr;
1476     }
1477
1478     if (s->last_picture_ptr) {
1479         ff_mpeg_unref_picture(s, &s->last_picture);
1480         if (s->last_picture_ptr->f->buf[0] &&
1481             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1482                                        s->last_picture_ptr)) < 0)
1483             return ret;
1484     }
1485     if (s->next_picture_ptr) {
1486         ff_mpeg_unref_picture(s, &s->next_picture);
1487         if (s->next_picture_ptr->f->buf[0] &&
1488             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1489                                        s->next_picture_ptr)) < 0)
1490             return ret;
1491     }
1492
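         /* For field pictures, address every second line: the bottom field
          * starts one line further down and all strides are doubled, so the
          * frame-oriented code below walks a single field. */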
1493     if (s->picture_structure != PICT_FRAME) {
1494         int i;
1495         for (i = 0; i < 4; i++) {
1496             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1497                 s->current_picture.f->data[i] +=
1498                     s->current_picture.f->linesize[i];
1499             }
1500             s->current_picture.f->linesize[i] *= 2;
1501             s->last_picture.f->linesize[i]    *= 2;
1502             s->next_picture.f->linesize[i]    *= 2;
1503         }
1504     }
1505
1506     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1507         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1508         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1509     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1510         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1511         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1512     } else {
1513         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1514         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1515     }
1516
1517     if (s->dct_error_sum) {
1518         assert(s->avctx->noise_reduction && s->encoding);
1519         update_noise_reduction(s);
1520     }
1521
1522     return 0;
1523 }
1524
1525 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1526                           const AVFrame *pic_arg, int *got_packet)
1527 {
1528     MpegEncContext *s = avctx->priv_data;
1529     int i, stuffing_count, ret;
1530     int context_count = s->slice_context_count;
1531
1532     s->picture_in_gop_number++;
1533
1534     if (load_input_picture(s, pic_arg) < 0)
1535         return -1;
1536
1537     if (select_input_picture(s) < 0) {
1538         return -1;
1539     }
1540
1541     /* output? */
1542     if (s->new_picture.f->data[0]) {
1543         if (!pkt->data &&
1544             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1545             return ret;
1546         if (s->mb_info) {
1547             s->mb_info_ptr = av_packet_new_side_data(pkt,
1548                                  AV_PKT_DATA_H263_MB_INFO,
1549                                  s->mb_width*s->mb_height*12);
1550             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1551         }
1552
1553         for (i = 0; i < context_count; i++) {
1554             int start_y = s->thread_context[i]->start_mb_y;
1555             int   end_y = s->thread_context[i]->  end_mb_y;
1556             int h       = s->mb_height;
1557             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1558             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1559
1560             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1561         }
1562
1563         s->pict_type = s->new_picture.f->pict_type;
1564         //emms_c();
1565         ret = frame_start(s);
1566         if (ret < 0)
1567             return ret;
1568 vbv_retry:
1569         if (encode_picture(s, s->picture_number) < 0)
1570             return -1;
1571
1572         avctx->header_bits = s->header_bits;
1573         avctx->mv_bits     = s->mv_bits;
1574         avctx->misc_bits   = s->misc_bits;
1575         avctx->i_tex_bits  = s->i_tex_bits;
1576         avctx->p_tex_bits  = s->p_tex_bits;
1577         avctx->i_count     = s->i_count;
1578         // FIXME f/b_count in avctx
1579         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1580         avctx->skip_count  = s->skip_count;
1581
1582         frame_end(s);
1583
1584         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1585             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1586
1587         if (avctx->rc_buffer_size) {
1588             RateControlContext *rcc = &s->rc_context;
1589             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1590
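                 /* If the coded frame overshoots what the VBV buffer can
                  * absorb, raise lambda (and, with adaptive quantization,
                  * every per-MB lambda), undo the per-frame state changed by
                  * encode_picture(), reset the slice bit buffers and jump
                  * back to vbv_retry to re-encode at the higher quantizer. */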
1591             if (put_bits_count(&s->pb) > max_size &&
1592                 s->lambda < s->avctx->lmax) {
1593                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1594                                        (s->qscale + 1) / s->qscale);
1595                 if (s->adaptive_quant) {
1596                     int i;
1597                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1598                         s->lambda_table[i] =
1599                             FFMAX(s->lambda_table[i] + 1,
1600                                   s->lambda_table[i] * (s->qscale + 1) /
1601                                   s->qscale);
1602                 }
1603                 s->mb_skipped = 0;        // normally done in frame_start(), which is not rerun on retry
1604                 // no_rounding was toggled in encode_picture(), so undo it before retrying
1605                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1606                     if (s->flipflop_rounding          ||
1607                         s->codec_id == AV_CODEC_ID_H263P ||
1608                         s->codec_id == AV_CODEC_ID_MPEG4)
1609                         s->no_rounding ^= 1;
1610                 }
1611                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1612                     s->time_base       = s->last_time_base;
1613                     s->last_non_b_time = s->time - s->pp_time;
1614                 }
1615                 for (i = 0; i < context_count; i++) {
1616                     PutBitContext *pb = &s->thread_context[i]->pb;
1617                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1618                 }
1619                 goto vbv_retry;
1620             }
1621
1622             assert(s->avctx->rc_max_rate);
1623         }
1624
1625         if (s->flags & CODEC_FLAG_PASS1)
1626             ff_write_pass1_stats(s);
1627
1628         for (i = 0; i < 4; i++) {
1629             s->current_picture_ptr->f->error[i] = s->current_picture.f->error[i];
1630             avctx->error[i] += s->current_picture_ptr->f->error[i];
1631         }
1632
1633         if (s->flags & CODEC_FLAG_PASS1)
1634             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1635                    avctx->i_tex_bits + avctx->p_tex_bits ==
1636                        put_bits_count(&s->pb));
1637         flush_put_bits(&s->pb);
1638         s->frame_bits  = put_bits_count(&s->pb);
1639
1640         stuffing_count = ff_vbv_update(s, s->frame_bits);
1641         if (stuffing_count) {
1642             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1643                     stuffing_count + 50) {
1644                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1645                 return -1;
1646             }
1647
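                 /* Pad the frame up to the rate-control target: MPEG-1/2
                  * append plain zero bytes, while the MPEG-4 path writes the
                  * start code 0x000001C3 followed by 0xFF bytes (the four
                  * start-code bytes are counted against stuffing_count). */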
1648             switch (s->codec_id) {
1649             case AV_CODEC_ID_MPEG1VIDEO:
1650             case AV_CODEC_ID_MPEG2VIDEO:
1651                 while (stuffing_count--) {
1652                     put_bits(&s->pb, 8, 0);
1653                 }
1654             break;
1655             case AV_CODEC_ID_MPEG4:
1656                 put_bits(&s->pb, 16, 0);
1657                 put_bits(&s->pb, 16, 0x1C3);
1658                 stuffing_count -= 4;
1659                 while (stuffing_count--) {
1660                     put_bits(&s->pb, 8, 0xFF);
1661                 }
1662             break;
1663             default:
1664                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1665             }
1666             flush_put_bits(&s->pb);
1667             s->frame_bits  = put_bits_count(&s->pb);
1668         }
1669
1670         /* update mpeg1/2 vbv_delay for CBR */
1671         if (s->avctx->rc_max_rate                          &&
1672             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1673             s->out_format == FMT_MPEG1                     &&
1674             90000LL * (avctx->rc_buffer_size - 1) <=
1675                 s->avctx->rc_max_rate * 0xFFFFLL) {
1676             int vbv_delay, min_delay;
1677             double inbits  = s->avctx->rc_max_rate *
1678                              av_q2d(s->avctx->time_base);
1679             int    minbits = s->frame_bits - 8 *
1680                              (s->vbv_delay_ptr - s->pb.buf - 1);
1681             double bits    = s->rc_context.buffer_index + minbits - inbits;
1682
1683             if (bits < 0)
1684                 av_log(s->avctx, AV_LOG_ERROR,
1685                        "Internal error, negative bits\n");
1686
1687             assert(s->repeat_first_field == 0);
1688
1689             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1690             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1691                         s->avctx->rc_max_rate;
1692
1693             vbv_delay = FFMAX(vbv_delay, min_delay);
1694
1695             assert(vbv_delay < 0xFFFF);
1696
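                 /* Patch the 16-bit vbv_delay value back into the already
                  * written picture header: the top 3 bits go into the low
                  * bits of byte 0, the middle 8 bits fill byte 1 and the low
                  * 5 bits land in the high bits of byte 2.  avctx->vbv_delay
                  * is reported in 27 MHz units, hence the * 300 on the
                  * 90 kHz value. */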
1697             s->vbv_delay_ptr[0] &= 0xF8;
1698             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1699             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1700             s->vbv_delay_ptr[2] &= 0x07;
1701             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1702             avctx->vbv_delay     = vbv_delay * 300;
1703         }
1704         s->total_bits     += s->frame_bits;
1705         avctx->frame_bits  = s->frame_bits;
1706
1707         pkt->pts = s->current_picture.f->pts;
1708         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1709             if (!s->current_picture.f->coded_picture_number)
1710                 pkt->dts = pkt->pts - s->dts_delta;
1711             else
1712                 pkt->dts = s->reordered_pts;
1713             s->reordered_pts = pkt->pts;
1714         } else
1715             pkt->dts = pkt->pts;
1716         if (s->current_picture.f->key_frame)
1717             pkt->flags |= AV_PKT_FLAG_KEY;
1718         if (s->mb_info)
1719             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1720     } else {
1721         s->frame_bits = 0;
1722     }
1723     assert((s->frame_bits & 7) == 0);
1724
1725     pkt->size = s->frame_bits / 8;
1726     *got_packet = !!pkt->size;
1727     return 0;
1728 }
1729
1730 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1731                                                 int n, int threshold)
1732 {
1733     static const char tab[64] = {
1734         3, 2, 2, 1, 1, 1, 1, 1,
1735         1, 1, 1, 1, 1, 1, 1, 1,
1736         1, 1, 1, 1, 1, 1, 1, 1,
1737         0, 0, 0, 0, 0, 0, 0, 0,
1738         0, 0, 0, 0, 0, 0, 0, 0,
1739         0, 0, 0, 0, 0, 0, 0, 0,
1740         0, 0, 0, 0, 0, 0, 0, 0,
1741         0, 0, 0, 0, 0, 0, 0, 0
1742     };
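         /* Heuristic sketch: walk the scanned coefficients and add tab[run]
          * for every isolated +-1 (shorter zero runs score higher); any
          * coefficient with |level| > 1 disables elimination.  If the total
          * score stays below the threshold, the block is considered cheaper
          * to drop than to code and is zeroed, keeping the DC coefficient
          * when skip_dc is set. */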
1743     int score = 0;
1744     int run = 0;
1745     int i;
1746     int16_t *block = s->block[n];
1747     const int last_index = s->block_last_index[n];
1748     int skip_dc;
1749
1750     if (threshold < 0) {
1751         skip_dc = 0;
1752         threshold = -threshold;
1753     } else
1754         skip_dc = 1;
1755
1756     /* Is everything we could set to zero already zero? */
1757     if (last_index <= skip_dc - 1)
1758         return;
1759
1760     for (i = 0; i <= last_index; i++) {
1761         const int j = s->intra_scantable.permutated[i];
1762         const int level = FFABS(block[j]);
1763         if (level == 1) {
1764             if (skip_dc && i == 0)
1765                 continue;
1766             score += tab[run];
1767             run = 0;
1768         } else if (level > 1) {
1769             return;
1770         } else {
1771             run++;
1772         }
1773     }
1774     if (score >= threshold)
1775         return;
1776     for (i = skip_dc; i <= last_index; i++) {
1777         const int j = s->intra_scantable.permutated[i];
1778         block[j] = 0;
1779     }
1780     if (block[0])
1781         s->block_last_index[n] = 0;
1782     else
1783         s->block_last_index[n] = -1;
1784 }
1785
1786 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1787                                int last_index)
1788 {
1789     int i;
1790     const int maxlevel = s->max_qcoeff;
1791     const int minlevel = s->min_qcoeff;
1792     int overflow = 0;
1793
1794     if (s->mb_intra) {
1795         i = 1; // skip clipping of intra dc
1796     } else
1797         i = 0;
1798
1799     for (; i <= last_index; i++) {
1800         const int j = s->intra_scantable.permutated[i];
1801         int level = block[j];
1802
1803         if (level > maxlevel) {
1804             level = maxlevel;
1805             overflow++;
1806         } else if (level < minlevel) {
1807             level = minlevel;
1808             overflow++;
1809         }
1810
1811         block[j] = level;
1812     }
1813
1814     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1815         av_log(s->avctx, AV_LOG_INFO,
1816                "warning, clipping %d dct coefficients to %d..%d\n",
1817                overflow, minlevel, maxlevel);
1818 }
1819
1820 static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
1821 {
1822     int x, y;
1823     // FIXME optimize
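         /* For each pixel, the sum and sum of squares over its 3x3
          * neighbourhood are collected; since count*sqr - sum*sum equals
          * count^2 times the variance, the stored weight works out to about
          * 36 * local standard deviation, so flat areas get small weights
          * and textured areas large ones. */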
1824     for (y = 0; y < 8; y++) {
1825         for (x = 0; x < 8; x++) {
1826             int x2, y2;
1827             int sum = 0;
1828             int sqr = 0;
1829             int count = 0;
1830
1831             for (y2 = FFMAX(y - 1, 0); y2 < FFMIN(8, y + 2); y2++) {
1832                 for (x2= FFMAX(x - 1, 0); x2 < FFMIN(8, x + 2); x2++) {
1833                     int v = ptr[x2 + y2 * stride];
1834                     sum += v;
1835                     sqr += v * v;
1836                     count++;
1837                 }
1838             }
1839             weight[x + 8 * y]= (36 * ff_sqrt(count * sqr - sum * sum)) / count;
1840         }
1841     }
1842 }
1843
1844 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1845                                                 int motion_x, int motion_y,
1846                                                 int mb_block_height,
1847                                                 int mb_block_count)
1848 {
1849     int16_t weight[8][64];
1850     int16_t orig[8][64];
1851     const int mb_x = s->mb_x;
1852     const int mb_y = s->mb_y;
1853     int i;
1854     int skip_dct[8];
1855     int dct_offset = s->linesize * 8; // default for progressive frames
1856     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1857     ptrdiff_t wrap_y, wrap_c;
1858
1859     for (i = 0; i < mb_block_count; i++)
1860         skip_dct[i] = s->skipdct;
1861
1862     if (s->adaptive_quant) {
1863         const int last_qp = s->qscale;
1864         const int mb_xy = mb_x + mb_y * s->mb_stride;
1865
1866         s->lambda = s->lambda_table[mb_xy];
1867         update_qscale(s);
1868
1869         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1870             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1871             s->dquant = s->qscale - last_qp;
1872
1873             if (s->out_format == FMT_H263) {
1874                 s->dquant = av_clip(s->dquant, -2, 2);
1875
1876                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1877                     if (!s->mb_intra) {
1878                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1879                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1880                                 s->dquant = 0;
1881                         }
1882                         if (s->mv_type == MV_TYPE_8X8)
1883                             s->dquant = 0;
1884                     }
1885                 }
1886             }
1887         }
1888         ff_set_qscale(s, last_qp + s->dquant);
1889     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1890         ff_set_qscale(s, s->qscale + s->dquant);
1891
1892     wrap_y = s->linesize;
1893     wrap_c = s->uvlinesize;
1894     ptr_y  = s->new_picture.f->data[0] +
1895              (mb_y * 16 * wrap_y)              + mb_x * 16;
1896     ptr_cb = s->new_picture.f->data[1] +
1897              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1898     ptr_cr = s->new_picture.f->data[2] +
1899              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1900
1901     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1902         uint8_t *ebuf = s->edge_emu_buffer + 32;
1903         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
1904                                  wrap_y, wrap_y,
1905                                  16, 16, mb_x * 16, mb_y * 16,
1906                                  s->width, s->height);
1907         ptr_y = ebuf;
1908         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
1909                                  wrap_c, wrap_c,
1910                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1911                                  s->width >> 1, s->height >> 1);
1912         ptr_cb = ebuf + 18 * wrap_y;
1913         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
1914                                  wrap_c, wrap_c,
1915                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1916                                  s->width >> 1, s->height >> 1);
1917         ptr_cr = ebuf + 18 * wrap_y + 8;
1918     }
1919
1920     if (s->mb_intra) {
1921         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1922             int progressive_score, interlaced_score;
1923
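                 /* Frame/field DCT decision: ildct_cmp is evaluated on frame
                  * lines (stride wrap_y) and, if that looks costly, on field
                  * lines (stride 2 * wrap_y); the -400 biases the decision
                  * towards progressive DCT, and interlaced DCT is selected
                  * only when the field score is strictly lower. */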
1924             s->interlaced_dct = 0;
1925             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1926                                                     NULL, wrap_y, 8) +
1927                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1928                                                     NULL, wrap_y, 8) - 400;
1929
1930             if (progressive_score > 0) {
1931                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1932                                                        NULL, wrap_y * 2, 8) +
1933                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1934                                                        NULL, wrap_y * 2, 8);
1935                 if (progressive_score > interlaced_score) {
1936                     s->interlaced_dct = 1;
1937
1938                     dct_offset = wrap_y;
1939                     wrap_y <<= 1;
1940                     if (s->chroma_format == CHROMA_422)
1941                         wrap_c <<= 1;
1942                 }
1943             }
1944         }
1945
1946         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1947         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1948         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1949         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1950
1951         if (s->flags & CODEC_FLAG_GRAY) {
1952             skip_dct[4] = 1;
1953             skip_dct[5] = 1;
1954         } else {
1955             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1956             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1957             if (!s->chroma_y_shift) { /* 422 */
1958                 s->dsp.get_pixels(s->block[6],
1959                                   ptr_cb + (dct_offset >> 1), wrap_c);
1960                 s->dsp.get_pixels(s->block[7],
1961                                   ptr_cr + (dct_offset >> 1), wrap_c);
1962             }
1963         }
1964     } else {
1965         op_pixels_func (*op_pix)[4];
1966         qpel_mc_func (*op_qpix)[16];
1967         uint8_t *dest_y, *dest_cb, *dest_cr;
1968
1969         dest_y  = s->dest[0];
1970         dest_cb = s->dest[1];
1971         dest_cr = s->dest[2];
1972
1973         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1974             op_pix  = s->hdsp.put_pixels_tab;
1975             op_qpix = s->qdsp.put_qpel_pixels_tab;
1976         } else {
1977             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
1978             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
1979         }
1980
1981         if (s->mv_dir & MV_DIR_FORWARD) {
1982             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
1983                           s->last_picture.f->data,
1984                           op_pix, op_qpix);
1985             op_pix  = s->hdsp.avg_pixels_tab;
1986             op_qpix = s->qdsp.avg_qpel_pixels_tab;
1987         }
1988         if (s->mv_dir & MV_DIR_BACKWARD) {
1989             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
1990                           s->next_picture.f->data,
1991                           op_pix, op_qpix);
1992         }
1993
1994         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1995             int progressive_score, interlaced_score;
1996
1997             s->interlaced_dct = 0;
1998             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1999                                                     ptr_y,              wrap_y,
2000                                                     8) +
2001                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
2002                                                     ptr_y + wrap_y * 8, wrap_y,
2003                                                     8) - 400;
2004
2005             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2006                 progressive_score -= 400;
2007
2008             if (progressive_score > 0) {
2009                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
2010                                                        ptr_y,
2011                                                        wrap_y * 2, 8) +
2012                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
2013                                                        ptr_y + wrap_y,
2014                                                        wrap_y * 2, 8);
2015
2016                 if (progressive_score > interlaced_score) {
2017                     s->interlaced_dct = 1;
2018
2019                     dct_offset = wrap_y;
2020                     wrap_y <<= 1;
2021                     if (s->chroma_format == CHROMA_422)
2022                         wrap_c <<= 1;
2023                 }
2024             }
2025         }
2026
2027         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2028         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2029         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2030                            dest_y + dct_offset, wrap_y);
2031         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2032                            dest_y + dct_offset + 8, wrap_y);
2033
2034         if (s->flags & CODEC_FLAG_GRAY) {
2035             skip_dct[4] = 1;
2036             skip_dct[5] = 1;
2037         } else {
2038             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2039             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2040             if (!s->chroma_y_shift) { /* 422 */
2041                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
2042                                    dest_cb + (dct_offset >> 1), wrap_c);
2043                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
2044                                    dest_cr + (dct_offset >> 1), wrap_c);
2045             }
2046         }
2047         /* pre quantization */
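             /* Skip the DCT for 8x8 blocks whose prediction residual is
              * already tiny: when the MB's motion-compensated variance is
              * below 2*qscale^2, any block with SAD < 20*qscale (e.g. < 80
              * at qscale 4) would quantize to little or nothing, so it is
              * marked as skipped. */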
2048         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2049                 2 * s->qscale * s->qscale) {
2050             // FIXME optimize
2051             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
2052                               wrap_y, 8) < 20 * s->qscale)
2053                 skip_dct[0] = 1;
2054             if (s->dsp.sad[1](NULL, ptr_y + 8,
2055                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2056                 skip_dct[1] = 1;
2057             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
2058                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
2059                 skip_dct[2] = 1;
2060             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
2061                               dest_y + dct_offset + 8,
2062                               wrap_y, 8) < 20 * s->qscale)
2063                 skip_dct[3] = 1;
2064             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
2065                               wrap_c, 8) < 20 * s->qscale)
2066                 skip_dct[4] = 1;
2067             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
2068                               wrap_c, 8) < 20 * s->qscale)
2069                 skip_dct[5] = 1;
2070             if (!s->chroma_y_shift) { /* 422 */
2071                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2072                                   dest_cb + (dct_offset >> 1),
2073                                   wrap_c, 8) < 20 * s->qscale)
2074                     skip_dct[6] = 1;
2075                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2076                                   dest_cr + (dct_offset >> 1),
2077                                   wrap_c, 8) < 20 * s->qscale)
2078                     skip_dct[7] = 1;
2079             }
2080         }
2081     }
2082
2083     if (s->quantizer_noise_shaping) {
2084         if (!skip_dct[0])
2085             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2086         if (!skip_dct[1])
2087             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2088         if (!skip_dct[2])
2089             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2090         if (!skip_dct[3])
2091             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2092         if (!skip_dct[4])
2093             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2094         if (!skip_dct[5])
2095             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2096         if (!s->chroma_y_shift) { /* 422 */
2097             if (!skip_dct[6])
2098                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2099                                   wrap_c);
2100             if (!skip_dct[7])
2101                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2102                                   wrap_c);
2103         }
2104         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2105     }
2106
2107     /* DCT & quantize */
2108     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
2109     {
2110         for (i = 0; i < mb_block_count; i++) {
2111             if (!skip_dct[i]) {
2112                 int overflow;
2113                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2114                 // FIXME: we could decide to change the quantizer instead of
2115                 // clipping the coefficients
2116                 // JS: I don't think that would be a good idea; it could lower
2117                 //     quality instead of improving it. Only INTRADC clipping
2118                 //     deserves changes in the quantizer.
2119                 if (overflow)
2120                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2121             } else
2122                 s->block_last_index[i] = -1;
2123         }
2124         if (s->quantizer_noise_shaping) {
2125             for (i = 0; i < mb_block_count; i++) {
2126                 if (!skip_dct[i]) {
2127                     s->block_last_index[i] =
2128                         dct_quantize_refine(s, s->block[i], weight[i],
2129                                             orig[i], i, s->qscale);
2130                 }
2131             }
2132         }
2133
2134         if (s->luma_elim_threshold && !s->mb_intra)
2135             for (i = 0; i < 4; i++)
2136                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2137         if (s->chroma_elim_threshold && !s->mb_intra)
2138             for (i = 4; i < mb_block_count; i++)
2139                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2140
2141         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2142             for (i = 0; i < mb_block_count; i++) {
2143                 if (s->block_last_index[i] == -1)
2144                     s->coded_score[i] = INT_MAX / 256;
2145             }
2146         }
2147     }
2148
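         /* In grayscale mode the chroma blocks of intra MBs are not taken
          * from the source; their DC is forced to 1024 / c_dc_scale so that
          * they reconstruct to a neutral (mid-grey) chroma value. */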
2149     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2150         s->block_last_index[4] =
2151         s->block_last_index[5] = 0;
2152         s->block[4][0] =
2153         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2154     }
2155
2156     // FIXME: the non-C quantize implementations return an incorrect block_last_index
2157     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2158         for (i = 0; i < mb_block_count; i++) {
2159             int j;
2160             if (s->block_last_index[i] > 0) {
2161                 for (j = 63; j > 0; j--) {
2162                     if (s->block[i][s->intra_scantable.permutated[j]])
2163                         break;
2164                 }
2165                 s->block_last_index[i] = j;
2166             }
2167         }
2168     }
2169
2170     /* huffman encode */
2171     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2172     case AV_CODEC_ID_MPEG1VIDEO:
2173     case AV_CODEC_ID_MPEG2VIDEO:
2174         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2175             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2176         break;
2177     case AV_CODEC_ID_MPEG4:
2178         if (CONFIG_MPEG4_ENCODER)
2179             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2180         break;
2181     case AV_CODEC_ID_MSMPEG4V2:
2182     case AV_CODEC_ID_MSMPEG4V3:
2183     case AV_CODEC_ID_WMV1:
2184         if (CONFIG_MSMPEG4_ENCODER)
2185             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2186         break;
2187     case AV_CODEC_ID_WMV2:
2188         if (CONFIG_WMV2_ENCODER)
2189             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2190         break;
2191     case AV_CODEC_ID_H261:
2192         if (CONFIG_H261_ENCODER)
2193             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2194         break;
2195     case AV_CODEC_ID_H263:
2196     case AV_CODEC_ID_H263P:
2197     case AV_CODEC_ID_FLV1:
2198     case AV_CODEC_ID_RV10:
2199     case AV_CODEC_ID_RV20:
2200         if (CONFIG_H263_ENCODER)
2201             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2202         break;
2203     case AV_CODEC_ID_MJPEG:
2204         if (CONFIG_MJPEG_ENCODER)
2205             ff_mjpeg_encode_mb(s, s->block);
2206         break;
2207     default:
2208         assert(0);
2209     }
2210 }
2211
2212 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2213 {
2214     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2215     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2216 }
2217
2218 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2219     int i;
2220
2221     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2222
2223     /* mpeg1 */
2224     d->mb_skip_run= s->mb_skip_run;
2225     for(i=0; i<3; i++)
2226         d->last_dc[i] = s->last_dc[i];
2227
2228     /* statistics */
2229     d->mv_bits= s->mv_bits;
2230     d->i_tex_bits= s->i_tex_bits;
2231     d->p_tex_bits= s->p_tex_bits;
2232     d->i_count= s->i_count;
2233     d->f_count= s->f_count;
2234     d->b_count= s->b_count;
2235     d->skip_count= s->skip_count;
2236     d->misc_bits= s->misc_bits;
2237     d->last_bits= 0;
2238
2239     d->mb_skipped= 0;
2240     d->qscale= s->qscale;
2241     d->dquant= s->dquant;
2242
2243     d->esc3_level_length= s->esc3_level_length;
2244 }
2245
2246 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2247     int i;
2248
2249     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2250     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2251
2252     /* mpeg1 */
2253     d->mb_skip_run= s->mb_skip_run;
2254     for(i=0; i<3; i++)
2255         d->last_dc[i] = s->last_dc[i];
2256
2257     /* statistics */
2258     d->mv_bits= s->mv_bits;
2259     d->i_tex_bits= s->i_tex_bits;
2260     d->p_tex_bits= s->p_tex_bits;
2261     d->i_count= s->i_count;
2262     d->f_count= s->f_count;
2263     d->b_count= s->b_count;
2264     d->skip_count= s->skip_count;
2265     d->misc_bits= s->misc_bits;
2266
2267     d->mb_intra= s->mb_intra;
2268     d->mb_skipped= s->mb_skipped;
2269     d->mv_type= s->mv_type;
2270     d->mv_dir= s->mv_dir;
2271     d->pb= s->pb;
2272     if(s->data_partitioning){
2273         d->pb2= s->pb2;
2274         d->tex_pb= s->tex_pb;
2275     }
2276     d->block= s->block;
2277     for(i=0; i<8; i++)
2278         d->block_last_index[i]= s->block_last_index[i];
2279     d->interlaced_dct= s->interlaced_dct;
2280     d->qscale= s->qscale;
2281
2282     d->esc3_level_length= s->esc3_level_length;
2283 }
2284
2285 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2286                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2287                            int *dmin, int *next_block, int motion_x, int motion_y)
2288 {
2289     int score;
2290     uint8_t *dest_backup[3];
2291
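         /* RD macroblock decision: encode the candidate into one of two
          * ping-pong bit buffers / block arrays selected by *next_block,
          * optionally decode it into a scratch buffer, and compare its cost
          * (bits * lambda2, plus SSE << FF_LAMBDA_SHIFT for
          * FF_MB_DECISION_RD) against *dmin; only the cheaper candidate's
          * context is kept in *best. */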
2292     copy_context_before_encode(s, backup, type);
2293
2294     s->block= s->blocks[*next_block];
2295     s->pb= pb[*next_block];
2296     if(s->data_partitioning){
2297         s->pb2   = pb2   [*next_block];
2298         s->tex_pb= tex_pb[*next_block];
2299     }
2300
2301     if(*next_block){
2302         memcpy(dest_backup, s->dest, sizeof(s->dest));
2303         s->dest[0] = s->rd_scratchpad;
2304         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2305         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2306         assert(s->linesize >= 32); //FIXME
2307     }
2308
2309     encode_mb(s, motion_x, motion_y);
2310
2311     score= put_bits_count(&s->pb);
2312     if(s->data_partitioning){
2313         score+= put_bits_count(&s->pb2);
2314         score+= put_bits_count(&s->tex_pb);
2315     }
2316
2317     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2318         ff_MPV_decode_mb(s, s->block);
2319
2320         score *= s->lambda2;
2321         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2322     }
2323
2324     if(*next_block){
2325         memcpy(s->dest, dest_backup, sizeof(s->dest));
2326     }
2327
2328     if(score<*dmin){
2329         *dmin= score;
2330         *next_block^=1;
2331
2332         copy_context_after_encode(best, s, type);
2333     }
2334 }
2335
2336 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2337     uint32_t *sq = ff_square_tab + 256;
2338     int acc=0;
2339     int x,y;
2340
2341     if(w==16 && h==16)
2342         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2343     else if(w==8 && h==8)
2344         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2345
2346     for(y=0; y<h; y++){
2347         for(x=0; x<w; x++){
2348             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2349         }
2350     }
2351
2352     assert(acc>=0);
2353
2354     return acc;
2355 }
2356
2357 static int sse_mb(MpegEncContext *s){
2358     int w= 16;
2359     int h= 16;
2360
2361     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2362     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2363
2364     if(w==16 && h==16)
2365       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2366         return  s->dsp.nsse[0](s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2367                +s->dsp.nsse[1](s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2368                +s->dsp.nsse[1](s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2369       }else{
2370         return  s->dsp.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2371                +s->dsp.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2372                +s->dsp.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2373       }
2374     else
2375         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2376                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2377                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2378 }
2379
2380 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2381     MpegEncContext *s= *(void**)arg;
2382
2383
2384     s->me.pre_pass=1;
2385     s->me.dia_size= s->avctx->pre_dia_size;
2386     s->first_slice_line=1;
2387     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2388         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2389             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2390         }
2391         s->first_slice_line=0;
2392     }
2393
2394     s->me.pre_pass=0;
2395
2396     return 0;
2397 }
2398
2399 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2400     MpegEncContext *s= *(void**)arg;
2401
2402     s->me.dia_size= s->avctx->dia_size;
2403     s->first_slice_line=1;
2404     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2405         s->mb_x=0; //for block init below
2406         ff_init_block_index(s);
2407         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2408             s->block_index[0]+=2;
2409             s->block_index[1]+=2;
2410             s->block_index[2]+=2;
2411             s->block_index[3]+=2;
2412
2413             /* compute motion vector & mb_type and store in context */
2414             if(s->pict_type==AV_PICTURE_TYPE_B)
2415                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2416             else
2417                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2418         }
2419         s->first_slice_line=0;
2420     }
2421     return 0;
2422 }
2423
2424 static int mb_var_thread(AVCodecContext *c, void *arg){
2425     MpegEncContext *s= *(void**)arg;
2426     int mb_x, mb_y;
2427
2428     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2429         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2430             int xx = mb_x * 16;
2431             int yy = mb_y * 16;
2432             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2433             int varc;
2434             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2435
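                 /* sum is the luma sum of the 16x16 block and pix_norm1 its
                  * sum of squares, so varc is approximately the per-pixel
                  * variance E[x^2] - E[x]^2 (with a small rounding bias);
                  * mb_mean is simply the average luma value, (sum+128)>>8. */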
2436             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2437                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2438
2439             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2440             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2441             s->me.mb_var_sum_temp    += varc;
2442         }
2443     }
2444     return 0;
2445 }
2446
2447 static void write_slice_end(MpegEncContext *s){
2448     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2449         if(s->partitioned_frame){
2450             ff_mpeg4_merge_partitions(s);
2451         }
2452
2453         ff_mpeg4_stuffing(&s->pb);
2454     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2455         ff_mjpeg_encode_stuffing(&s->pb);
2456     }
2457
2458     avpriv_align_put_bits(&s->pb);
2459     flush_put_bits(&s->pb);
2460
2461     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2462         s->misc_bits+= get_bits_diff(s);
2463 }
2464
2465 static void write_mb_info(MpegEncContext *s)
2466 {
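         /* Fill the current 12-byte AV_PKT_DATA_H263_MB_INFO record: 32-bit
          * bit offset, qscale, GOB number, 16-bit macroblock address and the
          * four motion-vector predictor bytes (hmv2/vmv2 are unused and
          * written as zero). */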
2467     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2468     int offset = put_bits_count(&s->pb);
2469     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2470     int gobn = s->mb_y / s->gob_index;
2471     int pred_x, pred_y;
2472     if (CONFIG_H263_ENCODER)
2473         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2474     bytestream_put_le32(&ptr, offset);
2475     bytestream_put_byte(&ptr, s->qscale);
2476     bytestream_put_byte(&ptr, gobn);
2477     bytestream_put_le16(&ptr, mba);
2478     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2479     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2480     /* 4MV not implemented */
2481     bytestream_put_byte(&ptr, 0); /* hmv2 */
2482     bytestream_put_byte(&ptr, 0); /* vmv2 */
2483 }
2484
2485 static void update_mb_info(MpegEncContext *s, int startcode)
2486 {
2487     if (!s->mb_info)
2488         return;
2489     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2490         s->mb_info_size += 12;
2491         s->prev_mb_info = s->last_mb_info;
2492     }
2493     if (startcode) {
2494         s->prev_mb_info = put_bits_count(&s->pb)/8;
2495         /* This might have incremented mb_info_size above, and we return without
2496          * actually writing any info into that slot yet. But in that case this
2497          * will be called again right after the start code has been written,
2498          * and the mb info will be written then. */
2499         return;
2500     }
2501
2502     s->last_mb_info = put_bits_count(&s->pb)/8;
2503     if (!s->mb_info_size)
2504         s->mb_info_size += 12;
2505     write_mb_info(s);
2506 }
2507
2508 static int encode_thread(AVCodecContext *c, void *arg){
2509     MpegEncContext *s= *(void**)arg;
2510     int mb_x, mb_y, pdif = 0;
2511     int chr_h= 16>>s->chroma_y_shift;
2512     int i, j;
2513     MpegEncContext best_s, backup_s;
2514     uint8_t bit_buf[2][MAX_MB_BYTES];
2515     uint8_t bit_buf2[2][MAX_MB_BYTES];
2516     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2517     PutBitContext pb[2], pb2[2], tex_pb[2];
2518
2519     for(i=0; i<2; i++){
2520         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2521         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2522         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2523     }
2524
2525     s->last_bits= put_bits_count(&s->pb);
2526     s->mv_bits=0;
2527     s->misc_bits=0;
2528     s->i_tex_bits=0;
2529     s->p_tex_bits=0;
2530     s->i_count=0;
2531     s->f_count=0;
2532     s->b_count=0;
2533     s->skip_count=0;
2534
2535     for(i=0; i<3; i++){
2536         /* init last dc values */
2537         /* note: quant matrix value (8) is implied here */
2538         s->last_dc[i] = 128 << s->intra_dc_precision;
2539
2540         s->current_picture.f->error[i] = 0;
2541     }
2542     s->mb_skip_run = 0;
2543     memset(s->last_mv, 0, sizeof(s->last_mv));
2544
2545     s->last_mv_dir = 0;
2546
2547     switch(s->codec_id){
2548     case AV_CODEC_ID_H263:
2549     case AV_CODEC_ID_H263P:
2550     case AV_CODEC_ID_FLV1:
2551         if (CONFIG_H263_ENCODER)
2552             s->gob_index = ff_h263_get_gob_height(s);
2553         break;
2554     case AV_CODEC_ID_MPEG4:
2555         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2556             ff_mpeg4_init_partitions(s);
2557         break;
2558     }
2559
2560     s->resync_mb_x=0;
2561     s->resync_mb_y=0;
2562     s->first_slice_line = 1;
2563     s->ptr_lastgob = s->pb.buf;
2564     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2565         s->mb_x=0;
2566         s->mb_y= mb_y;
2567
2568         ff_set_qscale(s, s->qscale);
2569         ff_init_block_index(s);
2570
2571         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2572             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2573             int mb_type= s->mb_type[xy];
2574 //            int d;
2575             int dmin= INT_MAX;
2576             int dir;
2577
2578             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2579                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2580                 return -1;
2581             }
2582             if(s->data_partitioning){
2583                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2584                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2585                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2586                     return -1;
2587                 }
2588             }
2589
2590             s->mb_x = mb_x;
2591             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2592             ff_update_block_index(s);
2593
2594             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2595                 ff_h261_reorder_mb_index(s);
2596                 xy= s->mb_y*s->mb_stride + s->mb_x;
2597                 mb_type= s->mb_type[xy];
2598             }
2599
2600             /* write gob / video packet header  */
2601             if(s->rtp_mode){
2602                 int current_packet_size, is_gob_start;
2603
2604                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2605
2606                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2607
2608                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2609
2610                 switch(s->codec_id){
2611                 case AV_CODEC_ID_H263:
2612                 case AV_CODEC_ID_H263P:
2613                     if(!s->h263_slice_structured)
2614                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2615                     break;
2616                 case AV_CODEC_ID_MPEG2VIDEO:
2617                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1; /* fall through */
2618                 case AV_CODEC_ID_MPEG1VIDEO:
2619                     if(s->mb_skip_run) is_gob_start=0;
2620                     break;
2621                 }
2622
2623                 if(is_gob_start){
2624                     if(s->start_mb_y != mb_y || mb_x!=0){
2625                         write_slice_end(s);
2626
2627                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2628                             ff_mpeg4_init_partitions(s);
2629                         }
2630                     }
2631
2632                     assert((put_bits_count(&s->pb)&7) == 0);
2633                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2634
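                         /* error_rate simulates transmission errors: roughly
                          * error_rate percent of the slices are dropped by
                          * rewinding the bitstream pointer to the last GOB
                          * start instead of keeping the bits just written. */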
2635                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2636                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2637                         int d = 100 / s->error_rate;
2638                         if(r % d == 0){
2639                             current_packet_size=0;
2640                             s->pb.buf_ptr= s->ptr_lastgob;
2641                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2642                         }
2643                     }
2644
2645                     if (s->avctx->rtp_callback){
2646                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2647                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2648                     }
2649                     update_mb_info(s, 1);
2650
2651                     switch(s->codec_id){
2652                     case AV_CODEC_ID_MPEG4:
2653                         if (CONFIG_MPEG4_ENCODER) {
2654                             ff_mpeg4_encode_video_packet_header(s);
2655                             ff_mpeg4_clean_buffers(s);
2656                         }
2657                     break;
2658                     case AV_CODEC_ID_MPEG1VIDEO:
2659                     case AV_CODEC_ID_MPEG2VIDEO:
2660                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2661                             ff_mpeg1_encode_slice_header(s);
2662                             ff_mpeg1_clean_buffers(s);
2663                         }
2664                     break;
2665                     case AV_CODEC_ID_H263:
2666                     case AV_CODEC_ID_H263P:
2667                         if (CONFIG_H263_ENCODER)
2668                             ff_h263_encode_gob_header(s, mb_y);
2669                     break;
2670                     }
2671
2672                     if(s->flags&CODEC_FLAG_PASS1){
2673                         int bits= put_bits_count(&s->pb);
2674                         s->misc_bits+= bits - s->last_bits;
2675                         s->last_bits= bits;
2676                     }
2677
2678                     s->ptr_lastgob += current_packet_size;
2679                     s->first_slice_line=1;
2680                     s->resync_mb_x=mb_x;
2681                     s->resync_mb_y=mb_y;
2682                 }
2683             }
2684
2685             if(  (s->resync_mb_x   == s->mb_x)
2686                && s->resync_mb_y+1 == s->mb_y){
2687                 s->first_slice_line=0;
2688             }
2689
2690             s->mb_skipped=0;
2691             s->dquant=0; //only for QP_RD
2692
2693             update_mb_info(s, 0);
2694
2695             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2696                 int next_block=0;
2697                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2698
2699                 copy_context_before_encode(&backup_s, s, -1);
2700                 backup_s.pb= s->pb;
2701                 best_s.data_partitioning= s->data_partitioning;
2702                 best_s.partitioned_frame= s->partitioned_frame;
2703                 if(s->data_partitioning){
2704                     backup_s.pb2= s->pb2;
2705                     backup_s.tex_pb= s->tex_pb;
2706                 }
2707
2708                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2709                     s->mv_dir = MV_DIR_FORWARD;
2710                     s->mv_type = MV_TYPE_16X16;
2711                     s->mb_intra= 0;
2712                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2713                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2714                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2715                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2716                 }
2717                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2718                     s->mv_dir = MV_DIR_FORWARD;
2719                     s->mv_type = MV_TYPE_FIELD;
2720                     s->mb_intra= 0;
2721                     for(i=0; i<2; i++){
2722                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2723                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2724                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2725                     }
2726                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2727                                  &dmin, &next_block, 0, 0);
2728                 }
2729                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2730                     s->mv_dir = MV_DIR_FORWARD;
2731                     s->mv_type = MV_TYPE_16X16;
2732                     s->mb_intra= 0;
2733                     s->mv[0][0][0] = 0;
2734                     s->mv[0][0][1] = 0;
2735                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2736                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2737                 }
2738                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2739                     s->mv_dir = MV_DIR_FORWARD;
2740                     s->mv_type = MV_TYPE_8X8;
2741                     s->mb_intra= 0;
2742                     for(i=0; i<4; i++){
2743                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2744                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2745                     }
2746                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2747                                  &dmin, &next_block, 0, 0);
2748                 }
2749                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2750                     s->mv_dir = MV_DIR_FORWARD;
2751                     s->mv_type = MV_TYPE_16X16;
2752                     s->mb_intra= 0;
2753                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2754                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2755                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2756                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2757                 }
2758                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2759                     s->mv_dir = MV_DIR_BACKWARD;
2760                     s->mv_type = MV_TYPE_16X16;
2761                     s->mb_intra= 0;
2762                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2763                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2764                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2765                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2766                 }
2767                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2768                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2769                     s->mv_type = MV_TYPE_16X16;
2770                     s->mb_intra= 0;
2771                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2772                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2773                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2774                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2775                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2776                                  &dmin, &next_block, 0, 0);
2777                 }
2778                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2779                     s->mv_dir = MV_DIR_FORWARD;
2780                     s->mv_type = MV_TYPE_FIELD;
2781                     s->mb_intra= 0;
2782                     for(i=0; i<2; i++){
2783                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2784                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2785                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2786                     }
2787                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2788                                  &dmin, &next_block, 0, 0);
2789                 }
2790                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2791                     s->mv_dir = MV_DIR_BACKWARD;
2792                     s->mv_type = MV_TYPE_FIELD;
2793                     s->mb_intra= 0;
2794                     for(i=0; i<2; i++){
2795                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2796                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2797                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2798                     }
2799                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2800                                  &dmin, &next_block, 0, 0);
2801                 }
2802                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2803                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2804                     s->mv_type = MV_TYPE_FIELD;
2805                     s->mb_intra= 0;
2806                     for(dir=0; dir<2; dir++){
2807                         for(i=0; i<2; i++){
2808                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2809                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2810                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2811                         }
2812                     }
2813                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2814                                  &dmin, &next_block, 0, 0);
2815                 }
2816                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2817                     s->mv_dir = 0;
2818                     s->mv_type = MV_TYPE_16X16;
2819                     s->mb_intra= 1;
2820                     s->mv[0][0][0] = 0;
2821                     s->mv[0][0][1] = 0;
2822                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2823                                  &dmin, &next_block, 0, 0);
2824                     if(s->h263_pred || s->h263_aic){
2825                         if(best_s.mb_intra)
2826                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2827                         else
2828                             ff_clean_intra_table_entries(s); //old mode?
2829                     }
2830                 }
2831
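                     /* QP_RD: starting from the best mode found so far (16x16 MV types
                      * only), additionally try the neighbouring quantizers via
                      * dquant -1/+1/-2/+2 (only +-2 for B-frames) and keep whichever
                      * gives the lowest rate-distortion score. The intra DC/AC
                      * prediction state is saved before each trial and restored when
                      * the trial is rejected. */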
2832                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2833                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2834                         const int last_qp= backup_s.qscale;
2835                         int qpi, qp, dc[6];
2836                         int16_t ac[6][16];
2837                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2838                         static const int dquant_tab[4]={-1,1,-2,2};
2839
2840                         assert(backup_s.dquant == 0);
2841
2842                         //FIXME intra
2843                         s->mv_dir= best_s.mv_dir;
2844                         s->mv_type = MV_TYPE_16X16;
2845                         s->mb_intra= best_s.mb_intra;
2846                         s->mv[0][0][0] = best_s.mv[0][0][0];
2847                         s->mv[0][0][1] = best_s.mv[0][0][1];
2848                         s->mv[1][0][0] = best_s.mv[1][0][0];
2849                         s->mv[1][0][1] = best_s.mv[1][0][1];
2850
2851                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2852                         for(; qpi<4; qpi++){
2853                             int dquant= dquant_tab[qpi];
2854                             qp= last_qp + dquant;
2855                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2856                                 continue;
2857                             backup_s.dquant= dquant;
2858                             if(s->mb_intra && s->dc_val[0]){
2859                                 for(i=0; i<6; i++){
2860                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2861                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2862                                 }
2863                             }
2864
2865                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2866                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2867                             if(best_s.qscale != qp){
2868                                 if(s->mb_intra && s->dc_val[0]){
2869                                     for(i=0; i<6; i++){
2870                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2871                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2872                                     }
2873                                 }
2874                             }
2875                         }
2876                     }
2877                 }
2878                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2879                     int mx= s->b_direct_mv_table[xy][0];
2880                     int my= s->b_direct_mv_table[xy][1];
2881
2882                     backup_s.dquant = 0;
2883                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2884                     s->mb_intra= 0;
2885                     ff_mpeg4_set_direct_mv(s, mx, my);
2886                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2887                                  &dmin, &next_block, mx, my);
2888                 }
2889                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2890                     backup_s.dquant = 0;
2891                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2892                     s->mb_intra= 0;
2893                     ff_mpeg4_set_direct_mv(s, 0, 0);
2894                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2895                                  &dmin, &next_block, 0, 0);
2896                 }
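                     /* SKIP_RD: if the best candidate so far is inter coded and has any
                      * coded coefficients, re-try it with the DCT skipped (skipdct=1),
                      * i.e. with no residual at all, and keep that version when it is
                      * cheaper in rate-distortion terms. */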
2897                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2898                     int coded=0;
2899                     for(i=0; i<6; i++)
2900                         coded |= s->block_last_index[i];
2901                     if(coded){
2902                         int mx,my;
2903                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2904                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2905                             mx=my=0; //FIXME find the one we actually used
2906                             ff_mpeg4_set_direct_mv(s, mx, my);
2907                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2908                             mx= s->mv[1][0][0];
2909                             my= s->mv[1][0][1];
2910                         }else{
2911                             mx= s->mv[0][0][0];
2912                             my= s->mv[0][0][1];
2913                         }
2914
2915                         s->mv_dir= best_s.mv_dir;
2916                         s->mv_type = best_s.mv_type;
2917                         s->mb_intra= 0;
2918 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2919                         s->mv[0][0][1] = best_s.mv[0][0][1];
2920                         s->mv[1][0][0] = best_s.mv[1][0][0];
2921                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2922                         backup_s.dquant= 0;
2923                         s->skipdct=1;
2924                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2925                                         &dmin, &next_block, mx, my);
2926                         s->skipdct=0;
2927                     }
2928                 }
2929
2930                 s->current_picture.qscale_table[xy] = best_s.qscale;
2931
2932                 copy_context_after_encode(s, &best_s, -1);
2933
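                     /* Copy the bits of the winning candidate from its scratch bit
                      * buffer back into the real bitstream writer (and likewise for
                      * pb2/tex_pb when data partitioning is used). */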
2934                 pb_bits_count= put_bits_count(&s->pb);
2935                 flush_put_bits(&s->pb);
2936                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2937                 s->pb= backup_s.pb;
2938
2939                 if(s->data_partitioning){
2940                     pb2_bits_count= put_bits_count(&s->pb2);
2941                     flush_put_bits(&s->pb2);
2942                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2943                     s->pb2= backup_s.pb2;
2944
2945                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2946                     flush_put_bits(&s->tex_pb);
2947                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2948                     s->tex_pb= backup_s.tex_pb;
2949                 }
2950                 s->last_bits= put_bits_count(&s->pb);
2951
2952                 if (CONFIG_H263_ENCODER &&
2953                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2954                     ff_h263_update_motion_val(s);
2955
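                     /* Depending on which buffer the winning candidate was reconstructed
                      * into (tracked via next_block), its pixels may still sit in the RD
                      * scratchpad and are copied to the destination planes here. */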
2956                 if(next_block==0){ //FIXME 16 vs linesize16
2957                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2958                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2959                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2960                 }
2961
2962                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2963                     ff_MPV_decode_mb(s, s->block);
2964             } else {
2965                 int motion_x = 0, motion_y = 0;
2966                 s->mv_type=MV_TYPE_16X16;
2967                 // only one MB-Type possible
2968
2969                 switch(mb_type){
2970                 case CANDIDATE_MB_TYPE_INTRA:
2971                     s->mv_dir = 0;
2972                     s->mb_intra= 1;
2973                     motion_x= s->mv[0][0][0] = 0;
2974                     motion_y= s->mv[0][0][1] = 0;
2975                     break;
2976                 case CANDIDATE_MB_TYPE_INTER:
2977                     s->mv_dir = MV_DIR_FORWARD;
2978                     s->mb_intra= 0;
2979                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2980                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2981                     break;
2982                 case CANDIDATE_MB_TYPE_INTER_I:
2983                     s->mv_dir = MV_DIR_FORWARD;
2984                     s->mv_type = MV_TYPE_FIELD;
2985                     s->mb_intra= 0;
2986                     for(i=0; i<2; i++){
2987                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2988                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2989                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2990                     }
2991                     break;
2992                 case CANDIDATE_MB_TYPE_INTER4V:
2993                     s->mv_dir = MV_DIR_FORWARD;
2994                     s->mv_type = MV_TYPE_8X8;
2995                     s->mb_intra= 0;
2996                     for(i=0; i<4; i++){
2997                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2998                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2999                     }
3000                     break;
3001                 case CANDIDATE_MB_TYPE_DIRECT:
3002                     if (CONFIG_MPEG4_ENCODER) {
3003                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3004                         s->mb_intra= 0;
3005                         motion_x=s->b_direct_mv_table[xy][0];
3006                         motion_y=s->b_direct_mv_table[xy][1];
3007                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3008                     }
3009                     break;
3010                 case CANDIDATE_MB_TYPE_DIRECT0:
3011                     if (CONFIG_MPEG4_ENCODER) {
3012                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3013                         s->mb_intra= 0;
3014                         ff_mpeg4_set_direct_mv(s, 0, 0);
3015                     }
3016                     break;
3017                 case CANDIDATE_MB_TYPE_BIDIR:
3018                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3019                     s->mb_intra= 0;
3020                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3021                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3022                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3023                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3024                     break;
3025                 case CANDIDATE_MB_TYPE_BACKWARD:
3026                     s->mv_dir = MV_DIR_BACKWARD;
3027                     s->mb_intra= 0;
3028                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3029                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3030                     break;
3031                 case CANDIDATE_MB_TYPE_FORWARD:
3032                     s->mv_dir = MV_DIR_FORWARD;
3033                     s->mb_intra= 0;
3034                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3035                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3036                     break;
3037                 case CANDIDATE_MB_TYPE_FORWARD_I:
3038                     s->mv_dir = MV_DIR_FORWARD;
3039                     s->mv_type = MV_TYPE_FIELD;
3040                     s->mb_intra= 0;
3041                     for(i=0; i<2; i++){
3042                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3043                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3044                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3045                     }
3046                     break;
3047                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3048                     s->mv_dir = MV_DIR_BACKWARD;
3049                     s->mv_type = MV_TYPE_FIELD;
3050                     s->mb_intra= 0;
3051                     for(i=0; i<2; i++){
3052                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3053                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3054                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3055                     }
3056                     break;
3057                 case CANDIDATE_MB_TYPE_BIDIR_I:
3058                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3059                     s->mv_type = MV_TYPE_FIELD;
3060                     s->mb_intra= 0;
3061                     for(dir=0; dir<2; dir++){
3062                         for(i=0; i<2; i++){
3063                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3064                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3065                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3066                         }
3067                     }
3068                     break;
3069                 default:
3070                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3071                 }
3072
3073                 encode_mb(s, motion_x, motion_y);
3074
3075                 // RAL: Update last macroblock type
3076                 s->last_mv_dir = s->mv_dir;
3077
3078                 if (CONFIG_H263_ENCODER &&
3079                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3080                     ff_h263_update_motion_val(s);
3081
3082                 ff_MPV_decode_mb(s, s->block);
3083             }
3084
3085             /* clear the P MV table entry for intra MBs (I/P/S frames); direct mode in B-frames reads it */
3086             if(s->mb_intra /* && I,P,S_TYPE */){
3087                 s->p_mv_table[xy][0]=0;
3088                 s->p_mv_table[xy][1]=0;
3089             }
3090
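                 /* Accumulate the per-plane squared error between the source and the
                  * reconstruction for PSNR reporting; width/height are clipped at the
                  * picture border. */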
3091             if(s->flags&CODEC_FLAG_PSNR){
3092                 int w= 16;
3093                 int h= 16;
3094
3095                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3096                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3097
3098                 s->current_picture.f->error[0] += sse(
3099                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3100                     s->dest[0], w, h, s->linesize);
3101                 s->current_picture.f->error[1] += sse(
3102                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3103                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3104                 s->current_picture.f->error[2] += sse(
3105                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3106                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3107             }
3108             if(s->loop_filter){
3109                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3110                     ff_h263_loop_filter(s);
3111             }
3112             av_dlog(s->avctx, "MB %d %d bits\n",
3113                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3114         }
3115     }
3116
3117     //not pretty, but the ext header must be written before flushing the slice, so it has to be here
3118     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3119         ff_msmpeg4_encode_ext_header(s);
3120
3121     write_slice_end(s);
3122
3123     /* Send the last GOB if RTP */
3124     if (s->avctx->rtp_callback) {
3125         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3126         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3127         /* Call the RTP callback to send the last GOB */
3128         emms_c();
3129         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3130     }
3131
3132     return 0;
3133 }
3134
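     /* Accumulate a statistic gathered by a slice-thread context into the main
      * context and clear it in the source, so nothing is counted twice. */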
3135 #define MERGE(field) dst->field += src->field; src->field=0
3136 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3137     MERGE(me.scene_change_score);
3138     MERGE(me.mc_mb_var_sum_temp);
3139     MERGE(me.mb_var_sum_temp);
3140 }
3141
3142 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3143     int i;
3144
3145     MERGE(dct_count[0]); //note: the other DCT vars are not part of the context
3146     MERGE(dct_count[1]);
3147     MERGE(mv_bits);
3148     MERGE(i_tex_bits);
3149     MERGE(p_tex_bits);
3150     MERGE(i_count);
3151     MERGE(f_count);
3152     MERGE(b_count);
3153     MERGE(skip_count);
3154     MERGE(misc_bits);
3155     MERGE(er.error_count);
3156     MERGE(padding_bug_score);
3157     MERGE(current_picture.f->error[0]);
3158     MERGE(current_picture.f->error[1]);
3159     MERGE(current_picture.f->error[2]);
3160
3161     if(dst->avctx->noise_reduction){
3162         for(i=0; i<64; i++){
3163             MERGE(dct_error_sum[0][i]);
3164             MERGE(dct_error_sum[1][i]);
3165         }
3166     }
3167
3168     assert(put_bits_count(&src->pb) % 8 ==0);
3169     assert(put_bits_count(&dst->pb) % 8 ==0);
3170     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3171     flush_put_bits(&dst->pb);
3172 }
3173
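     /* Pick the frame-level quality: a forced next_lambda, the rate-control
      * estimate, or the fixed qscale that is already set. With adaptive
      * quantization the per-MB qscale table is additionally smoothed for codecs
      * that only allow small qscale changes between macroblocks (H.263/MPEG-4). */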
3174 static int estimate_qp(MpegEncContext *s, int dry_run){
3175     if (s->next_lambda){
3176         s->current_picture_ptr->f->quality =
3177         s->current_picture.f->quality = s->next_lambda;
3178         if(!dry_run) s->next_lambda= 0;
3179     } else if (!s->fixed_qscale) {
3180         s->current_picture_ptr->f->quality =
3181         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3182         if (s->current_picture.f->quality < 0)
3183             return -1;
3184     }
3185
3186     if(s->adaptive_quant){
3187         switch(s->codec_id){
3188         case AV_CODEC_ID_MPEG4:
3189             if (CONFIG_MPEG4_ENCODER)
3190                 ff_clean_mpeg4_qscales(s);
3191             break;
3192         case AV_CODEC_ID_H263:
3193         case AV_CODEC_ID_H263P:
3194         case AV_CODEC_ID_FLV1:
3195             if (CONFIG_H263_ENCODER)
3196                 ff_clean_h263_qscales(s);
3197             break;
3198         default:
3199             ff_init_qscale_tab(s);
3200         }
3201
3202         s->lambda= s->lambda_table[0];
3203         //FIXME broken
3204     }else
3205         s->lambda = s->current_picture.f->quality;
3206     update_qscale(s);
3207     return 0;
3208 }
3209
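     /* Compute the temporal distances used for B-frames: pp_time is the distance
      * between the two reference frames surrounding the current frame, pb_time
      * the distance from the previous reference to the current B-frame. */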
3210 /* must be called before writing the header */
3211 static void set_frame_distances(MpegEncContext * s){
3212     assert(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3213     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3214
3215     if(s->pict_type==AV_PICTURE_TYPE_B){
3216         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3217         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3218     }else{
3219         s->pp_time= s->time - s->last_non_b_time;
3220         s->last_non_b_time= s->time;
3221         assert(s->picture_number==0 || s->pp_time > 0);
3222     }
3223 }
3224
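     /* Encode one picture: run motion estimation (split across the slice-thread
      * contexts), choose f_code/b_code and the frame qscale, write the
      * codec-specific picture header, then encode the macroblocks via
      * encode_thread() and merge the per-thread statistics back. */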
3225 static int encode_picture(MpegEncContext *s, int picture_number)
3226 {
3227     int i, ret;
3228     int bits;
3229     int context_count = s->slice_context_count;
3230
3231     s->picture_number = picture_number;
3232
3233     /* Reset the average MB variance */
3234     s->me.mb_var_sum_temp    =
3235     s->me.mc_mb_var_sum_temp = 0;
3236
3237     /* we need to initialize some time vars before we can encode B-frames */
3238     // RAL: Condition added for MPEG1VIDEO
3239     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3240         set_frame_distances(s);
3241     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3242         ff_set_mpeg4_time(s);
3243
3244     s->me.scene_change_score=0;
3245
3246 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3247
3248     if(s->pict_type==AV_PICTURE_TYPE_I){
3249         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3250         else                        s->no_rounding=0;
3251     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3252         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3253             s->no_rounding ^= 1;
3254     }
3255
3256     if(s->flags & CODEC_FLAG_PASS2){
3257         if (estimate_qp(s,1) < 0)
3258             return -1;
3259         ff_get_2pass_fcode(s);
3260     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3261         if(s->pict_type==AV_PICTURE_TYPE_B)
3262             s->lambda= s->last_lambda_for[s->pict_type];
3263         else
3264             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3265         update_qscale(s);
3266     }
3267
3268     s->mb_intra=0; //for the rate distortion & bit compare functions
3269     for(i=1; i<context_count; i++){
3270         ret = ff_update_duplicate_context(s->thread_context[i], s);
3271         if (ret < 0)
3272             return ret;
3273     }
3274
3275     if(ff_init_me(s)<0)
3276         return -1;
3277
3278     /* Estimate motion for every MB */
3279     if(s->pict_type != AV_PICTURE_TYPE_I){
3280         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3281         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3282         if (s->pict_type != AV_PICTURE_TYPE_B) {
3283             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3284                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3285             }
3286         }
3287
3288         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3289     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3290         /* I-Frame */
3291         for(i=0; i<s->mb_stride*s->mb_height; i++)
3292             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3293
3294         if(!s->fixed_qscale){
3295             /* finding spatial complexity for I-frame rate control */
3296             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3297         }
3298     }
3299     for(i=1; i<context_count; i++){
3300         merge_context_after_me(s, s->thread_context[i]);
3301     }
3302     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3303     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3304     emms_c();
3305
3306     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3307         s->pict_type= AV_PICTURE_TYPE_I;
3308         for(i=0; i<s->mb_stride*s->mb_height; i++)
3309             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3310         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3311                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3312     }
3313
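         /* Choose the MV range codes (f_code for forward, b_code for backward
          * prediction) from the estimated vectors, then clip or invalidate vectors
          * that do not fit into the selected range. */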
3314     if(!s->umvplus){
3315         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3316             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3317
3318             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3319                 int a,b;
3320                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3321                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3322                 s->f_code= FFMAX3(s->f_code, a, b);
3323             }
3324
3325             ff_fix_long_p_mvs(s);
3326             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3327             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3328                 int j;
3329                 for(i=0; i<2; i++){
3330                     for(j=0; j<2; j++)
3331                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3332                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3333                 }
3334             }
3335         }
3336
3337         if(s->pict_type==AV_PICTURE_TYPE_B){
3338             int a, b;
3339
3340             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3341             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3342             s->f_code = FFMAX(a, b);
3343
3344             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3345             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3346             s->b_code = FFMAX(a, b);
3347
3348             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3349             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3350             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3351             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3352             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3353                 int dir, j;
3354                 for(dir=0; dir<2; dir++){
3355                     for(i=0; i<2; i++){
3356                         for(j=0; j<2; j++){
3357                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3358                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3359                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3360                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3361                         }
3362                     }
3363                 }
3364             }
3365         }
3366     }
3367
3368     if (estimate_qp(s, 0) < 0)
3369         return -1;
3370
3371     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3372         s->qscale= 3; //reduce clipping problems
3373
3374     if (s->out_format == FMT_MJPEG) {
3375         /* for mjpeg, we do include qscale in the matrix */
3376         for(i=1;i<64;i++){
3377             int j = s->idsp.idct_permutation[i];
3378
3379             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3380         }
3381         s->y_dc_scale_table=
3382         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3383         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3384         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3385                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3386         s->qscale= 8;
3387     }
3388
3389     //FIXME var duplication
3390     s->current_picture_ptr->f->key_frame =
3391     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3392     s->current_picture_ptr->f->pict_type =
3393     s->current_picture.f->pict_type = s->pict_type;
3394
3395     if (s->current_picture.f->key_frame)
3396         s->picture_in_gop_number=0;
3397
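         /* Write the codec-specific picture header and record the number of bits
          * it used in header_bits. */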
3398     s->last_bits= put_bits_count(&s->pb);
3399     switch(s->out_format) {
3400     case FMT_MJPEG:
3401         if (CONFIG_MJPEG_ENCODER)
3402             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3403                                            s->intra_matrix);
3404         break;
3405     case FMT_H261:
3406         if (CONFIG_H261_ENCODER)
3407             ff_h261_encode_picture_header(s, picture_number);
3408         break;
3409     case FMT_H263:
3410         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3411             ff_wmv2_encode_picture_header(s, picture_number);
3412         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3413             ff_msmpeg4_encode_picture_header(s, picture_number);
3414         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3415             ff_mpeg4_encode_picture_header(s, picture_number);
3416         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3417             ff_rv10_encode_picture_header(s, picture_number);
3418         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3419             ff_rv20_encode_picture_header(s, picture_number);
3420         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3421             ff_flv_encode_picture_header(s, picture_number);
3422         else if (CONFIG_H263_ENCODER)
3423             ff_h263_encode_picture_header(s, picture_number);
3424         break;
3425     case FMT_MPEG1:
3426         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3427             ff_mpeg1_encode_picture_header(s, picture_number);
3428         break;
3429     default:
3430         assert(0);
3431     }
3432     bits= put_bits_count(&s->pb);
3433     s->header_bits= bits - s->last_bits;
3434
3435     for(i=1; i<context_count; i++){
3436         update_duplicate_context_after_me(s->thread_context[i], s);
3437     }
3438     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3439     for(i=1; i<context_count; i++){
3440         merge_context_after_encode(s, s->thread_context[i]);
3441     }
3442     emms_c();
3443     return 0;
3444 }
3445
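     /* DCT-domain noise reduction: accumulate the magnitude of each coefficient
      * in dct_error_sum and shrink the coefficient towards zero by the adaptive
      * per-coefficient offset in dct_offset, clamping at zero. */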
3446 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3447     const int intra= s->mb_intra;
3448     int i;
3449
3450     s->dct_count[intra]++;
3451
3452     for(i=0; i<64; i++){
3453         int level= block[i];
3454
3455         if(level){
3456             if(level>0){
3457                 s->dct_error_sum[intra][i] += level;
3458                 level -= s->dct_offset[intra][i];
3459                 if(level<0) level=0;
3460             }else{
3461                 s->dct_error_sum[intra][i] -= level;
3462                 level += s->dct_offset[intra][i];
3463                 if(level>0) level=0;
3464             }
3465             block[i]= level;
3466         }
3467     }
3468 }
3469
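     /* Trellis quantization: after the forward DCT, walk the scan order and keep,
      * for each coefficient, a small set of candidate levels; a dynamic-programming
      * pass over the possible run lengths then picks the combination minimizing
      * quantization error + lambda * VLC bit cost. survivor[] holds the positions
      * that can still start the best remaining path. */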
3470 static int dct_quantize_trellis_c(MpegEncContext *s,
3471                                   int16_t *block, int n,
3472                                   int qscale, int *overflow){
3473     const int *qmat;
3474     const uint8_t *scantable= s->intra_scantable.scantable;
3475     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3476     int max=0;
3477     unsigned int threshold1, threshold2;
3478     int bias=0;
3479     int run_tab[65];
3480     int level_tab[65];
3481     int score_tab[65];
3482     int survivor[65];
3483     int survivor_count;
3484     int last_run=0;
3485     int last_level=0;
3486     int last_score= 0;
3487     int last_i;
3488     int coeff[2][64];
3489     int coeff_count[64];
3490     int qmul, qadd, start_i, last_non_zero, i, dc;
3491     const int esc_length= s->ac_esc_length;
3492     uint8_t * length;
3493     uint8_t * last_length;
3494     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3495
3496     s->fdsp.fdct(block);
3497
3498     if(s->dct_error_sum)
3499         s->denoise_dct(s, block);
3500     qmul= qscale*16;
3501     qadd= ((qscale-1)|1)*8;
3502
3503     if (s->mb_intra) {
3504         int q;
3505         if (!s->h263_aic) {
3506             if (n < 4)
3507                 q = s->y_dc_scale;
3508             else
3509                 q = s->c_dc_scale;
3510             q = q << 3;
3511         } else{
3512             /* For AIC we skip quant/dequant of INTRADC */
3513             q = 1 << 3;
3514             qadd=0;
3515         }
3516
3517         /* note: block[0] is assumed to be positive */
3518         block[0] = (block[0] + (q >> 1)) / q;
3519         start_i = 1;
3520         last_non_zero = 0;
3521         qmat = s->q_intra_matrix[qscale];
3522         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3523             bias= 1<<(QMAT_SHIFT-1);
3524         length     = s->intra_ac_vlc_length;
3525         last_length= s->intra_ac_vlc_last_length;
3526     } else {
3527         start_i = 0;
3528         last_non_zero = -1;
3529         qmat = s->q_inter_matrix[qscale];
3530         length     = s->inter_ac_vlc_length;
3531         last_length= s->inter_ac_vlc_last_length;
3532     }
3533     last_i= start_i;
3534
3535     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3536     threshold2= (threshold1<<1);
3537
3538     for(i=63; i>=start_i; i--) {
3539         const int j = scantable[i];
3540         int level = block[j] * qmat[j];
3541
3542         if(((unsigned)(level+threshold1))>threshold2){
3543             last_non_zero = i;
3544             break;
3545         }
3546     }
3547
3548     for(i=start_i; i<=last_non_zero; i++) {
3549         const int j = scantable[i];
3550         int level = block[j] * qmat[j];
3551
3552 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3553 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3554         if(((unsigned)(level+threshold1))>threshold2){
3555             if(level>0){
3556                 level= (bias + level)>>QMAT_SHIFT;
3557                 coeff[0][i]= level;
3558                 coeff[1][i]= level-1;
3559 //                coeff[2][k]= level-2;
3560             }else{
3561                 level= (bias - level)>>QMAT_SHIFT;
3562                 coeff[0][i]= -level;
3563                 coeff[1][i]= -level+1;
3564 //                coeff[2][k]= -level+2;
3565             }
3566             coeff_count[i]= FFMIN(level, 2);
3567             assert(coeff_count[i]);
3568             max |=level;
3569         }else{
3570             coeff[0][i]= (level>>31)|1;
3571             coeff_count[i]= 1;
3572         }
3573     }
3574
3575     *overflow= s->max_qcoeff < max; //overflow might have happened
3576
3577     if(last_non_zero < start_i){
3578         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3579         return last_non_zero;
3580     }
3581
3582     score_tab[start_i]= 0;
3583     survivor[0]= start_i;
3584     survivor_count= 1;
3585
3586     for(i=start_i; i<=last_non_zero; i++){
3587         int level_index, j, zero_distortion;
3588         int dct_coeff= FFABS(block[ scantable[i] ]);
3589         int best_score=256*256*256*120;
3590
3591         if (s->fdsp.fdct == ff_fdct_ifast)
3592             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3593         zero_distortion= dct_coeff*dct_coeff;
3594
3595         for(level_index=0; level_index < coeff_count[i]; level_index++){
3596             int distortion;
3597             int level= coeff[level_index][i];
3598             const int alevel= FFABS(level);
3599             int unquant_coeff;
3600
3601             assert(level);
3602
3603             if(s->out_format == FMT_H263){
3604                 unquant_coeff= alevel*qmul + qadd;
3605             }else{ //MPEG1
3606                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3607                 if(s->mb_intra){
3608                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3609                         unquant_coeff =   (unquant_coeff - 1) | 1;
3610                 }else{
3611                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3612                         unquant_coeff =   (unquant_coeff - 1) | 1;
3613                 }
3614                 unquant_coeff<<= 3;
3615             }
3616
3617             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3618             level+=64;
3619             if((level&(~127)) == 0){
3620                 for(j=survivor_count-1; j>=0; j--){
3621                     int run= i - survivor[j];
3622                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3623                     score += score_tab[i-run];
3624
3625                     if(score < best_score){
3626                         best_score= score;
3627                         run_tab[i+1]= run;
3628                         level_tab[i+1]= level-64;
3629                     }
3630                 }
3631
3632                 if(s->out_format == FMT_H263){
3633                     for(j=survivor_count-1; j>=0; j--){
3634                         int run= i - survivor[j];
3635                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3636                         score += score_tab[i-run];
3637                         if(score < last_score){
3638                             last_score= score;
3639                             last_run= run;
3640                             last_level= level-64;
3641                             last_i= i+1;
3642                         }
3643                     }
3644                 }
3645             }else{
3646                 distortion += esc_length*lambda;
3647                 for(j=survivor_count-1; j>=0; j--){
3648                     int run= i - survivor[j];
3649                     int score= distortion + score_tab[i-run];
3650
3651                     if(score < best_score){
3652                         best_score= score;
3653                         run_tab[i+1]= run;
3654                         level_tab[i+1]= level-64;
3655                     }
3656                 }
3657
3658                 if(s->out_format == FMT_H263){
3659                     for(j=survivor_count-1; j>=0; j--){
3660                         int run= i - survivor[j];
3661                         int score= distortion + score_tab[i-run];
3662                         if(score < last_score){
3663                             last_score= score;
3664                             last_run= run;
3665                             last_level= level-64;
3666                             last_i= i+1;
3667                         }
3668                     }
3669                 }
3670             }
3671         }
3672
3673         score_tab[i+1]= best_score;
3674
3675         //Note: there is a VLC code in MPEG-4 which is 1 bit shorter than another one with a shorter run and the same level
3676         if(last_non_zero <= 27){
3677             for(; survivor_count; survivor_count--){
3678                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3679                     break;
3680             }
3681         }else{
3682             for(; survivor_count; survivor_count--){
3683                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3684                     break;
3685             }
3686         }
3687
3688         survivor[ survivor_count++ ]= i+1;
3689     }
3690
3691     if(s->out_format != FMT_H263){
3692         last_score= 256*256*256*120;
3693         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3694             int score= score_tab[i];
3695             if(i) score += lambda*2; //FIXME could be more exact
3696
3697             if(score < last_score){
3698                 last_score= score;
3699                 last_i= i;
3700                 last_level= level_tab[i];
3701                 last_run= run_tab[i];
3702             }
3703         }
3704     }
3705
3706     s->coded_score[n] = last_score;
3707
3708     dc= FFABS(block[0]);
3709     last_non_zero= last_i - 1;
3710     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3711
3712     if(last_non_zero < start_i)
3713         return last_non_zero;
3714
3715     if(last_non_zero == 0 && start_i == 0){
3716         int best_level= 0;
3717         int best_score= dc * dc;
3718
3719         for(i=0; i<coeff_count[0]; i++){
3720             int level= coeff[i][0];
3721             int alevel= FFABS(level);
3722             int unquant_coeff, score, distortion;
3723
3724             if(s->out_format == FMT_H263){
3725                     unquant_coeff= (alevel*qmul + qadd)>>3;
3726             }else{ //MPEG1
3727                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3728                     unquant_coeff =   (unquant_coeff - 1) | 1;
3729             }
3730             unquant_coeff = (unquant_coeff + 4) >> 3;
3731             unquant_coeff<<= 3 + 3;
3732
3733             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3734             level+=64;
3735             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3736             else                    score= distortion + esc_length*lambda;
3737
3738             if(score < best_score){
3739                 best_score= score;
3740                 best_level= level - 64;
3741             }
3742         }
3743         block[0]= best_level;
3744         s->coded_score[n] = best_score - dc*dc;
3745         if(best_level == 0) return -1;
3746         else                return last_non_zero;
3747     }
3748
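         /* Walk the chosen path backwards through run_tab[]/level_tab[] and write
          * the surviving coefficients back into the (permuted) block. */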
3749     i= last_i;
3750     assert(last_level);
3751
3752     block[ perm_scantable[last_non_zero] ]= last_level;
3753     i -= last_run + 1;
3754
3755     for(; i>start_i; i -= run_tab[i] + 1){
3756         block[ perm_scantable[i-1] ]= level_tab[i];
3757     }
3758
3759     return last_non_zero;
3760 }
3761
3762 //#define REFINE_STATS 1
3763 static int16_t basis[64][64];
3764
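     /* basis[][] caches the 8x8 DCT basis functions in fixed point (BASIS_SHIFT),
      * stored in idct_permutation order, so the pixel-domain effect of changing a
      * single quantized coefficient can be evaluated without a full IDCT. */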
3765 static void build_basis(uint8_t *perm){
3766     int i, j, x, y;
3767     emms_c();
3768     for(i=0; i<8; i++){
3769         for(j=0; j<8; j++){
3770             for(y=0; y<8; y++){
3771                 for(x=0; x<8; x++){
3772                     double s= 0.25*(1<<BASIS_SHIFT);
3773                     int index= 8*i + j;
3774                     int perm_index= perm[index];
3775                     if(i==0) s*= sqrt(0.5);
3776                     if(j==0) s*= sqrt(0.5);
3777                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3778                 }
3779             }
3780         }
3781     }
3782 }
3783
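     /* Refine an already quantized block by greedy coordinate descent: repeatedly
      * try changing each coefficient by +-1, score each change as
      * lambda * (bit cost delta) plus the change of the weighted pixel-domain
      * error (try_8x8basis() on the residual rem[]), and apply the single best
      * change until nothing improves. Used by the quantizer noise shaping modes. */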
3784 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3785                         int16_t *block, int16_t *weight, int16_t *orig,
3786                         int n, int qscale){
3787     int16_t rem[64];
3788     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3789     const uint8_t *scantable= s->intra_scantable.scantable;
3790     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3791 //    unsigned int threshold1, threshold2;
3792 //    int bias=0;
3793     int run_tab[65];
3794     int prev_run=0;
3795     int prev_level=0;
3796     int qmul, qadd, start_i, last_non_zero, i, dc;
3797     uint8_t * length;
3798     uint8_t * last_length;
3799     int lambda;
3800     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3801 #ifdef REFINE_STATS
3802 static int count=0;
3803 static int after_last=0;
3804 static int to_zero=0;
3805 static int from_zero=0;
3806 static int raise=0;
3807 static int lower=0;
3808 static int messed_sign=0;
3809 #endif
3810
3811     if(basis[0][0] == 0)
3812         build_basis(s->idsp.idct_permutation);
3813
3814     qmul= qscale*2;
3815     qadd= (qscale-1)|1;
3816     if (s->mb_intra) {
3817         if (!s->h263_aic) {
3818             if (n < 4)
3819                 q = s->y_dc_scale;
3820             else
3821                 q = s->c_dc_scale;
3822         } else{
3823             /* For AIC we skip quant/dequant of INTRADC */
3824             q = 1;
3825             qadd=0;
3826         }
3827         q <<= RECON_SHIFT-3;
3828         /* note: block[0] is assumed to be positive */
3829         dc= block[0]*q;
3830 //        block[0] = (block[0] + (q >> 1)) / q;
3831         start_i = 1;
3832 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3833 //            bias= 1<<(QMAT_SHIFT-1);
3834         length     = s->intra_ac_vlc_length;
3835         last_length= s->intra_ac_vlc_last_length;
3836     } else {
3837         dc= 0;
3838         start_i = 0;
3839         length     = s->inter_ac_vlc_length;
3840         last_length= s->inter_ac_vlc_last_length;
3841     }
3842     last_non_zero = s->block_last_index[n];
3843
3844 #ifdef REFINE_STATS
3845 {START_TIMER
3846 #endif
3847     dc += (1<<(RECON_SHIFT-1));
3848     for(i=0; i<64; i++){
3849         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME use orig directly instead of copying to rem[]
3850     }
3851 #ifdef REFINE_STATS
3852 STOP_TIMER("memset rem[]")}
3853 #endif
3854     sum=0;
3855     for(i=0; i<64; i++){
3856         int one= 36;
3857         int qns=4;
3858         int w;
3859
3860         w= FFABS(weight[i]) + qns*one;
3861         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3862
3863         weight[i] = w;
3864 //        w=weight[i] = (63*qns + (w/2)) / w;
3865
3866         assert(w>0);
3867         assert(w<(1<<6));
3868         sum += w*w;
3869     }
3870     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3871 #ifdef REFINE_STATS
3872 {START_TIMER
3873 #endif
3874     run=0;
3875     rle_index=0;
3876     for(i=start_i; i<=last_non_zero; i++){
3877         int j= perm_scantable[i];
3878         const int level= block[j];
3879         int coeff;
3880
3881         if(level){
3882             if(level<0) coeff= qmul*level - qadd;
3883             else        coeff= qmul*level + qadd;
3884             run_tab[rle_index++]=run;
3885             run=0;
3886
3887             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
3888         }else{
3889             run++;
3890         }
3891     }
3892 #ifdef REFINE_STATS
3893 if(last_non_zero>0){
3894 STOP_TIMER("init rem[]")
3895 }
3896 }
3897
3898 {START_TIMER
3899 #endif
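         /* Main refinement loop: each iteration evaluates, for every coefficient
          * (and the intra DC term when present), the effect of changing it by +-1
          * and applies the single change with the best combined bit/distortion
          * score; stop as soon as no change helps. */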
3900     for(;;){
3901         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
3902         int best_coeff=0;
3903         int best_change=0;
3904         int run2, best_unquant_change=0, analyze_gradient;
3905 #ifdef REFINE_STATS
3906 {START_TIMER
3907 #endif
3908         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3909
3910         if(analyze_gradient){
3911 #ifdef REFINE_STATS
3912 {START_TIMER
3913 #endif
3914             for(i=0; i<64; i++){
3915                 int w= weight[i];
3916
3917                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3918             }
3919 #ifdef REFINE_STATS
3920 STOP_TIMER("rem*w*w")}
3921 {START_TIMER
3922 #endif
3923             s->fdsp.fdct(d1);
3924 #ifdef REFINE_STATS
3925 STOP_TIMER("dct")}
3926 #endif
3927         }
3928
3929         if(start_i){
3930             const int level= block[0];
3931             int change, old_coeff;
3932
3933             assert(s->mb_intra);
3934
3935             old_coeff= q*level;
3936
3937             for(change=-1; change<=1; change+=2){
3938                 int new_level= level + change;
3939                 int score, new_coeff;
3940
3941                 new_coeff= q*new_level;
3942                 if(new_coeff >= 2048 || new_coeff < 0)
3943                     continue;
3944
3945                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
3946                                                   new_coeff - old_coeff);
3947                 if(score<best_score){
3948                     best_score= score;
3949                     best_coeff= 0;
3950                     best_change= change;
3951                     best_unquant_change= new_coeff - old_coeff;
3952                 }
3953             }
3954         }
3955
3956         run=0;
3957         rle_index=0;
3958         run2= run_tab[rle_index++];
3959         prev_level=0;
3960         prev_run=0;
3961
3962         for(i=start_i; i<64; i++){
3963             int j= perm_scantable[i];
3964             const int level= block[j];
3965             int change, old_coeff;
3966
3967             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3968                 break;
3969
3970             if(level){
3971                 if(level<0) old_coeff= qmul*level - qadd;
3972                 else        old_coeff= qmul*level + qadd;
3973                 run2= run_tab[rle_index++]; //FIXME: may index past the last entry
3974             }else{
3975                 old_coeff=0;
3976                 run2--;
3977                 assert(run2>=0 || i >= last_non_zero );
3978             }
3979
3980             for(change=-1; change<=1; change+=2){
3981                 int new_level= level + change;
3982                 int score, new_coeff, unquant_change;
3983
3984                 score=0;
3985                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3986                    continue;
3987
3988                 if(new_level){
3989                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3990                     else            new_coeff= qmul*new_level + qadd;
3991                     if(new_coeff >= 2048 || new_coeff <= -2048)
3992                         continue;
3993                     //FIXME check for overflow
3994
3995                     if(level){
3996                         if(level < 63 && level > -63){
3997                             if(i < last_non_zero)
3998                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3999                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4000                             else
4001                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4002                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4003                         }
4004                     }else{
4005                         assert(FFABS(new_level)==1);
4006
4007                         if(analyze_gradient){
4008                             int g= d1[ scantable[i] ];
4009                             if(g && (g^new_level) >= 0)
4010                                 continue;
4011                         }
4012
4013                         if(i < last_non_zero){
4014                             int next_i= i + run2 + 1;
4015                             int next_level= block[ perm_scantable[next_i] ] + 64;
4016
4017                             if(next_level&(~127))
4018                                 next_level= 0;
4019
4020                             if(next_i < last_non_zero)
4021                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4022                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4023                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4024                             else
4025                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4026                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4027                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4028                         }else{
4029                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4030                             if(prev_level){
4031                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4032                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4033                             }
4034                         }
4035                     }
4036                 }else{
4037                     new_coeff=0;
4038                     assert(FFABS(level)==1);
4039
4040                     if(i < last_non_zero){
4041                         int next_i= i + run2 + 1;
4042                         int next_level= block[ perm_scantable[next_i] ] + 64;
4043
4044                         if(next_level&(~127))
4045                             next_level= 0;
4046
4047                         if(next_i < last_non_zero)
4048                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4049                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4050                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4051                         else
4052                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4053                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4054                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4055                     }else{
4056                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4057                         if(prev_level){
4058                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4059                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4060                         }
4061                     }
4062                 }
4063
4064                 score *= lambda;
4065
4066                 unquant_change= new_coeff - old_coeff;
4067                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
4068
4069                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4070                                                    unquant_change);
4071                 if(score<best_score){
4072                     best_score= score;
4073                     best_coeff= i;
4074                     best_change= change;
4075                     best_unquant_change= unquant_change;
4076                 }
4077             }
4078             if(level){
4079                 prev_level= level + 64;
4080                 if(prev_level&(~127))
4081                     prev_level= 0;
4082                 prev_run= run;
4083                 run=0;
4084             }else{
4085                 run++;
4086             }
4087         }
4088 #ifdef REFINE_STATS
4089 STOP_TIMER("iterative step")}
4090 #endif
4091
4092         if(best_change){
4093             int j= perm_scantable[ best_coeff ];
4094
4095             block[j] += best_change;
4096
4097             if(best_coeff > last_non_zero){
4098                 last_non_zero= best_coeff;
4099                 assert(block[j]);
4100 #ifdef REFINE_STATS
4101 after_last++;
4102 #endif
4103             }else{
4104 #ifdef REFINE_STATS
4105 if(block[j]){
4106     if(block[j] - best_change){
4107         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4108             raise++;
4109         }else{
4110             lower++;
4111         }
4112     }else{
4113         from_zero++;
4114     }
4115 }else{
4116     to_zero++;
4117 }
4118 #endif
4119                 for(; last_non_zero>=start_i; last_non_zero--){
4120                     if(block[perm_scantable[last_non_zero]])
4121                         break;
4122                 }
4123             }
4124 #ifdef REFINE_STATS
4125 count++;
4126 if(256*256*256*64 % count == 0){
4127     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4128 }
4129 #endif
4130             run=0;
4131             rle_index=0;
4132             for(i=start_i; i<=last_non_zero; i++){
4133                 int j= perm_scantable[i];
4134                 const int level= block[j];
4135
4136                  if(level){
4137                      run_tab[rle_index++]=run;
4138                      run=0;
4139                  }else{
4140                      run++;
4141                  }
4142             }
4143
4144             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4145         }else{
4146             break;
4147         }
4148     }
4149 #ifdef REFINE_STATS
4150 if(last_non_zero>0){
4151 STOP_TIMER("iterative search")
4152 }
4153 }
4154 #endif
4155
4156     return last_non_zero;
4157 }
4158
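/* Generic DCT + quantization of one 8x8 block: forward-transform the block,
 * optionally run the DCT-domain denoiser, quantize the DC coefficient with
 * the intra DC scale (or a fixed scale when H.263 AIC handles INTRADC), then
 * quantize the AC coefficients against the per-qscale matrix using the
 * configured rounding bias.  Sets *overflow if a coefficient exceeded
 * max_qcoeff and returns the index of the last nonzero coefficient. */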
4159 int ff_dct_quantize_c(MpegEncContext *s,
4160                         int16_t *block, int n,
4161                         int qscale, int *overflow)
4162 {
4163     int i, j, level, last_non_zero, q, start_i;
4164     const int *qmat;
4165     const uint8_t *scantable= s->intra_scantable.scantable;
4166     int bias;
4167     int max=0;
4168     unsigned int threshold1, threshold2;
4169
4170     s->fdsp.fdct(block);
4171
4172     if(s->dct_error_sum)
4173         s->denoise_dct(s, block);
4174
4175     if (s->mb_intra) {
4176         if (!s->h263_aic) {
4177             if (n < 4)
4178                 q = s->y_dc_scale;
4179             else
4180                 q = s->c_dc_scale;
4181             q = q << 3;
4182         } else
4183             /* For AIC we skip quant/dequant of INTRADC */
4184             q = 1 << 3;
4185
4186         /* note: block[0] is assumed to be positive */
4187         block[0] = (block[0] + (q >> 1)) / q;
4188         start_i = 1;
4189         last_non_zero = 0;
4190         qmat = s->q_intra_matrix[qscale];
4191         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4192     } else {
4193         start_i = 0;
4194         last_non_zero = -1;
4195         qmat = s->q_inter_matrix[qscale];
4196         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4197     }
4198     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4199     threshold2= (threshold1<<1);
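    /* The unsigned comparison below is a branch-free test for
     * |level| > threshold1: anything whose quantized magnitude would round
     * to zero lies inside [-threshold1, threshold1] and is cleared. */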
4200     for(i=63;i>=start_i;i--) {
4201         j = scantable[i];
4202         level = block[j] * qmat[j];
4203
4204         if(((unsigned)(level+threshold1))>threshold2){
4205             last_non_zero = i;
4206             break;
4207         }else{
4208             block[j]=0;
4209         }
4210     }
4211     for(i=start_i; i<=last_non_zero; i++) {
4212         j = scantable[i];
4213         level = block[j] * qmat[j];
4214
4215 //        if(   bias+level >= (1<<QMAT_SHIFT)
4216 //           || bias-level >= (1<<QMAT_SHIFT)){
4217         if(((unsigned)(level+threshold1))>threshold2){
4218             if(level>0){
4219                 level= (bias + level)>>QMAT_SHIFT;
4220                 block[j]= level;
4221             }else{
4222                 level= (bias - level)>>QMAT_SHIFT;
4223                 block[j]= -level;
4224             }
4225             max |=level;
4226         }else{
4227             block[j]=0;
4228         }
4229     }
4230     *overflow= s->max_qcoeff < max; // an overflow might have happened
4231
4232     /* We need this permutation so that the IDCT is corrected; only the nonzero (!=0) elements are permuted. */
4233     if (s->idsp.idct_permutation_type != FF_NO_IDCT_PERM)
4234         ff_block_permute(block, s->idsp.idct_permutation,
4235                          scantable, last_non_zero);
4236
4237     return last_non_zero;
4238 }
4239
4240 #define OFFSET(x) offsetof(MpegEncContext, x)
4241 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4242 static const AVOption h263_options[] = {
4243     { "obmc",         "Use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4244     { "structured_slices","Write the slice start position at every GOB header instead of just the GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4245     { "mb_info",      "Emit macroblock info for RFC 2190 packetization; the parameter value is the maximum payload size.", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4246     FF_MPV_COMMON_OPTS
4247     { NULL },
4248 };
4249
4250 static const AVClass h263_class = {
4251     .class_name = "H.263 encoder",
4252     .item_name  = av_default_item_name,
4253     .option     = h263_options,
4254     .version    = LIBAVUTIL_VERSION_INT,
4255 };
4256
4257 AVCodec ff_h263_encoder = {
4258     .name           = "h263",
4259     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4260     .type           = AVMEDIA_TYPE_VIDEO,
4261     .id             = AV_CODEC_ID_H263,
4262     .priv_data_size = sizeof(MpegEncContext),
4263     .init           = ff_MPV_encode_init,
4264     .encode2        = ff_MPV_encode_picture,
4265     .close          = ff_MPV_encode_end,
4266     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4267     .priv_class     = &h263_class,
4268 };
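
/* Illustrative sketch only (not part of this file): one way an application
 * might open this encoder and enable the private "obmc" option declared in
 * h263_options above, assuming the usual libavcodec setup calls.
 *
 *     AVCodec *codec    = avcodec_find_encoder_by_name("h263");
 *     AVCodecContext *c = avcodec_alloc_context3(codec);
 *     c->width          = 352;
 *     c->height         = 288;
 *     c->time_base      = (AVRational){ 1, 25 };
 *     c->pix_fmt        = AV_PIX_FMT_YUV420P;
 *     av_opt_set_int(c->priv_data, "obmc", 1, 0);   // private encoder option
 *     if (avcodec_open2(c, codec, NULL) < 0)
 *         return -1;                                // handle the error
 */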
4269
4270 static const AVOption h263p_options[] = {
4271     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4272     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4273     { "obmc",       "Use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4274     { "structured_slices", "Write the slice start position at every GOB header instead of just the GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4275     FF_MPV_COMMON_OPTS
4276     { NULL },
4277 };
4278 static const AVClass h263p_class = {
4279     .class_name = "H.263p encoder",
4280     .item_name  = av_default_item_name,
4281     .option     = h263p_options,
4282     .version    = LIBAVUTIL_VERSION_INT,
4283 };
4284
4285 AVCodec ff_h263p_encoder = {
4286     .name           = "h263p",
4287     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4288     .type           = AVMEDIA_TYPE_VIDEO,
4289     .id             = AV_CODEC_ID_H263P,
4290     .priv_data_size = sizeof(MpegEncContext),
4291     .init           = ff_MPV_encode_init,
4292     .encode2        = ff_MPV_encode_picture,
4293     .close          = ff_MPV_encode_end,
4294     .capabilities   = CODEC_CAP_SLICE_THREADS,
4295     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4296     .priv_class     = &h263p_class,
4297 };
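
/* Unlike the plain H.263 encoder, this one advertises CODEC_CAP_SLICE_THREADS,
 * so it can encode with slice-level threading when thread_count > 1.
 * Illustrative command line only (assuming the avconv CLI and a raw input):
 *
 *     avconv -i input.y4m -c:v h263p -umv 1 -structured_slices 1 output.3gp
 */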
4298
4299 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4300
4301 AVCodec ff_msmpeg4v2_encoder = {
4302     .name           = "msmpeg4v2",
4303     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4304     .type           = AVMEDIA_TYPE_VIDEO,
4305     .id             = AV_CODEC_ID_MSMPEG4V2,
4306     .priv_data_size = sizeof(MpegEncContext),
4307     .init           = ff_MPV_encode_init,
4308     .encode2        = ff_MPV_encode_picture,
4309     .close          = ff_MPV_encode_end,
4310     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4311     .priv_class     = &msmpeg4v2_class,
4312 };
4313
4314 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4315
4316 AVCodec ff_msmpeg4v3_encoder = {
4317     .name           = "msmpeg4",
4318     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4319     .type           = AVMEDIA_TYPE_VIDEO,
4320     .id             = AV_CODEC_ID_MSMPEG4V3,
4321     .priv_data_size = sizeof(MpegEncContext),
4322     .init           = ff_MPV_encode_init,
4323     .encode2        = ff_MPV_encode_picture,
4324     .close          = ff_MPV_encode_end,
4325     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4326     .priv_class     = &msmpeg4v3_class,
4327 };
4328
4329 FF_MPV_GENERIC_CLASS(wmv1)
4330
4331 AVCodec ff_wmv1_encoder = {
4332     .name           = "wmv1",
4333     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4334     .type           = AVMEDIA_TYPE_VIDEO,
4335     .id             = AV_CODEC_ID_WMV1,
4336     .priv_data_size = sizeof(MpegEncContext),
4337     .init           = ff_MPV_encode_init,
4338     .encode2        = ff_MPV_encode_picture,
4339     .close          = ff_MPV_encode_end,
4340     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4341     .priv_class     = &wmv1_class,
4342 };