1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "idctdsp.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "h261.h"
44 #include "h263.h"
45 #include "mjpegenc_common.h"
46 #include "mathops.h"
47 #include "mpegutils.h"
48 #include "mjpegenc.h"
49 #include "msmpeg4.h"
50 #include "pixblockdsp.h"
51 #include "qpeldsp.h"
52 #include "faandct.h"
53 #include "thread.h"
54 #include "aandcttab.h"
55 #include "flv.h"
56 #include "mpeg4video.h"
57 #include "internal.h"
58 #include "bytestream.h"
59 #include <limits.h>
60
61 #define QUANT_BIAS_SHIFT 8
62
63 #define QMAT_SHIFT_MMX 16
64 #define QMAT_SHIFT 22
65
66 static int encode_picture(MpegEncContext *s, int picture_number);
67 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
68 static int sse_mb(MpegEncContext *s);
69 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
70 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
71
72 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
73 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
74
75 const AVOption ff_mpv_generic_options[] = {
76     FF_MPV_COMMON_OPTS
77     { NULL },
78 };
79
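/* Descriptive note: ff_convert_matrix() builds the per-qscale quantization
 * tables.  qmat[qscale][i] holds fixed-point reciprocals
 * (1 << QMAT_SHIFT) / (qscale * quant_matrix[i]) for the C/trellis
 * quantizers (folded with ff_aanscales[] when the AAN "ifast" FDCT is used),
 * while qmat16[qscale][0/1][i] holds a 16-bit multiplier plus a matching
 * rounding bias (QMAT_SHIFT_MMX precision), presumably for the 16-bit SIMD
 * quantizer path.  The trailing loop only checks for possible 32-bit
 * overflow and warns about it. */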
80 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
81                        uint16_t (*qmat16)[2][64],
82                        const uint16_t *quant_matrix,
83                        int bias, int qmin, int qmax, int intra)
84 {
85     FDCTDSPContext *fdsp = &s->fdsp;
86     int qscale;
87     int shift = 0;
88
89     for (qscale = qmin; qscale <= qmax; qscale++) {
90         int i;
91         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
92             fdsp->fdct == ff_jpeg_fdct_islow_10 ||
93             fdsp->fdct == ff_faandct) {
94             for (i = 0; i < 64; i++) {
95                 const int j = s->idsp.idct_permutation[i];
96                 /* 16 <= qscale * quant_matrix[i] <= 7905
97                  * Assume x = qscale * quant_matrix[i]
98                  *                16 <=              x  <= 7905
99                  * (1 << 22) / 16    >= (1 << 22) / (x) >= (1 << 22) / 7905
100                  *            262144 >= (1 << 22) / (x) >= 530 */
101
102                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
103                                         (qscale * quant_matrix[j]));
104             }
105         } else if (fdsp->fdct == ff_fdct_ifast) {
106             for (i = 0; i < 64; i++) {
107                 const int j = s->idsp.idct_permutation[i];
108                 /* 16 <= qscale * quant_matrix[i] <= 7905
109                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
110                  *             19952 <=              x  <= 249205026
111                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
112                  *           3444240 >= (1 << 36) / (x) >= 275 */
113
114                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
115                                         (ff_aanscales[i] * qscale *
116                                          quant_matrix[j]));
117             }
118         } else {
119             for (i = 0; i < 64; i++) {
120                 const int j = s->idsp.idct_permutation[i];
121                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
122                  * Assume x = qscale * quant_matrix[i]
123                  * So             16 <=              x  <= 7905
124                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
125                  * so          32768 >= (1 << 19) / (x) >= 67 */
126                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
127                                         (qscale * quant_matrix[j]));
128                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
129                 //                    (qscale * quant_matrix[i]);
130                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
131                                        (qscale * quant_matrix[j]);
132
133                 if (qmat16[qscale][0][i] == 0 ||
134                     qmat16[qscale][0][i] == 128 * 256)
135                     qmat16[qscale][0][i] = 128 * 256 - 1;
136                 qmat16[qscale][1][i] =
137                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
138                                 qmat16[qscale][0][i]);
139             }
140         }
141
142         for (i = intra; i < 64; i++) {
143             int64_t max = 8191;
144             if (fdsp->fdct == ff_fdct_ifast) {
145                 max = (8191LL * ff_aanscales[i]) >> 14;
146             }
147             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
148                 shift++;
149             }
150         }
151     }
152     if (shift) {
153         av_log(NULL, AV_LOG_INFO,
154                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
155                QMAT_SHIFT - shift);
156     }
157 }
158
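/* The constant 139 below is roughly (1 << (FF_LAMBDA_SHIFT + 7)) / FF_QP2LAMBDA
 * (16384 / 118), so this maps lambda back to a quantizer scale,
 * qscale ~= lambda / FF_QP2LAMBDA, clipped to the configured [qmin, qmax]. */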
159 static inline void update_qscale(MpegEncContext *s)
160 {
161     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
162                 (FF_LAMBDA_SHIFT + 7);
163     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
164
165     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
166                  FF_LAMBDA_SHIFT;
167 }
168
169 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
170 {
171     int i;
172
173     if (matrix) {
174         put_bits(pb, 1, 1);
175         for (i = 0; i < 64; i++) {
176             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
177         }
178     } else
179         put_bits(pb, 1, 0);
180 }
181
182 /**
183  * init s->current_picture.qscale_table from s->lambda_table
184  */
185 void ff_init_qscale_tab(MpegEncContext *s)
186 {
187     int8_t * const qscale_table = s->current_picture.qscale_table;
188     int i;
189
190     for (i = 0; i < s->mb_num; i++) {
191         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
192         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
193         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
194                                                   s->avctx->qmax);
195     }
196 }
197
198 static void update_duplicate_context_after_me(MpegEncContext *dst,
199                                               MpegEncContext *src)
200 {
201 #define COPY(a) dst->a= src->a
202     COPY(pict_type);
203     COPY(current_picture);
204     COPY(f_code);
205     COPY(b_code);
206     COPY(qscale);
207     COPY(lambda);
208     COPY(lambda2);
209     COPY(picture_in_gop_number);
210     COPY(gop_picture_number);
211     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
212     COPY(progressive_frame);    // FIXME don't set in encode_header
213     COPY(partitioned_frame);    // FIXME don't set in encode_header
214 #undef COPY
215 }
216
217 /**
218  * Set the given MpegEncContext to defaults for encoding.
219  * The changed fields will not depend upon the prior state of the MpegEncContext.
220  */
221 static void MPV_encode_defaults(MpegEncContext *s)
222 {
223     int i;
224     ff_MPV_common_defaults(s);
225
226     for (i = -16; i < 16; i++) {
227         default_fcode_tab[i + MAX_MV] = 1;
228     }
229     s->me.mv_penalty = default_mv_penalty;
230     s->fcode_tab     = default_fcode_tab;
231
232     s->input_picture_number  = 0;
233     s->picture_in_gop_number = 0;
234 }
235
236 /* init video encoder */
237 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
238 {
239     MpegEncContext *s = avctx->priv_data;
240     int i, ret, format_supported;
241
242     MPV_encode_defaults(s);
243
244     switch (avctx->codec_id) {
245     case AV_CODEC_ID_MPEG2VIDEO:
246         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
247             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
248             av_log(avctx, AV_LOG_ERROR,
249                    "only YUV420 and YUV422 are supported\n");
250             return -1;
251         }
252         break;
253     case AV_CODEC_ID_MJPEG:
254         format_supported = 0;
255         /* JPEG color space */
256         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
257             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
258             (avctx->color_range == AVCOL_RANGE_JPEG &&
259              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
260               avctx->pix_fmt == AV_PIX_FMT_YUV422P)))
261             format_supported = 1;
262         /* MPEG color space */
263         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
264                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
265                   avctx->pix_fmt == AV_PIX_FMT_YUV422P))
266             format_supported = 1;
267
268         if (!format_supported) {
269             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
270             return -1;
271         }
272         break;
273     default:
274         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
275             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
276             return -1;
277         }
278     }
279
280     switch (avctx->pix_fmt) {
281     case AV_PIX_FMT_YUVJ422P:
282     case AV_PIX_FMT_YUV422P:
283         s->chroma_format = CHROMA_422;
284         break;
285     case AV_PIX_FMT_YUVJ420P:
286     case AV_PIX_FMT_YUV420P:
287     default:
288         s->chroma_format = CHROMA_420;
289         break;
290     }
291
292     s->bit_rate = avctx->bit_rate;
293     s->width    = avctx->width;
294     s->height   = avctx->height;
295     if (avctx->gop_size > 600 &&
296         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
297         av_log(avctx, AV_LOG_ERROR,
298                "Warning keyframe interval too large! reducing it ...\n");
299         avctx->gop_size = 600;
300     }
301     s->gop_size     = avctx->gop_size;
302     s->avctx        = avctx;
303     s->flags        = avctx->flags;
304     s->flags2       = avctx->flags2;
305     if (avctx->max_b_frames > MAX_B_FRAMES) {
306         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
307                "is %d.\n", MAX_B_FRAMES);
308     }
309     s->max_b_frames = avctx->max_b_frames;
310     s->codec_id     = avctx->codec->id;
311     s->strict_std_compliance = avctx->strict_std_compliance;
312     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
313     s->mpeg_quant         = avctx->mpeg_quant;
314     s->rtp_mode           = !!avctx->rtp_payload_size;
315     s->intra_dc_precision = avctx->intra_dc_precision;
316     s->user_specified_pts = AV_NOPTS_VALUE;
317
318     if (s->gop_size <= 1) {
319         s->intra_only = 1;
320         s->gop_size   = 12;
321     } else {
322         s->intra_only = 0;
323     }
324
325     s->me_method = avctx->me_method;
326
327     /* Fixed QSCALE */
328     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
329
330     s->adaptive_quant = (s->avctx->lumi_masking ||
331                          s->avctx->dark_masking ||
332                          s->avctx->temporal_cplx_masking ||
333                          s->avctx->spatial_cplx_masking  ||
334                          s->avctx->p_masking      ||
335                          s->avctx->border_masking ||
336                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
337                         !s->fixed_qscale;
338
339     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
340
341     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
342         av_log(avctx, AV_LOG_ERROR,
343                "a vbv buffer size is needed "
344                "for encoding with a maximum bitrate\n");
345         return -1;
346     }
347
348     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
349         av_log(avctx, AV_LOG_INFO,
350                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
351     }
352
353     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
354         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
355         return -1;
356     }
357
358     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
359         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
360         return -1;
361     }
362
363     if (avctx->rc_max_rate &&
364         avctx->rc_max_rate == avctx->bit_rate &&
365         avctx->rc_max_rate != avctx->rc_min_rate) {
366         av_log(avctx, AV_LOG_INFO,
367                "impossible bitrate constraints, this will fail\n");
368     }
369
370     if (avctx->rc_buffer_size &&
371         avctx->bit_rate * (int64_t)avctx->time_base.num >
372             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
373         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
374         return -1;
375     }
376
377     if (!s->fixed_qscale &&
378         avctx->bit_rate * av_q2d(avctx->time_base) >
379             avctx->bit_rate_tolerance) {
380         av_log(avctx, AV_LOG_ERROR,
381                "bitrate tolerance too small for bitrate\n");
382         return -1;
383     }
384
385     if (s->avctx->rc_max_rate &&
386         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
387         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
388          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
389         90000LL * (avctx->rc_buffer_size - 1) >
390             s->avctx->rc_max_rate * 0xFFFFLL) {
391         av_log(avctx, AV_LOG_INFO,
392                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
393                "specified vbv buffer is too large for the given bitrate!\n");
394     }
395
396     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
397         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
398         s->codec_id != AV_CODEC_ID_FLV1) {
399         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
400         return -1;
401     }
402
403     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
404         av_log(avctx, AV_LOG_ERROR,
405                "OBMC is only supported with simple mb decision\n");
406         return -1;
407     }
408
409     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
410         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
411         return -1;
412     }
413
414     if (s->max_b_frames                    &&
415         s->codec_id != AV_CODEC_ID_MPEG4      &&
416         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
417         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
418         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
419         return -1;
420     }
421
422     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
423          s->codec_id == AV_CODEC_ID_H263  ||
424          s->codec_id == AV_CODEC_ID_H263P) &&
425         (avctx->sample_aspect_ratio.num > 255 ||
426          avctx->sample_aspect_ratio.den > 255)) {
427         av_log(avctx, AV_LOG_ERROR,
428                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
429                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
430         return -1;
431     }
432
433     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
434         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
435         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
436         return -1;
437     }
438
439     // FIXME mpeg2 uses that too
440     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
441         av_log(avctx, AV_LOG_ERROR,
442                "mpeg2 style quantization not supported by codec\n");
443         return -1;
444     }
445
446     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
447         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
448         return -1;
449     }
450
451     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
452         s->avctx->mb_decision != FF_MB_DECISION_RD) {
453         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
454         return -1;
455     }
456
457     if (s->avctx->scenechange_threshold < 1000000000 &&
458         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
459         av_log(avctx, AV_LOG_ERROR,
460                "closed gop with scene change detection are not supported yet, "
461                "set threshold to 1000000000\n");
462         return -1;
463     }
464
465     if (s->flags & CODEC_FLAG_LOW_DELAY) {
466         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
467             av_log(avctx, AV_LOG_ERROR,
468                   "low delay forcing is only available for mpeg2\n");
469             return -1;
470         }
471         if (s->max_b_frames != 0) {
472             av_log(avctx, AV_LOG_ERROR,
473                    "b frames cannot be used with low delay\n");
474             return -1;
475         }
476     }
477
478     if (s->q_scale_type == 1) {
479         if (avctx->qmax > 12) {
480             av_log(avctx, AV_LOG_ERROR,
481                    "non linear quant only supports qmax <= 12 currently\n");
482             return -1;
483         }
484     }
485
486     if (s->avctx->thread_count > 1         &&
487         s->codec_id != AV_CODEC_ID_MPEG4      &&
488         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
489         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
490         (s->codec_id != AV_CODEC_ID_H263P)) {
491         av_log(avctx, AV_LOG_ERROR,
492                "multi threaded encoding not supported by codec\n");
493         return -1;
494     }
495
496     if (s->avctx->thread_count < 1) {
497         av_log(avctx, AV_LOG_ERROR,
498                "automatic thread number detection not supported by codec, "
499                "patch welcome\n");
500         return -1;
501     }
502
503     if (s->avctx->thread_count > 1)
504         s->rtp_mode = 1;
505
506     if (!avctx->time_base.den || !avctx->time_base.num) {
507         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
508         return -1;
509     }
510
511     i = (INT_MAX / 2 + 128) >> 8;
512     if (avctx->mb_threshold >= i) {
513         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
514                i - 1);
515         return -1;
516     }
517
518     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
519         av_log(avctx, AV_LOG_INFO,
520                "notice: b_frame_strategy only affects the first pass\n");
521         avctx->b_frame_strategy = 0;
522     }
523
524     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
525     if (i > 1) {
526         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
527         avctx->time_base.den /= i;
528         avctx->time_base.num /= i;
529         //return -1;
530     }
531
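    /* Default quantizer rounding bias, expressed in units of
     * 1 << QUANT_BIAS_SHIFT (one quantization step): the MPEG-1/2/4-style and
     * MJPEG quantizers get +3/8 for intra and 0 for inter blocks; the
     * H.263-style path gets 0 for intra and -1/4 (a dead zone) for inter. */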
532     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
533         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
534         // (a + x * 3 / 8) / x
535         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
536         s->inter_quant_bias = 0;
537     } else {
538         s->intra_quant_bias = 0;
539         // (a - x / 4) / x
540         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
541     }
542
543     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
544         s->intra_quant_bias = avctx->intra_quant_bias;
545     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
546         s->inter_quant_bias = avctx->inter_quant_bias;
547
548     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
549         s->avctx->time_base.den > (1 << 16) - 1) {
550         av_log(avctx, AV_LOG_ERROR,
551                "timebase %d/%d not supported by MPEG 4 standard, "
552                "the maximum admitted value for the timebase denominator "
553                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
554                (1 << 16) - 1);
555         return -1;
556     }
557     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
558
559     switch (avctx->codec->id) {
560     case AV_CODEC_ID_MPEG1VIDEO:
561         s->out_format = FMT_MPEG1;
562         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
563         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
564         break;
565     case AV_CODEC_ID_MPEG2VIDEO:
566         s->out_format = FMT_MPEG1;
567         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
568         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
569         s->rtp_mode   = 1;
570         break;
571     case AV_CODEC_ID_MJPEG:
572         s->out_format = FMT_MJPEG;
573         s->intra_only = 1; /* force intra only for jpeg */
574         if (!CONFIG_MJPEG_ENCODER ||
575             ff_mjpeg_encode_init(s) < 0)
576             return -1;
577         avctx->delay = 0;
578         s->low_delay = 1;
579         break;
580     case AV_CODEC_ID_H261:
581         if (!CONFIG_H261_ENCODER)
582             return -1;
583         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
584             av_log(avctx, AV_LOG_ERROR,
585                    "The specified picture size of %dx%d is not valid for the "
586                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
587                     s->width, s->height);
588             return -1;
589         }
590         s->out_format = FMT_H261;
591         avctx->delay  = 0;
592         s->low_delay  = 1;
593         break;
594     case AV_CODEC_ID_H263:
595         if (!CONFIG_H263_ENCODER)
596             return -1;
597         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
598                              s->width, s->height) == 8) {
599             av_log(avctx, AV_LOG_INFO,
600                    "The specified picture size of %dx%d is not valid for "
601                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
602                    "352x288, 704x576, and 1408x1152."
603                    "Try H.263+.\n", s->width, s->height);
604             return -1;
605         }
606         s->out_format = FMT_H263;
607         avctx->delay  = 0;
608         s->low_delay  = 1;
609         break;
610     case AV_CODEC_ID_H263P:
611         s->out_format = FMT_H263;
612         s->h263_plus  = 1;
613         /* Fx */
614         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
615         s->modified_quant  = s->h263_aic;
616         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
617         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
618
619         /* /Fx */
620         /* These are just to be sure */
621         avctx->delay = 0;
622         s->low_delay = 1;
623         break;
624     case AV_CODEC_ID_FLV1:
625         s->out_format      = FMT_H263;
626         s->h263_flv        = 2; /* format = 1; 11-bit codes */
627         s->unrestricted_mv = 1;
628         s->rtp_mode  = 0; /* don't allow GOB */
629         avctx->delay = 0;
630         s->low_delay = 1;
631         break;
632     case AV_CODEC_ID_RV10:
633         s->out_format = FMT_H263;
634         avctx->delay  = 0;
635         s->low_delay  = 1;
636         break;
637     case AV_CODEC_ID_RV20:
638         s->out_format      = FMT_H263;
639         avctx->delay       = 0;
640         s->low_delay       = 1;
641         s->modified_quant  = 1;
642         s->h263_aic        = 1;
643         s->h263_plus       = 1;
644         s->loop_filter     = 1;
645         s->unrestricted_mv = 0;
646         break;
647     case AV_CODEC_ID_MPEG4:
648         s->out_format      = FMT_H263;
649         s->h263_pred       = 1;
650         s->unrestricted_mv = 1;
651         s->low_delay       = s->max_b_frames ? 0 : 1;
652         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
653         break;
654     case AV_CODEC_ID_MSMPEG4V2:
655         s->out_format      = FMT_H263;
656         s->h263_pred       = 1;
657         s->unrestricted_mv = 1;
658         s->msmpeg4_version = 2;
659         avctx->delay       = 0;
660         s->low_delay       = 1;
661         break;
662     case AV_CODEC_ID_MSMPEG4V3:
663         s->out_format        = FMT_H263;
664         s->h263_pred         = 1;
665         s->unrestricted_mv   = 1;
666         s->msmpeg4_version   = 3;
667         s->flipflop_rounding = 1;
668         avctx->delay         = 0;
669         s->low_delay         = 1;
670         break;
671     case AV_CODEC_ID_WMV1:
672         s->out_format        = FMT_H263;
673         s->h263_pred         = 1;
674         s->unrestricted_mv   = 1;
675         s->msmpeg4_version   = 4;
676         s->flipflop_rounding = 1;
677         avctx->delay         = 0;
678         s->low_delay         = 1;
679         break;
680     case AV_CODEC_ID_WMV2:
681         s->out_format        = FMT_H263;
682         s->h263_pred         = 1;
683         s->unrestricted_mv   = 1;
684         s->msmpeg4_version   = 5;
685         s->flipflop_rounding = 1;
686         avctx->delay         = 0;
687         s->low_delay         = 1;
688         break;
689     default:
690         return -1;
691     }
692
693     avctx->has_b_frames = !s->low_delay;
694
695     s->encoding = 1;
696
697     s->progressive_frame    =
698     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
699                                                 CODEC_FLAG_INTERLACED_ME) ||
700                                 s->alternate_scan);
701
702     /* init */
703     ff_mpv_idct_init(s);
704     if (ff_MPV_common_init(s) < 0)
705         return -1;
706
707     if (ARCH_X86)
708         ff_MPV_encode_init_x86(s);
709
710     ff_fdctdsp_init(&s->fdsp, avctx);
711     ff_me_cmp_init(&s->mecc, avctx);
712     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
713     ff_pixblockdsp_init(&s->pdsp, avctx);
714     ff_qpeldsp_init(&s->qdsp);
715
716     s->avctx->coded_frame = s->current_picture.f;
717
718     if (s->msmpeg4_version) {
719         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
720                           2 * 2 * (MAX_LEVEL + 1) *
721                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
722     }
723     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
724
725     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
726     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
727     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
728     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
729     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
730                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
731     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
732                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
733
734     if (s->avctx->noise_reduction) {
735         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
736                           2 * 64 * sizeof(uint16_t), fail);
737     }
738
739     if (CONFIG_H263_ENCODER)
740         ff_h263dsp_init(&s->h263dsp);
741     if (!s->dct_quantize)
742         s->dct_quantize = ff_dct_quantize_c;
743     if (!s->denoise_dct)
744         s->denoise_dct  = denoise_dct_c;
745     s->fast_dct_quantize = s->dct_quantize;
746     if (avctx->trellis)
747         s->dct_quantize  = dct_quantize_trellis_c;
748
749     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
750         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
751
752     s->quant_precision = 5;
753
754     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
755     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
756
757     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
758         ff_h261_encode_init(s);
759     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
760         ff_h263_encode_init(s);
761     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
762         ff_msmpeg4_encode_init(s);
763     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
764         && s->out_format == FMT_MPEG1)
765         ff_mpeg1_encode_init(s);
766
767     /* init q matrix */
768     for (i = 0; i < 64; i++) {
769         int j = s->idsp.idct_permutation[i];
770         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
771             s->mpeg_quant) {
772             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
773             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
774         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
775             s->intra_matrix[j] =
776             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
777         } else {
778             /* mpeg1/2 */
779             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
780             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
781         }
782         if (s->avctx->intra_matrix)
783             s->intra_matrix[j] = s->avctx->intra_matrix[i];
784         if (s->avctx->inter_matrix)
785             s->inter_matrix[j] = s->avctx->inter_matrix[i];
786     }
787
788     /* precompute matrix */
789     /* for mjpeg, we do include qscale in the matrix */
790     if (s->out_format != FMT_MJPEG) {
791         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
792                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
793                           31, 1);
794         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
795                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
796                           31, 0);
797     }
798
799     if (ff_rate_control_init(s) < 0)
800         return -1;
801
802 #if FF_API_ERROR_RATE
803     FF_DISABLE_DEPRECATION_WARNINGS
804     if (avctx->error_rate)
805         s->error_rate = avctx->error_rate;
806     FF_ENABLE_DEPRECATION_WARNINGS
807 #endif
808
809 #if FF_API_NORMALIZE_AQP
810     FF_DISABLE_DEPRECATION_WARNINGS
811     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
812         s->mpv_flags |= FF_MPV_FLAG_NAQ;
813     FF_ENABLE_DEPRECATION_WARNINGS
814 #endif
815
816 #if FF_API_MV0
817     FF_DISABLE_DEPRECATION_WARNINGS
818     if (avctx->flags & CODEC_FLAG_MV0)
819         s->mpv_flags |= FF_MPV_FLAG_MV0;
820     FF_ENABLE_DEPRECATION_WARNINGS
821 #endif
822
823     if (avctx->b_frame_strategy == 2) {
824         for (i = 0; i < s->max_b_frames + 2; i++) {
825             s->tmp_frames[i] = av_frame_alloc();
826             if (!s->tmp_frames[i])
827                 return AVERROR(ENOMEM);
828
829             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
830             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
831             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
832
833             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
834             if (ret < 0)
835                 return ret;
836         }
837     }
838
839     return 0;
840 fail:
841     ff_MPV_encode_end(avctx);
842     return AVERROR_UNKNOWN;
843 }
844
845 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
846 {
847     MpegEncContext *s = avctx->priv_data;
848     int i;
849
850     ff_rate_control_uninit(s);
851
852     ff_MPV_common_end(s);
853     if (CONFIG_MJPEG_ENCODER &&
854         s->out_format == FMT_MJPEG)
855         ff_mjpeg_encode_close(s);
856
857     av_freep(&avctx->extradata);
858
859     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
860         av_frame_free(&s->tmp_frames[i]);
861
862     ff_free_picture_tables(&s->new_picture);
863     ff_mpeg_unref_picture(s, &s->new_picture);
864
865     av_freep(&s->avctx->stats_out);
866     av_freep(&s->ac_stats);
867
868     av_freep(&s->q_intra_matrix);
869     av_freep(&s->q_inter_matrix);
870     av_freep(&s->q_intra_matrix16);
871     av_freep(&s->q_inter_matrix16);
872     av_freep(&s->input_picture);
873     av_freep(&s->reordered_input_picture);
874     av_freep(&s->dct_offset);
875
876     return 0;
877 }
878
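/* get_sae() sums the absolute deviation of a 16x16 block from a constant
 * reference value (its mean, as used below); get_intra_count() counts the
 * macroblocks whose deviation from their own mean (plus a small margin) is
 * below the SAD against the reference frame, i.e. blocks that look cheaper
 * to code as intra.  The result feeds b_frame_strategy 1. */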
879 static int get_sae(uint8_t *src, int ref, int stride)
880 {
881     int x,y;
882     int acc = 0;
883
884     for (y = 0; y < 16; y++) {
885         for (x = 0; x < 16; x++) {
886             acc += FFABS(src[x + y * stride] - ref);
887         }
888     }
889
890     return acc;
891 }
892
893 static int get_intra_count(MpegEncContext *s, uint8_t *src,
894                            uint8_t *ref, int stride)
895 {
896     int x, y, w, h;
897     int acc = 0;
898
899     w = s->width  & ~15;
900     h = s->height & ~15;
901
902     for (y = 0; y < h; y += 16) {
903         for (x = 0; x < w; x += 16) {
904             int offset = x + y * stride;
905             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
906                                       stride, 16);
907             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
908             int sae  = get_sae(src + offset, mean, stride);
909
910             acc += sae + 500 < sad;
911         }
912     }
913     return acc;
914 }
915
916
917 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
918 {
919     Picture *pic = NULL;
920     int64_t pts;
921     int i, display_picture_number = 0, ret;
922     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
923                                                  (s->low_delay ? 0 : 1);
924     int direct = 1;
925
926     if (pic_arg) {
927         pts = pic_arg->pts;
928         display_picture_number = s->input_picture_number++;
929
930         if (pts != AV_NOPTS_VALUE) {
931             if (s->user_specified_pts != AV_NOPTS_VALUE) {
932                 int64_t time = pts;
933                 int64_t last = s->user_specified_pts;
934
935                 if (time <= last) {
936                     av_log(s->avctx, AV_LOG_ERROR,
937                            "Error, Invalid timestamp=%"PRId64", "
938                            "last=%"PRId64"\n", pts, s->user_specified_pts);
939                     return -1;
940                 }
941
942                 if (!s->low_delay && display_picture_number == 1)
943                     s->dts_delta = time - last;
944             }
945             s->user_specified_pts = pts;
946         } else {
947             if (s->user_specified_pts != AV_NOPTS_VALUE) {
948                 s->user_specified_pts =
949                 pts = s->user_specified_pts + 1;
950                 av_log(s->avctx, AV_LOG_INFO,
951                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
952                        pts);
953             } else {
954                 pts = display_picture_number;
955             }
956         }
957     }
958
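    /* Use the caller's frame directly only when it is ref-counted and its
     * strides match ours; otherwise copy it into one of our own pictures,
     * offset by INPLACE_OFFSET (when no VBV buffer is configured), apparently
     * so the same allocation can later be reused for the reconstructed frame. */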
959     if (pic_arg) {
960         if (!pic_arg->buf[0])
961             direct = 0;
962         if (pic_arg->linesize[0] != s->linesize)
963             direct = 0;
964         if (pic_arg->linesize[1] != s->uvlinesize)
965             direct = 0;
966         if (pic_arg->linesize[2] != s->uvlinesize)
967             direct = 0;
968
969         av_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
970                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
971
972         if (direct) {
973             i = ff_find_unused_picture(s, 1);
974             if (i < 0)
975                 return i;
976
977             pic = &s->picture[i];
978             pic->reference = 3;
979
980             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
981                 return ret;
982             if (ff_alloc_picture(s, pic, 1) < 0) {
983                 return -1;
984             }
985         } else {
986             i = ff_find_unused_picture(s, 0);
987             if (i < 0)
988                 return i;
989
990             pic = &s->picture[i];
991             pic->reference = 3;
992
993             if (ff_alloc_picture(s, pic, 0) < 0) {
994                 return -1;
995             }
996
997             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
998                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
999                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1000                 // empty
1001             } else {
1002                 int h_chroma_shift, v_chroma_shift;
1003                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1004                                                  &h_chroma_shift,
1005                                                  &v_chroma_shift);
1006
1007                 for (i = 0; i < 3; i++) {
1008                     int src_stride = pic_arg->linesize[i];
1009                     int dst_stride = i ? s->uvlinesize : s->linesize;
1010                     int h_shift = i ? h_chroma_shift : 0;
1011                     int v_shift = i ? v_chroma_shift : 0;
1012                     int w = s->width  >> h_shift;
1013                     int h = s->height >> v_shift;
1014                     uint8_t *src = pic_arg->data[i];
1015                     uint8_t *dst = pic->f->data[i];
1016
1017                     if (!s->avctx->rc_buffer_size)
1018                         dst += INPLACE_OFFSET;
1019
1020                     if (src_stride == dst_stride)
1021                         memcpy(dst, src, src_stride * h);
1022                     else {
1023                         while (h--) {
1024                             memcpy(dst, src, w);
1025                             dst += dst_stride;
1026                             src += src_stride;
1027                         }
1028                     }
1029                 }
1030             }
1031         }
1032         ret = av_frame_copy_props(pic->f, pic_arg);
1033         if (ret < 0)
1034             return ret;
1035
1036         pic->f->display_picture_number = display_picture_number;
1037         pic->f->pts = pts; // we set this here to avoid modifying pic_arg
1038     }
1039
1040     /* shift buffer entries */
1041     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1042         s->input_picture[i - 1] = s->input_picture[i];
1043
1044     s->input_picture[encoding_delay] = (Picture*) pic;
1045
1046     return 0;
1047 }
1048
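/* Frame-skip decision: compare the candidate frame against the last
 * reference in 8x8 blocks with frame_skip_cmp, accumulate according to
 * frame_skip_exp, and skip the frame (return 1) when the score stays below
 * frame_skip_threshold or below the lambda-scaled frame_skip_factor. */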
1049 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1050 {
1051     int x, y, plane;
1052     int score = 0;
1053     int64_t score64 = 0;
1054
1055     for (plane = 0; plane < 3; plane++) {
1056         const int stride = p->f->linesize[plane];
1057         const int bw = plane ? 1 : 2;
1058         for (y = 0; y < s->mb_height * bw; y++) {
1059             for (x = 0; x < s->mb_width * bw; x++) {
1060                 int off = p->shared ? 0 : 16;
1061                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1062                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1063                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1064
1065                 switch (s->avctx->frame_skip_exp) {
1066                 case 0: score    =  FFMAX(score, v);          break;
1067                 case 1: score   += FFABS(v);                  break;
1068                 case 2: score   += v * v;                     break;
1069                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1070                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1071                 }
1072             }
1073         }
1074     }
1075
1076     if (score)
1077         score64 = score;
1078
1079     if (score64 < s->avctx->frame_skip_threshold)
1080         return 1;
1081     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1082         return 1;
1083     return 0;
1084 }
1085
1086 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1087 {
1088     AVPacket pkt = { 0 };
1089     int ret, got_output;
1090
1091     av_init_packet(&pkt);
1092     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1093     if (ret < 0)
1094         return ret;
1095
1096     ret = pkt.size;
1097     av_free_packet(&pkt);
1098     return ret;
1099 }
1100
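/* b_frame_strategy 2: re-encode the queued input pictures at reduced
 * resolution (brd_scale) once for every possible run length of consecutive
 * B-frames, and pick the run length with the smallest rate-distortion cost
 * (bits weighted by lambda2 plus the reported SSE). */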
1101 static int estimate_best_b_count(MpegEncContext *s)
1102 {
1103     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1104     AVCodecContext *c = avcodec_alloc_context3(NULL);
1105     const int scale = s->avctx->brd_scale;
1106     int i, j, out_size, p_lambda, b_lambda, lambda2;
1107     int64_t best_rd  = INT64_MAX;
1108     int best_b_count = -1;
1109
1110     assert(scale >= 0 && scale <= 3);
1111
1112     //emms_c();
1113     //s->next_picture_ptr->quality;
1114     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1115     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1116     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1117     if (!b_lambda) // FIXME we should do this somewhere else
1118         b_lambda = p_lambda;
1119     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1120                FF_LAMBDA_SHIFT;
1121
1122     c->width        = s->width  >> scale;
1123     c->height       = s->height >> scale;
1124     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1125     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1126     c->mb_decision  = s->avctx->mb_decision;
1127     c->me_cmp       = s->avctx->me_cmp;
1128     c->mb_cmp       = s->avctx->mb_cmp;
1129     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1130     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1131     c->time_base    = s->avctx->time_base;
1132     c->max_b_frames = s->max_b_frames;
1133
1134     if (avcodec_open2(c, codec, NULL) < 0)
1135         return -1;
1136
1137     for (i = 0; i < s->max_b_frames + 2; i++) {
1138         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1139                                                 s->next_picture_ptr;
1140
1141         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1142             pre_input = *pre_input_ptr;
1143
1144             if (!pre_input.shared && i) {
1145                 pre_input.f->data[0] += INPLACE_OFFSET;
1146                 pre_input.f->data[1] += INPLACE_OFFSET;
1147                 pre_input.f->data[2] += INPLACE_OFFSET;
1148             }
1149
1150             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1151                                        s->tmp_frames[i]->linesize[0],
1152                                        pre_input.f->data[0],
1153                                        pre_input.f->linesize[0],
1154                                        c->width, c->height);
1155             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1156                                        s->tmp_frames[i]->linesize[1],
1157                                        pre_input.f->data[1],
1158                                        pre_input.f->linesize[1],
1159                                        c->width >> 1, c->height >> 1);
1160             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1161                                        s->tmp_frames[i]->linesize[2],
1162                                        pre_input.f->data[2],
1163                                        pre_input.f->linesize[2],
1164                                        c->width >> 1, c->height >> 1);
1165         }
1166     }
1167
1168     for (j = 0; j < s->max_b_frames + 1; j++) {
1169         int64_t rd = 0;
1170
1171         if (!s->input_picture[j])
1172             break;
1173
1174         c->error[0] = c->error[1] = c->error[2] = 0;
1175
1176         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1177         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1178
1179         out_size = encode_frame(c, s->tmp_frames[0]);
1180
1181         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1182
1183         for (i = 0; i < s->max_b_frames + 1; i++) {
1184             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1185
1186             s->tmp_frames[i + 1]->pict_type = is_p ?
1187                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1188             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1189
1190             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1191
1192             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1193         }
1194
1195         /* get the delayed frames */
1196         while (out_size) {
1197             out_size = encode_frame(c, NULL);
1198             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1199         }
1200
1201         rd += c->error[0] + c->error[1] + c->error[2];
1202
1203         if (rd < best_rd) {
1204             best_rd = rd;
1205             best_b_count = j;
1206         }
1207     }
1208
1209     avcodec_close(c);
1210     av_freep(&c);
1211
1212     return best_b_count;
1213 }
1214
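/* Choose the next picture to encode: run the optional frame-skip check,
 * decide how many B-frames precede the next reference according to
 * b_frame_strategy, assign picture types and coded_picture_number in coded
 * order, and reference the result as new_picture / current_picture. */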
1215 static int select_input_picture(MpegEncContext *s)
1216 {
1217     int i, ret;
1218
1219     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1220         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1221     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1222
1223     /* set next picture type & ordering */
1224     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1225         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1226             s->next_picture_ptr == NULL || s->intra_only) {
1227             s->reordered_input_picture[0] = s->input_picture[0];
1228             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1229             s->reordered_input_picture[0]->f->coded_picture_number =
1230                 s->coded_picture_number++;
1231         } else {
1232             int b_frames;
1233
1234             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1235                 if (s->picture_in_gop_number < s->gop_size &&
1236                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1237                     // FIXME check that the gop check above is +-1 correct
1238                     av_frame_unref(s->input_picture[0]->f);
1239
1240                     emms_c();
1241                     ff_vbv_update(s, 0);
1242
1243                     goto no_output_pic;
1244                 }
1245             }
1246
1247             if (s->flags & CODEC_FLAG_PASS2) {
1248                 for (i = 0; i < s->max_b_frames + 1; i++) {
1249                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1250
1251                     if (pict_num >= s->rc_context.num_entries)
1252                         break;
1253                     if (!s->input_picture[i]) {
1254                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1255                         break;
1256                     }
1257
1258                     s->input_picture[i]->f->pict_type =
1259                         s->rc_context.entry[pict_num].new_pict_type;
1260                 }
1261             }
1262
1263             if (s->avctx->b_frame_strategy == 0) {
1264                 b_frames = s->max_b_frames;
1265                 while (b_frames && !s->input_picture[b_frames])
1266                     b_frames--;
1267             } else if (s->avctx->b_frame_strategy == 1) {
1268                 for (i = 1; i < s->max_b_frames + 1; i++) {
1269                     if (s->input_picture[i] &&
1270                         s->input_picture[i]->b_frame_score == 0) {
1271                         s->input_picture[i]->b_frame_score =
1272                             get_intra_count(s,
1273                                             s->input_picture[i    ]->f->data[0],
1274                                             s->input_picture[i - 1]->f->data[0],
1275                                             s->linesize) + 1;
1276                     }
1277                 }
1278                 for (i = 0; i < s->max_b_frames + 1; i++) {
1279                     if (s->input_picture[i] == NULL ||
1280                         s->input_picture[i]->b_frame_score - 1 >
1281                             s->mb_num / s->avctx->b_sensitivity)
1282                         break;
1283                 }
1284
1285                 b_frames = FFMAX(0, i - 1);
1286
1287                 /* reset scores */
1288                 for (i = 0; i < b_frames + 1; i++) {
1289                     s->input_picture[i]->b_frame_score = 0;
1290                 }
1291             } else if (s->avctx->b_frame_strategy == 2) {
1292                 b_frames = estimate_best_b_count(s);
1293             } else {
1294                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1295                 b_frames = 0;
1296             }
1297
1298             emms_c();
1299
1300             for (i = b_frames - 1; i >= 0; i--) {
1301                 int type = s->input_picture[i]->f->pict_type;
1302                 if (type && type != AV_PICTURE_TYPE_B)
1303                     b_frames = i;
1304             }
1305             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1306                 b_frames == s->max_b_frames) {
1307                 av_log(s->avctx, AV_LOG_ERROR,
1308                        "warning, too many b frames in a row\n");
1309             }
1310
1311             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1312                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1313                     s->gop_size > s->picture_in_gop_number) {
1314                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1315                 } else {
1316                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1317                         b_frames = 0;
1318                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1319                 }
1320             }
1321
1322             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1323                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1324                 b_frames--;
1325
1326             s->reordered_input_picture[0] = s->input_picture[b_frames];
1327             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1328                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1329             s->reordered_input_picture[0]->f->coded_picture_number =
1330                 s->coded_picture_number++;
1331             for (i = 0; i < b_frames; i++) {
1332                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1333                 s->reordered_input_picture[i + 1]->f->pict_type =
1334                     AV_PICTURE_TYPE_B;
1335                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1336                     s->coded_picture_number++;
1337             }
1338         }
1339     }
1340 no_output_pic:
1341     if (s->reordered_input_picture[0]) {
1342         s->reordered_input_picture[0]->reference =
1343            s->reordered_input_picture[0]->f->pict_type !=
1344                AV_PICTURE_TYPE_B ? 3 : 0;
1345
1346         ff_mpeg_unref_picture(s, &s->new_picture);
1347         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1348             return ret;
1349
1350         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1351             // input is a shared pix, so we can't modify it -> alloc a new
1352             // one & ensure that the shared one is reusable
1353
1354             Picture *pic;
1355             int i = ff_find_unused_picture(s, 0);
1356             if (i < 0)
1357                 return i;
1358             pic = &s->picture[i];
1359
1360             pic->reference = s->reordered_input_picture[0]->reference;
1361             if (ff_alloc_picture(s, pic, 0) < 0) {
1362                 return -1;
1363             }
1364
1365             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1366             if (ret < 0)
1367                 return ret;
1368
1369             /* mark us unused / free shared pic */
1370             av_frame_unref(s->reordered_input_picture[0]->f);
1371             s->reordered_input_picture[0]->shared = 0;
1372
1373             s->current_picture_ptr = pic;
1374         } else {
1375             // input is not a shared pix -> reuse buffer for current_pix
1376             s->current_picture_ptr = s->reordered_input_picture[0];
1377             for (i = 0; i < 4; i++) {
1378                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1379             }
1380         }
1381         ff_mpeg_unref_picture(s, &s->current_picture);
1382         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1383                                        s->current_picture_ptr)) < 0)
1384             return ret;
1385
1386         s->picture_number = s->new_picture.f->display_picture_number;
1387     } else {
1388         ff_mpeg_unref_picture(s, &s->new_picture);
1389     }
1390     return 0;
1391 }
1392
1393 static void frame_end(MpegEncContext *s)
1394 {
1395     int i;
1396
1397     if (s->unrestricted_mv &&
1398         s->current_picture.reference &&
1399         !s->intra_only) {
1400         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1401         int hshift = desc->log2_chroma_w;
1402         int vshift = desc->log2_chroma_h;
1403         s->mpvencdsp.draw_edges(s->current_picture.f->data[0], s->linesize,
1404                                 s->h_edge_pos, s->v_edge_pos,
1405                                 EDGE_WIDTH, EDGE_WIDTH,
1406                                 EDGE_TOP | EDGE_BOTTOM);
1407         s->mpvencdsp.draw_edges(s->current_picture.f->data[1], s->uvlinesize,
1408                                 s->h_edge_pos >> hshift,
1409                                 s->v_edge_pos >> vshift,
1410                                 EDGE_WIDTH >> hshift,
1411                                 EDGE_WIDTH >> vshift,
1412                                 EDGE_TOP | EDGE_BOTTOM);
1413         s->mpvencdsp.draw_edges(s->current_picture.f->data[2], s->uvlinesize,
1414                                 s->h_edge_pos >> hshift,
1415                                 s->v_edge_pos >> vshift,
1416                                 EDGE_WIDTH >> hshift,
1417                                 EDGE_WIDTH >> vshift,
1418                                 EDGE_TOP | EDGE_BOTTOM);
1419     }
1420
1421     emms_c();
1422
1423     s->last_pict_type                 = s->pict_type;
1424     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1425     if (s->pict_type != AV_PICTURE_TYPE_B)
1426         s->last_non_b_pict_type = s->pict_type;
1427
1428     if (s->encoding) {
1429         /* release non-reference frames */
1430         for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1431             if (!s->picture[i].reference)
1432                 ff_mpeg_unref_picture(s, &s->picture[i]);
1433         }
1434     }
1435
1436     s->avctx->coded_frame = s->current_picture_ptr->f;
1437
1438 }
1439
1440 static void update_noise_reduction(MpegEncContext *s)
1441 {
1442     int intra, i;
1443
1444     for (intra = 0; intra < 2; intra++) {
1445         if (s->dct_count[intra] > (1 << 16)) {
1446             for (i = 0; i < 64; i++) {
1447                 s->dct_error_sum[intra][i] >>= 1;
1448             }
1449             s->dct_count[intra] >>= 1;
1450         }
1451
1452         for (i = 0; i < 64; i++) {
1453             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1454                                        s->dct_count[intra] +
1455                                        s->dct_error_sum[intra][i] / 2) /
1456                                       (s->dct_error_sum[intra][i] + 1);
1457         }
1458     }
1459 }
1460
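/* Per-frame setup: rotate the last/next/current reference pointers, re-take
 * references on them, double the line sizes for field pictures and select the
 * dct_unquantize functions matching the output format. */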
1461 static int frame_start(MpegEncContext *s)
1462 {
1463     int ret;
1464
1465     /* mark & release old frames */
1466     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1467         s->last_picture_ptr != s->next_picture_ptr &&
1468         s->last_picture_ptr->f->buf[0]) {
1469         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1470     }
1471
1472     s->current_picture_ptr->f->pict_type = s->pict_type;
1473     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1474
1475     ff_mpeg_unref_picture(s, &s->current_picture);
1476     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1477                                    s->current_picture_ptr)) < 0)
1478         return ret;
1479
1480     if (s->pict_type != AV_PICTURE_TYPE_B) {
1481         s->last_picture_ptr = s->next_picture_ptr;
1482         if (!s->droppable)
1483             s->next_picture_ptr = s->current_picture_ptr;
1484     }
1485
1486     if (s->last_picture_ptr) {
1487         ff_mpeg_unref_picture(s, &s->last_picture);
1488         if (s->last_picture_ptr->f->buf[0] &&
1489             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1490                                        s->last_picture_ptr)) < 0)
1491             return ret;
1492     }
1493     if (s->next_picture_ptr) {
1494         ff_mpeg_unref_picture(s, &s->next_picture);
1495         if (s->next_picture_ptr->f->buf[0] &&
1496             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1497                                        s->next_picture_ptr)) < 0)
1498             return ret;
1499     }
1500
1501     if (s->picture_structure != PICT_FRAME) {
1502         int i;
1503         for (i = 0; i < 4; i++) {
1504             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1505                 s->current_picture.f->data[i] +=
1506                     s->current_picture.f->linesize[i];
1507             }
1508             s->current_picture.f->linesize[i] *= 2;
1509             s->last_picture.f->linesize[i]    *= 2;
1510             s->next_picture.f->linesize[i]    *= 2;
1511         }
1512     }
1513
1514     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1515         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1516         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1517     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1518         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1519         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1520     } else {
1521         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1522         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1523     }
1524
1525     if (s->dct_error_sum) {
1526         assert(s->avctx->noise_reduction && s->encoding);
1527         update_noise_reduction(s);
1528     }
1529
1530     return 0;
1531 }
1532
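/* Main per-frame entry point of the MPEG-family encoders.  It is normally
 * reached through the generic libavcodec encode API, roughly (sketch only):
 *
 *     AVPacket pkt = { 0 };
 *     int got_packet = 0;
 *     avcodec_encode_video2(avctx, &pkt, frame, &got_packet);
 *         // -> ff_MPV_encode_picture(avctx, &pkt, frame, &got_packet)
 *
 * It buffers/reorders the input, runs frame_start()/encode_picture()/
 * frame_end(), retries with a higher lambda on VBV overflow, adds stuffing
 * and fills the packet with the coded frame. */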
1533 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1534                           const AVFrame *pic_arg, int *got_packet)
1535 {
1536     MpegEncContext *s = avctx->priv_data;
1537     int i, stuffing_count, ret;
1538     int context_count = s->slice_context_count;
1539
1540     s->picture_in_gop_number++;
1541
1542     if (load_input_picture(s, pic_arg) < 0)
1543         return -1;
1544
1545     if (select_input_picture(s) < 0) {
1546         return -1;
1547     }
1548
1549     /* output? */
1550     if (s->new_picture.f->data[0]) {
1551         if (!pkt->data &&
1552             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1553             return ret;
1554         if (s->mb_info) {
1555             s->mb_info_ptr = av_packet_new_side_data(pkt,
1556                                  AV_PKT_DATA_H263_MB_INFO,
1557                                  s->mb_width*s->mb_height*12);
1558             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1559         }
1560
1561         for (i = 0; i < context_count; i++) {
1562             int start_y = s->thread_context[i]->start_mb_y;
1563             int   end_y = s->thread_context[i]->  end_mb_y;
1564             int h       = s->mb_height;
1565             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1566             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1567
1568             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1569         }
1570
1571         s->pict_type = s->new_picture.f->pict_type;
1572         //emms_c();
1573         ret = frame_start(s);
1574         if (ret < 0)
1575             return ret;
1576 vbv_retry:
1577         if (encode_picture(s, s->picture_number) < 0)
1578             return -1;
1579
1580         avctx->header_bits = s->header_bits;
1581         avctx->mv_bits     = s->mv_bits;
1582         avctx->misc_bits   = s->misc_bits;
1583         avctx->i_tex_bits  = s->i_tex_bits;
1584         avctx->p_tex_bits  = s->p_tex_bits;
1585         avctx->i_count     = s->i_count;
1586         // FIXME f/b_count in avctx
1587         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1588         avctx->skip_count  = s->skip_count;
1589
1590         frame_end(s);
1591
1592         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1593             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1594
1595         if (avctx->rc_buffer_size) {
1596             RateControlContext *rcc = &s->rc_context;
1597             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1598
1599             if (put_bits_count(&s->pb) > max_size &&
1600                 s->lambda < s->avctx->lmax) {
1601                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1602                                        (s->qscale + 1) / s->qscale);
1603                 if (s->adaptive_quant) {
1604                     int i;
1605                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1606                         s->lambda_table[i] =
1607                             FFMAX(s->lambda_table[i] + 1,
1608                                   s->lambda_table[i] * (s->qscale + 1) /
1609                                   s->qscale);
1610                 }
1611                 s->mb_skipped = 0;        // done in frame_start()
1612                 // no_rounding was toggled in encode_picture(), so undo it before retrying
1613                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1614                     if (s->flipflop_rounding          ||
1615                         s->codec_id == AV_CODEC_ID_H263P ||
1616                         s->codec_id == AV_CODEC_ID_MPEG4)
1617                         s->no_rounding ^= 1;
1618                 }
1619                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1620                     s->time_base       = s->last_time_base;
1621                     s->last_non_b_time = s->time - s->pp_time;
1622                 }
1623                 for (i = 0; i < context_count; i++) {
1624                     PutBitContext *pb = &s->thread_context[i]->pb;
1625                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1626                 }
1627                 goto vbv_retry;
1628             }
1629
1630             assert(s->avctx->rc_max_rate);
1631         }
1632
1633         if (s->flags & CODEC_FLAG_PASS1)
1634             ff_write_pass1_stats(s);
1635
1636         for (i = 0; i < 4; i++) {
1637             s->current_picture_ptr->f->error[i] = s->current_picture.f->error[i];
1638             avctx->error[i] += s->current_picture_ptr->f->error[i];
1639         }
1640
1641         if (s->flags & CODEC_FLAG_PASS1)
1642             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1643                    avctx->i_tex_bits + avctx->p_tex_bits ==
1644                        put_bits_count(&s->pb));
1645         flush_put_bits(&s->pb);
1646         s->frame_bits  = put_bits_count(&s->pb);
1647
1648         stuffing_count = ff_vbv_update(s, s->frame_bits);
1649         if (stuffing_count) {
1650             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1651                     stuffing_count + 50) {
1652                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1653                 return -1;
1654             }
1655
1656             switch (s->codec_id) {
1657             case AV_CODEC_ID_MPEG1VIDEO:
1658             case AV_CODEC_ID_MPEG2VIDEO:
1659                 while (stuffing_count--) {
1660                     put_bits(&s->pb, 8, 0);
1661                 }
1662             break;
1663             case AV_CODEC_ID_MPEG4:
1664                 put_bits(&s->pb, 16, 0);
1665                 put_bits(&s->pb, 16, 0x1C3);
1666                 stuffing_count -= 4;
1667                 while (stuffing_count--) {
1668                     put_bits(&s->pb, 8, 0xFF);
1669                 }
1670             break;
1671             default:
1672                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1673             }
1674             flush_put_bits(&s->pb);
1675             s->frame_bits  = put_bits_count(&s->pb);
1676         }
1677
1678         /* update mpeg1/2 vbv_delay for CBR */
1679         if (s->avctx->rc_max_rate                          &&
1680             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1681             s->out_format == FMT_MPEG1                     &&
1682             90000LL * (avctx->rc_buffer_size - 1) <=
1683                 s->avctx->rc_max_rate * 0xFFFFLL) {
1684             int vbv_delay, min_delay;
1685             double inbits  = s->avctx->rc_max_rate *
1686                              av_q2d(s->avctx->time_base);
1687             int    minbits = s->frame_bits - 8 *
1688                              (s->vbv_delay_ptr - s->pb.buf - 1);
1689             double bits    = s->rc_context.buffer_index + minbits - inbits;
1690
1691             if (bits < 0)
1692                 av_log(s->avctx, AV_LOG_ERROR,
1693                        "Internal error, negative bits\n");
1694
1695             assert(s->repeat_first_field == 0);
1696
1697             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1698             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1699                         s->avctx->rc_max_rate;
1700
1701             vbv_delay = FFMAX(vbv_delay, min_delay);
1702
1703             assert(vbv_delay < 0xFFFF);
1704
1705             s->vbv_delay_ptr[0] &= 0xF8;
1706             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1707             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1708             s->vbv_delay_ptr[2] &= 0x07;
1709             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1710             avctx->vbv_delay     = vbv_delay * 300;
1711         }
1712         s->total_bits     += s->frame_bits;
1713         avctx->frame_bits  = s->frame_bits;
1714
1715         pkt->pts = s->current_picture.f->pts;
1716         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1717             if (!s->current_picture.f->coded_picture_number)
1718                 pkt->dts = pkt->pts - s->dts_delta;
1719             else
1720                 pkt->dts = s->reordered_pts;
1721             s->reordered_pts = pkt->pts;
1722         } else
1723             pkt->dts = pkt->pts;
1724         if (s->current_picture.f->key_frame)
1725             pkt->flags |= AV_PKT_FLAG_KEY;
1726         if (s->mb_info)
1727             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1728     } else {
1729         s->frame_bits = 0;
1730     }
1731     assert((s->frame_bits & 7) == 0);
1732
1733     pkt->size = s->frame_bits / 8;
1734     *got_packet = !!pkt->size;
1735     return 0;
1736 }
1737
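/* Zero out a block that contains only a few isolated +-1 coefficients whose
 * run-length-weighted score stays below the threshold; a negative threshold
 * means the DC coefficient may be eliminated as well. */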
1738 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1739                                                 int n, int threshold)
1740 {
1741     static const char tab[64] = {
1742         3, 2, 2, 1, 1, 1, 1, 1,
1743         1, 1, 1, 1, 1, 1, 1, 1,
1744         1, 1, 1, 1, 1, 1, 1, 1,
1745         0, 0, 0, 0, 0, 0, 0, 0,
1746         0, 0, 0, 0, 0, 0, 0, 0,
1747         0, 0, 0, 0, 0, 0, 0, 0,
1748         0, 0, 0, 0, 0, 0, 0, 0,
1749         0, 0, 0, 0, 0, 0, 0, 0
1750     };
1751     int score = 0;
1752     int run = 0;
1753     int i;
1754     int16_t *block = s->block[n];
1755     const int last_index = s->block_last_index[n];
1756     int skip_dc;
1757
1758     if (threshold < 0) {
1759         skip_dc = 0;
1760         threshold = -threshold;
1761     } else
1762         skip_dc = 1;
1763
1764     /* Is everything we could set to zero already zero? */
1765     if (last_index <= skip_dc - 1)
1766         return;
1767
1768     for (i = 0; i <= last_index; i++) {
1769         const int j = s->intra_scantable.permutated[i];
1770         const int level = FFABS(block[j]);
1771         if (level == 1) {
1772             if (skip_dc && i == 0)
1773                 continue;
1774             score += tab[run];
1775             run = 0;
1776         } else if (level > 1) {
1777             return;
1778         } else {
1779             run++;
1780         }
1781     }
1782     if (score >= threshold)
1783         return;
1784     for (i = skip_dc; i <= last_index; i++) {
1785         const int j = s->intra_scantable.permutated[i];
1786         block[j] = 0;
1787     }
1788     if (block[0])
1789         s->block_last_index[n] = 0;
1790     else
1791         s->block_last_index[n] = -1;
1792 }
1793
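/* Clamp the quantized coefficients to the [min_qcoeff, max_qcoeff] range of
 * the target codec (the intra DC coefficient is skipped for intra MBs). */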
1794 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1795                                int last_index)
1796 {
1797     int i;
1798     const int maxlevel = s->max_qcoeff;
1799     const int minlevel = s->min_qcoeff;
1800     int overflow = 0;
1801
1802     if (s->mb_intra) {
1803         i = 1; // skip clipping of intra dc
1804     } else
1805         i = 0;
1806
1807     for (; i <= last_index; i++) {
1808         const int j = s->intra_scantable.permutated[i];
1809         int level = block[j];
1810
1811         if (level > maxlevel) {
1812             level = maxlevel;
1813             overflow++;
1814         } else if (level < minlevel) {
1815             level = minlevel;
1816             overflow++;
1817         }
1818
1819         block[j] = level;
1820     }
1821
1822     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1823         av_log(s->avctx, AV_LOG_INFO,
1824                "warning, clipping %d dct coefficients to %d..%d\n",
1825                overflow, minlevel, maxlevel);
1826 }
1827
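/* Compute a perceptual weight for each position of the 8x8 block from its
 * 3x3 pixel neighbourhood: roughly 36 times the local standard deviation.
 * Used by the noise-shaping refinement quantizer. */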
1828 static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
1829 {
1830     int x, y;
1831     // FIXME optimize
1832     for (y = 0; y < 8; y++) {
1833         for (x = 0; x < 8; x++) {
1834             int x2, y2;
1835             int sum = 0;
1836             int sqr = 0;
1837             int count = 0;
1838
1839             for (y2 = FFMAX(y - 1, 0); y2 < FFMIN(8, y + 2); y2++) {
1840                 for (x2 = FFMAX(x - 1, 0); x2 < FFMIN(8, x + 2); x2++) {
1841                     int v = ptr[x2 + y2 * stride];
1842                     sum += v;
1843                     sqr += v * v;
1844                     count++;
1845                 }
1846             }
1847             weight[x + 8 * y] = (36 * ff_sqrt(count * sqr - sum * sum)) / count;
1848         }
1849     }
1850 }
1851
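/* Encode one macroblock: apply adaptive quantization, fetch the source pixels
 * (with edge emulation at picture borders), build the intra blocks or the
 * motion-compensated difference, decide on interlaced DCT, skip cheap blocks,
 * run DCT + quantization (with optional noise shaping and coefficient
 * elimination) and finally call the codec-specific bitstream writer. */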
1852 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1853                                                 int motion_x, int motion_y,
1854                                                 int mb_block_height,
1855                                                 int mb_block_count)
1856 {
1857     int16_t weight[8][64];
1858     int16_t orig[8][64];
1859     const int mb_x = s->mb_x;
1860     const int mb_y = s->mb_y;
1861     int i;
1862     int skip_dct[8];
1863     int dct_offset = s->linesize * 8; // default for progressive frames
1864     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1865     ptrdiff_t wrap_y, wrap_c;
1866
1867     for (i = 0; i < mb_block_count; i++)
1868         skip_dct[i] = s->skipdct;
1869
1870     if (s->adaptive_quant) {
1871         const int last_qp = s->qscale;
1872         const int mb_xy = mb_x + mb_y * s->mb_stride;
1873
1874         s->lambda = s->lambda_table[mb_xy];
1875         update_qscale(s);
1876
1877         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1878             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1879             s->dquant = s->qscale - last_qp;
1880
1881             if (s->out_format == FMT_H263) {
1882                 s->dquant = av_clip(s->dquant, -2, 2);
1883
1884                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1885                     if (!s->mb_intra) {
1886                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1887                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1888                                 s->dquant = 0;
1889                         }
1890                         if (s->mv_type == MV_TYPE_8X8)
1891                             s->dquant = 0;
1892                     }
1893                 }
1894             }
1895         }
1896         ff_set_qscale(s, last_qp + s->dquant);
1897     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1898         ff_set_qscale(s, s->qscale + s->dquant);
1899
1900     wrap_y = s->linesize;
1901     wrap_c = s->uvlinesize;
1902     ptr_y  = s->new_picture.f->data[0] +
1903              (mb_y * 16 * wrap_y)              + mb_x * 16;
1904     ptr_cb = s->new_picture.f->data[1] +
1905              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1906     ptr_cr = s->new_picture.f->data[2] +
1907              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1908
1909     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1910         uint8_t *ebuf = s->edge_emu_buffer + 32;
1911         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
1912                                  wrap_y, wrap_y,
1913                                  16, 16, mb_x * 16, mb_y * 16,
1914                                  s->width, s->height);
1915         ptr_y = ebuf;
1916         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
1917                                  wrap_c, wrap_c,
1918                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1919                                  s->width >> 1, s->height >> 1);
1920         ptr_cb = ebuf + 18 * wrap_y;
1921         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
1922                                  wrap_c, wrap_c,
1923                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1924                                  s->width >> 1, s->height >> 1);
1925         ptr_cr = ebuf + 18 * wrap_y + 8;
1926     }
1927
1928     if (s->mb_intra) {
1929         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1930             int progressive_score, interlaced_score;
1931
1932             s->interlaced_dct = 0;
1933             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
1934                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1935                                                      NULL, wrap_y, 8) - 400;
1936
1937             if (progressive_score > 0) {
1938                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
1939                                                         NULL, wrap_y * 2, 8) +
1940                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
1941                                                         NULL, wrap_y * 2, 8);
1942                 if (progressive_score > interlaced_score) {
1943                     s->interlaced_dct = 1;
1944
1945                     dct_offset = wrap_y;
1946                     wrap_y <<= 1;
1947                     if (s->chroma_format == CHROMA_422)
1948                         wrap_c <<= 1;
1949                 }
1950             }
1951         }
1952
1953         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
1954         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
1955         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
1956         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
1957
1958         if (s->flags & CODEC_FLAG_GRAY) {
1959             skip_dct[4] = 1;
1960             skip_dct[5] = 1;
1961         } else {
1962             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1963             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1964             if (!s->chroma_y_shift) { /* 422 */
1965                 s->pdsp.get_pixels(s->block[6],
1966                                    ptr_cb + (dct_offset >> 1), wrap_c);
1967                 s->pdsp.get_pixels(s->block[7],
1968                                    ptr_cr + (dct_offset >> 1), wrap_c);
1969             }
1970         }
1971     } else {
1972         op_pixels_func (*op_pix)[4];
1973         qpel_mc_func (*op_qpix)[16];
1974         uint8_t *dest_y, *dest_cb, *dest_cr;
1975
1976         dest_y  = s->dest[0];
1977         dest_cb = s->dest[1];
1978         dest_cr = s->dest[2];
1979
1980         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1981             op_pix  = s->hdsp.put_pixels_tab;
1982             op_qpix = s->qdsp.put_qpel_pixels_tab;
1983         } else {
1984             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
1985             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
1986         }
1987
1988         if (s->mv_dir & MV_DIR_FORWARD) {
1989             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
1990                           s->last_picture.f->data,
1991                           op_pix, op_qpix);
1992             op_pix  = s->hdsp.avg_pixels_tab;
1993             op_qpix = s->qdsp.avg_qpel_pixels_tab;
1994         }
1995         if (s->mv_dir & MV_DIR_BACKWARD) {
1996             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
1997                           s->next_picture.f->data,
1998                           op_pix, op_qpix);
1999         }
2000
2001         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2002             int progressive_score, interlaced_score;
2003
2004             s->interlaced_dct = 0;
2005             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2006                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2007                                                      ptr_y + wrap_y * 8,
2008                                                      wrap_y, 8) - 400;
2009
2010             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2011                 progressive_score -= 400;
2012
2013             if (progressive_score > 0) {
2014                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2015                                                         wrap_y * 2, 8) +
2016                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2017                                                         ptr_y + wrap_y,
2018                                                         wrap_y * 2, 8);
2019
2020                 if (progressive_score > interlaced_score) {
2021                     s->interlaced_dct = 1;
2022
2023                     dct_offset = wrap_y;
2024                     wrap_y <<= 1;
2025                     if (s->chroma_format == CHROMA_422)
2026                         wrap_c <<= 1;
2027                 }
2028             }
2029         }
2030
2031         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2032         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2033         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2034                             dest_y + dct_offset, wrap_y);
2035         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2036                             dest_y + dct_offset + 8, wrap_y);
2037
2038         if (s->flags & CODEC_FLAG_GRAY) {
2039             skip_dct[4] = 1;
2040             skip_dct[5] = 1;
2041         } else {
2042             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2043             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2044             if (!s->chroma_y_shift) { /* 422 */
2045                 s->pdsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
2046                                     dest_cb + (dct_offset >> 1), wrap_c);
2047                 s->pdsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
2048                                     dest_cr + (dct_offset >> 1), wrap_c);
2049             }
2050         }
2051         /* pre quantization */
2052         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2053                 2 * s->qscale * s->qscale) {
2054             // FIXME optimize
2055             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2056                 skip_dct[0] = 1;
2057             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2058                 skip_dct[1] = 1;
2059             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2060                                wrap_y, 8) < 20 * s->qscale)
2061                 skip_dct[2] = 1;
2062             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2063                                wrap_y, 8) < 20 * s->qscale)
2064                 skip_dct[3] = 1;
2065             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2066                 skip_dct[4] = 1;
2067             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2068                 skip_dct[5] = 1;
2069             if (!s->chroma_y_shift) { /* 422 */
2070                 if (s->mecc.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2071                                    dest_cb + (dct_offset >> 1),
2072                                    wrap_c, 8) < 20 * s->qscale)
2073                     skip_dct[6] = 1;
2074                 if (s->mecc.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2075                                    dest_cr + (dct_offset >> 1),
2076                                    wrap_c, 8) < 20 * s->qscale)
2077                     skip_dct[7] = 1;
2078             }
2079         }
2080     }
2081
2082     if (s->quantizer_noise_shaping) {
2083         if (!skip_dct[0])
2084             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2085         if (!skip_dct[1])
2086             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2087         if (!skip_dct[2])
2088             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2089         if (!skip_dct[3])
2090             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2091         if (!skip_dct[4])
2092             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2093         if (!skip_dct[5])
2094             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2095         if (!s->chroma_y_shift) { /* 422 */
2096             if (!skip_dct[6])
2097                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2098                                   wrap_c);
2099             if (!skip_dct[7])
2100                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2101                                   wrap_c);
2102         }
2103         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2104     }
2105
2106     /* DCT & quantize */
2107     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
2108     {
2109         for (i = 0; i < mb_block_count; i++) {
2110             if (!skip_dct[i]) {
2111                 int overflow;
2112                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2113                 // FIXME we could decide to change the quantizer instead of
2114                 // clipping
2115                 // JS: I don't think that would be a good idea, it could lower
2116                 //     quality instead of improving it. Only INTRADC clipping
2117                 //     deserves changes in the quantizer.
2118                 if (overflow)
2119                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2120             } else
2121                 s->block_last_index[i] = -1;
2122         }
2123         if (s->quantizer_noise_shaping) {
2124             for (i = 0; i < mb_block_count; i++) {
2125                 if (!skip_dct[i]) {
2126                     s->block_last_index[i] =
2127                         dct_quantize_refine(s, s->block[i], weight[i],
2128                                             orig[i], i, s->qscale);
2129                 }
2130             }
2131         }
2132
2133         if (s->luma_elim_threshold && !s->mb_intra)
2134             for (i = 0; i < 4; i++)
2135                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2136         if (s->chroma_elim_threshold && !s->mb_intra)
2137             for (i = 4; i < mb_block_count; i++)
2138                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2139
2140         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2141             for (i = 0; i < mb_block_count; i++) {
2142                 if (s->block_last_index[i] == -1)
2143                     s->coded_score[i] = INT_MAX / 256;
2144             }
2145         }
2146     }
2147
2148     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2149         s->block_last_index[4] =
2150         s->block_last_index[5] = 0;
2151         s->block[4][0] =
2152         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2153     }
2154
2155     // FIXME: the non-C dct_quantize implementations return an incorrect block_last_index
2156     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2157         for (i = 0; i < mb_block_count; i++) {
2158             int j;
2159             if (s->block_last_index[i] > 0) {
2160                 for (j = 63; j > 0; j--) {
2161                     if (s->block[i][s->intra_scantable.permutated[j]])
2162                         break;
2163                 }
2164                 s->block_last_index[i] = j;
2165             }
2166         }
2167     }
2168
2169     /* huffman encode */
2170     switch(s->codec_id){ //FIXME a function pointer could be slightly faster
2171     case AV_CODEC_ID_MPEG1VIDEO:
2172     case AV_CODEC_ID_MPEG2VIDEO:
2173         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2174             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2175         break;
2176     case AV_CODEC_ID_MPEG4:
2177         if (CONFIG_MPEG4_ENCODER)
2178             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2179         break;
2180     case AV_CODEC_ID_MSMPEG4V2:
2181     case AV_CODEC_ID_MSMPEG4V3:
2182     case AV_CODEC_ID_WMV1:
2183         if (CONFIG_MSMPEG4_ENCODER)
2184             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2185         break;
2186     case AV_CODEC_ID_WMV2:
2187         if (CONFIG_WMV2_ENCODER)
2188             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2189         break;
2190     case AV_CODEC_ID_H261:
2191         if (CONFIG_H261_ENCODER)
2192             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2193         break;
2194     case AV_CODEC_ID_H263:
2195     case AV_CODEC_ID_H263P:
2196     case AV_CODEC_ID_FLV1:
2197     case AV_CODEC_ID_RV10:
2198     case AV_CODEC_ID_RV20:
2199         if (CONFIG_H263_ENCODER)
2200             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2201         break;
2202     case AV_CODEC_ID_MJPEG:
2203         if (CONFIG_MJPEG_ENCODER)
2204             ff_mjpeg_encode_mb(s, s->block);
2205         break;
2206     default:
2207         assert(0);
2208     }
2209 }
2210
2211 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2212 {
2213     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2214     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2215 }
2216
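/* Copy from *s to *d the encoder state that encoding a candidate macroblock
 * mode may modify (predictors, statistics, quantizer); used to snapshot into
 * backup_s and to restore from it during the RD mode decision. */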
2217 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2218     int i;
2219
2220     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2221
2222     /* mpeg1 */
2223     d->mb_skip_run= s->mb_skip_run;
2224     for(i=0; i<3; i++)
2225         d->last_dc[i] = s->last_dc[i];
2226
2227     /* statistics */
2228     d->mv_bits= s->mv_bits;
2229     d->i_tex_bits= s->i_tex_bits;
2230     d->p_tex_bits= s->p_tex_bits;
2231     d->i_count= s->i_count;
2232     d->f_count= s->f_count;
2233     d->b_count= s->b_count;
2234     d->skip_count= s->skip_count;
2235     d->misc_bits= s->misc_bits;
2236     d->last_bits= 0;
2237
2238     d->mb_skipped= 0;
2239     d->qscale= s->qscale;
2240     d->dquant= s->dquant;
2241
2242     d->esc3_level_length= s->esc3_level_length;
2243 }
2244
2245 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2246     int i;
2247
2248     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2249     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2250
2251     /* mpeg1 */
2252     d->mb_skip_run= s->mb_skip_run;
2253     for(i=0; i<3; i++)
2254         d->last_dc[i] = s->last_dc[i];
2255
2256     /* statistics */
2257     d->mv_bits= s->mv_bits;
2258     d->i_tex_bits= s->i_tex_bits;
2259     d->p_tex_bits= s->p_tex_bits;
2260     d->i_count= s->i_count;
2261     d->f_count= s->f_count;
2262     d->b_count= s->b_count;
2263     d->skip_count= s->skip_count;
2264     d->misc_bits= s->misc_bits;
2265
2266     d->mb_intra= s->mb_intra;
2267     d->mb_skipped= s->mb_skipped;
2268     d->mv_type= s->mv_type;
2269     d->mv_dir= s->mv_dir;
2270     d->pb= s->pb;
2271     if(s->data_partitioning){
2272         d->pb2= s->pb2;
2273         d->tex_pb= s->tex_pb;
2274     }
2275     d->block= s->block;
2276     for(i=0; i<8; i++)
2277         d->block_last_index[i]= s->block_last_index[i];
2278     d->interlaced_dct= s->interlaced_dct;
2279     d->qscale= s->qscale;
2280
2281     d->esc3_level_length= s->esc3_level_length;
2282 }
2283
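/* Encode one candidate macroblock mode into a scratch bit buffer (and, for
 * full RD decision, into scratch pixels), compute its rate or rate-distortion
 * score and keep it in *best if it improves on *dmin. */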
2284 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2285                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2286                            int *dmin, int *next_block, int motion_x, int motion_y)
2287 {
2288     int score;
2289     uint8_t *dest_backup[3];
2290
2291     copy_context_before_encode(s, backup, type);
2292
2293     s->block= s->blocks[*next_block];
2294     s->pb= pb[*next_block];
2295     if(s->data_partitioning){
2296         s->pb2   = pb2   [*next_block];
2297         s->tex_pb= tex_pb[*next_block];
2298     }
2299
2300     if(*next_block){
2301         memcpy(dest_backup, s->dest, sizeof(s->dest));
2302         s->dest[0] = s->rd_scratchpad;
2303         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2304         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2305         assert(s->linesize >= 32); //FIXME
2306     }
2307
2308     encode_mb(s, motion_x, motion_y);
2309
2310     score= put_bits_count(&s->pb);
2311     if(s->data_partitioning){
2312         score+= put_bits_count(&s->pb2);
2313         score+= put_bits_count(&s->tex_pb);
2314     }
2315
2316     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2317         ff_MPV_decode_mb(s, s->block);
2318
2319         score *= s->lambda2;
2320         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2321     }
2322
2323     if(*next_block){
2324         memcpy(s->dest, dest_backup, sizeof(s->dest));
2325     }
2326
2327     if(score<*dmin){
2328         *dmin= score;
2329         *next_block^=1;
2330
2331         copy_context_after_encode(best, s, type);
2332     }
2333 }
2334
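/* Sum of squared errors over a w x h block; uses the optimized 16x16 / 8x8
 * compare functions when possible and a plain C loop otherwise. */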
2335 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2336     uint32_t *sq = ff_square_tab + 256;
2337     int acc=0;
2338     int x,y;
2339
2340     if(w==16 && h==16)
2341         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2342     else if(w==8 && h==8)
2343         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2344
2345     for(y=0; y<h; y++){
2346         for(x=0; x<w; x++){
2347             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2348         }
2349     }
2350
2351     assert(acc>=0);
2352
2353     return acc;
2354 }
2355
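/* Distortion of the current macroblock against the source picture, handling
 * partial macroblocks at the right/bottom border and the NSSE comparison
 * function when selected. */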
2356 static int sse_mb(MpegEncContext *s){
2357     int w= 16;
2358     int h= 16;
2359
2360     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2361     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2362
2363     if(w==16 && h==16)
2364       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2365         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2366                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2367                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2368       }else{
2369         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2370                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2371                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2372       }
2373     else
2374         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2375                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2376                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2377 }
2378
2379 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2380     MpegEncContext *s= *(void**)arg;
2381
2382
2383     s->me.pre_pass=1;
2384     s->me.dia_size= s->avctx->pre_dia_size;
2385     s->first_slice_line=1;
2386     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2387         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2388             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2389         }
2390         s->first_slice_line=0;
2391     }
2392
2393     s->me.pre_pass=0;
2394
2395     return 0;
2396 }
2397
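/* Per-slice motion estimation pass: estimates a motion vector and macroblock
 * type for every MB of the slice (P- or B-frame search). */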
2398 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2399     MpegEncContext *s= *(void**)arg;
2400
2401     s->me.dia_size= s->avctx->dia_size;
2402     s->first_slice_line=1;
2403     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2404         s->mb_x=0; //for block init below
2405         ff_init_block_index(s);
2406         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2407             s->block_index[0]+=2;
2408             s->block_index[1]+=2;
2409             s->block_index[2]+=2;
2410             s->block_index[3]+=2;
2411
2412             /* compute motion vector & mb_type and store in context */
2413             if(s->pict_type==AV_PICTURE_TYPE_B)
2414                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2415             else
2416                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2417         }
2418         s->first_slice_line=0;
2419     }
2420     return 0;
2421 }
2422
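/* Compute per-macroblock mean and variance of the source luma, roughly
 *   var = (sum(pix^2) - sum(pix)^2 / 256) / 256
 * for the 16x16 block; the results feed adaptive quantization and rate
 * control. */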
2423 static int mb_var_thread(AVCodecContext *c, void *arg){
2424     MpegEncContext *s= *(void**)arg;
2425     int mb_x, mb_y;
2426
2427     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2428         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2429             int xx = mb_x * 16;
2430             int yy = mb_y * 16;
2431             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2432             int varc;
2433             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2434
2435             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2436                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2437
2438             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2439             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2440             s->me.mb_var_sum_temp    += varc;
2441         }
2442     }
2443     return 0;
2444 }
2445
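/* Finish the current slice/GOB: merge MPEG-4 data partitions, insert the
 * codec-specific stuffing, then byte-align and flush the bitstream writer. */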
2446 static void write_slice_end(MpegEncContext *s){
2447     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2448         if(s->partitioned_frame){
2449             ff_mpeg4_merge_partitions(s);
2450         }
2451
2452         ff_mpeg4_stuffing(&s->pb);
2453     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2454         ff_mjpeg_encode_stuffing(&s->pb);
2455     }
2456
2457     avpriv_align_put_bits(&s->pb);
2458     flush_put_bits(&s->pb);
2459
2460     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2461         s->misc_bits+= get_bits_diff(s);
2462 }
2463
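/* Append one 12-byte H.263 macroblock-info record (bit offset, qscale, GOB
 * number, MB address and predicted MV) to the AV_PKT_DATA_H263_MB_INFO side
 * data. */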
2464 static void write_mb_info(MpegEncContext *s)
2465 {
2466     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2467     int offset = put_bits_count(&s->pb);
2468     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2469     int gobn = s->mb_y / s->gob_index;
2470     int pred_x, pred_y;
2471     if (CONFIG_H263_ENCODER)
2472         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2473     bytestream_put_le32(&ptr, offset);
2474     bytestream_put_byte(&ptr, s->qscale);
2475     bytestream_put_byte(&ptr, gobn);
2476     bytestream_put_le16(&ptr, mba);
2477     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2478     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2479     /* 4MV not implemented */
2480     bytestream_put_byte(&ptr, 0); /* hmv2 */
2481     bytestream_put_byte(&ptr, 0); /* vmv2 */
2482 }
2483
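/* Decide whether a new macroblock-info record is due (one every s->mb_info
 * bytes of output) and write it; with startcode set, only remember the
 * position so the record can be written after the start code. */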
2484 static void update_mb_info(MpegEncContext *s, int startcode)
2485 {
2486     if (!s->mb_info)
2487         return;
2488     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2489         s->mb_info_size += 12;
2490         s->prev_mb_info = s->last_mb_info;
2491     }
2492     if (startcode) {
2493         s->prev_mb_info = put_bits_count(&s->pb)/8;
2494         /* This might have incremented mb_info_size above, and we return without
2495          * actually writing any info into that slot yet. But in that case,
2496          * this will be called again after the start code has been written,
2497          * and the mb info will be written then. */
2498         return;
2499     }
2500
2501     s->last_mb_info = put_bits_count(&s->pb)/8;
2502     if (!s->mb_info_size)
2503         s->mb_info_size += 12;
2504     write_mb_info(s);
2505 }
2506
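/* Per-slice macroblock encoding loop: writes GOB / video packet / slice
 * headers where needed, optionally performs an RD search over the candidate
 * macroblock types and quantizer deltas, encodes each MB and accumulates the
 * bit statistics. */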
2507 static int encode_thread(AVCodecContext *c, void *arg){
2508     MpegEncContext *s= *(void**)arg;
2509     int mb_x, mb_y, pdif = 0;
2510     int chr_h= 16>>s->chroma_y_shift;
2511     int i, j;
2512     MpegEncContext best_s, backup_s;
2513     uint8_t bit_buf[2][MAX_MB_BYTES];
2514     uint8_t bit_buf2[2][MAX_MB_BYTES];
2515     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2516     PutBitContext pb[2], pb2[2], tex_pb[2];
2517
2518     for(i=0; i<2; i++){
2519         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2520         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2521         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2522     }
2523
2524     s->last_bits= put_bits_count(&s->pb);
2525     s->mv_bits=0;
2526     s->misc_bits=0;
2527     s->i_tex_bits=0;
2528     s->p_tex_bits=0;
2529     s->i_count=0;
2530     s->f_count=0;
2531     s->b_count=0;
2532     s->skip_count=0;
2533
2534     for(i=0; i<3; i++){
2535         /* init last dc values */
2536         /* note: quant matrix value (8) is implied here */
2537         s->last_dc[i] = 128 << s->intra_dc_precision;
2538
2539         s->current_picture.f->error[i] = 0;
2540     }
2541     s->mb_skip_run = 0;
2542     memset(s->last_mv, 0, sizeof(s->last_mv));
2543
2544     s->last_mv_dir = 0;
2545
2546     switch(s->codec_id){
2547     case AV_CODEC_ID_H263:
2548     case AV_CODEC_ID_H263P:
2549     case AV_CODEC_ID_FLV1:
2550         if (CONFIG_H263_ENCODER)
2551             s->gob_index = ff_h263_get_gob_height(s);
2552         break;
2553     case AV_CODEC_ID_MPEG4:
2554         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2555             ff_mpeg4_init_partitions(s);
2556         break;
2557     }
2558
2559     s->resync_mb_x=0;
2560     s->resync_mb_y=0;
2561     s->first_slice_line = 1;
2562     s->ptr_lastgob = s->pb.buf;
2563     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2564         s->mb_x=0;
2565         s->mb_y= mb_y;
2566
2567         ff_set_qscale(s, s->qscale);
2568         ff_init_block_index(s);
2569
2570         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2571             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2572             int mb_type= s->mb_type[xy];
2573 //            int d;
2574             int dmin= INT_MAX;
2575             int dir;
2576
2577             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2578                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2579                 return -1;
2580             }
2581             if(s->data_partitioning){
2582                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2583                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2584                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2585                     return -1;
2586                 }
2587             }
2588
2589             s->mb_x = mb_x;
2590             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2591             ff_update_block_index(s);
2592
2593             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2594                 ff_h261_reorder_mb_index(s);
2595                 xy= s->mb_y*s->mb_stride + s->mb_x;
2596                 mb_type= s->mb_type[xy];
2597             }
2598
2599             /* write gob / video packet header  */
2600             if(s->rtp_mode){
2601                 int current_packet_size, is_gob_start;
2602
2603                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2604
2605                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2606
2607                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2608
2609                 switch(s->codec_id){
2610                 case AV_CODEC_ID_H263:
2611                 case AV_CODEC_ID_H263P:
2612                     if(!s->h263_slice_structured)
2613                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2614                     break;
2615                 case AV_CODEC_ID_MPEG2VIDEO:
2616                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1; /* fall through */
2617                 case AV_CODEC_ID_MPEG1VIDEO:
2618                     if(s->mb_skip_run) is_gob_start=0;
2619                     break;
2620                 }
2621
2622                 if(is_gob_start){
2623                     if(s->start_mb_y != mb_y || mb_x!=0){
2624                         write_slice_end(s);
2625
2626                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2627                             ff_mpeg4_init_partitions(s);
2628                         }
2629                     }
2630
2631                     assert((put_bits_count(&s->pb)&7) == 0);
2632                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2633
2634                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2635                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2636                         int d = 100 / s->error_rate;
2637                         if(r % d == 0){
2638                             current_packet_size=0;
2639                             s->pb.buf_ptr= s->ptr_lastgob;
2640                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2641                         }
2642                     }
2643
2644                     if (s->avctx->rtp_callback){
2645                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2646                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2647                     }
2648                     update_mb_info(s, 1);
2649
2650                     switch(s->codec_id){
2651                     case AV_CODEC_ID_MPEG4:
2652                         if (CONFIG_MPEG4_ENCODER) {
2653                             ff_mpeg4_encode_video_packet_header(s);
2654                             ff_mpeg4_clean_buffers(s);
2655                         }
2656                     break;
2657                     case AV_CODEC_ID_MPEG1VIDEO:
2658                     case AV_CODEC_ID_MPEG2VIDEO:
2659                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2660                             ff_mpeg1_encode_slice_header(s);
2661                             ff_mpeg1_clean_buffers(s);
2662                         }
2663                     break;
2664                     case AV_CODEC_ID_H263:
2665                     case AV_CODEC_ID_H263P:
2666                         if (CONFIG_H263_ENCODER)
2667                             ff_h263_encode_gob_header(s, mb_y);
2668                     break;
2669                     }
2670
2671                     if(s->flags&CODEC_FLAG_PASS1){
2672                         int bits= put_bits_count(&s->pb);
2673                         s->misc_bits+= bits - s->last_bits;
2674                         s->last_bits= bits;
2675                     }
2676
2677                     s->ptr_lastgob += current_packet_size;
2678                     s->first_slice_line=1;
2679                     s->resync_mb_x=mb_x;
2680                     s->resync_mb_y=mb_y;
2681                 }
2682             }
2683
2684             if(  (s->resync_mb_x   == s->mb_x)
2685                && s->resync_mb_y+1 == s->mb_y){
2686                 s->first_slice_line=0;
2687             }
2688
2689             s->mb_skipped=0;
2690             s->dquant=0; //only for QP_RD
2691
2692             update_mb_info(s, 0);
2693
2694             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2695                 int next_block=0;
2696                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2697
2698                 copy_context_before_encode(&backup_s, s, -1);
2699                 backup_s.pb= s->pb;
2700                 best_s.data_partitioning= s->data_partitioning;
2701                 best_s.partitioned_frame= s->partitioned_frame;
2702                 if(s->data_partitioning){
2703                     backup_s.pb2= s->pb2;
2704                     backup_s.tex_pb= s->tex_pb;
2705                 }
2706
2707                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2708                     s->mv_dir = MV_DIR_FORWARD;
2709                     s->mv_type = MV_TYPE_16X16;
2710                     s->mb_intra= 0;
2711                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2712                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2713                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2714                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2715                 }
2716                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2717                     s->mv_dir = MV_DIR_FORWARD;
2718                     s->mv_type = MV_TYPE_FIELD;
2719                     s->mb_intra= 0;
2720                     for(i=0; i<2; i++){
2721                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2722                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2723                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2724                     }
2725                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2726                                  &dmin, &next_block, 0, 0);
2727                 }
2728                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2729                     s->mv_dir = MV_DIR_FORWARD;
2730                     s->mv_type = MV_TYPE_16X16;
2731                     s->mb_intra= 0;
2732                     s->mv[0][0][0] = 0;
2733                     s->mv[0][0][1] = 0;
2734                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2735                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2736                 }
2737                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2738                     s->mv_dir = MV_DIR_FORWARD;
2739                     s->mv_type = MV_TYPE_8X8;
2740                     s->mb_intra= 0;
2741                     for(i=0; i<4; i++){
2742                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2743                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2744                     }
2745                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2746                                  &dmin, &next_block, 0, 0);
2747                 }
2748                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2749                     s->mv_dir = MV_DIR_FORWARD;
2750                     s->mv_type = MV_TYPE_16X16;
2751                     s->mb_intra= 0;
2752                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2753                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2754                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2755                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2756                 }
2757                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2758                     s->mv_dir = MV_DIR_BACKWARD;
2759                     s->mv_type = MV_TYPE_16X16;
2760                     s->mb_intra= 0;
2761                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2762                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2763                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2764                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2765                 }
2766                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2767                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2768                     s->mv_type = MV_TYPE_16X16;
2769                     s->mb_intra= 0;
2770                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2771                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2772                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2773                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2774                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2775                                  &dmin, &next_block, 0, 0);
2776                 }
2777                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2778                     s->mv_dir = MV_DIR_FORWARD;
2779                     s->mv_type = MV_TYPE_FIELD;
2780                     s->mb_intra= 0;
2781                     for(i=0; i<2; i++){
2782                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2783                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2784                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2785                     }
2786                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2787                                  &dmin, &next_block, 0, 0);
2788                 }
2789                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2790                     s->mv_dir = MV_DIR_BACKWARD;
2791                     s->mv_type = MV_TYPE_FIELD;
2792                     s->mb_intra= 0;
2793                     for(i=0; i<2; i++){
2794                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2795                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2796                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2797                     }
2798                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2799                                  &dmin, &next_block, 0, 0);
2800                 }
2801                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2802                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2803                     s->mv_type = MV_TYPE_FIELD;
2804                     s->mb_intra= 0;
2805                     for(dir=0; dir<2; dir++){
2806                         for(i=0; i<2; i++){
2807                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2808                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2809                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2810                         }
2811                     }
2812                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2813                                  &dmin, &next_block, 0, 0);
2814                 }
2815                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2816                     s->mv_dir = 0;
2817                     s->mv_type = MV_TYPE_16X16;
2818                     s->mb_intra= 1;
2819                     s->mv[0][0][0] = 0;
2820                     s->mv[0][0][1] = 0;
2821                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2822                                  &dmin, &next_block, 0, 0);
2823                     if(s->h263_pred || s->h263_aic){
2824                         if(best_s.mb_intra)
2825                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2826                         else
2827                             ff_clean_intra_table_entries(s); //old mode?
2828                     }
2829                 }
2830
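                /* FF_MPV_FLAG_QP_RD: re-encode the best 16x16 mode with quantizer
                 * deltas of -1/+1/-2/+2 (only +-2 for B-frames) as long as the new qp
                 * stays within [qmin, qmax]; for intra MBs the DC/AC prediction state
                 * is saved and restored whenever a trial qp is not kept. */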
2831                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2832                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2833                         const int last_qp= backup_s.qscale;
2834                         int qpi, qp, dc[6];
2835                         int16_t ac[6][16];
2836                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2837                         static const int dquant_tab[4]={-1,1,-2,2};
2838
2839                         assert(backup_s.dquant == 0);
2840
2841                         //FIXME intra
2842                         s->mv_dir= best_s.mv_dir;
2843                         s->mv_type = MV_TYPE_16X16;
2844                         s->mb_intra= best_s.mb_intra;
2845                         s->mv[0][0][0] = best_s.mv[0][0][0];
2846                         s->mv[0][0][1] = best_s.mv[0][0][1];
2847                         s->mv[1][0][0] = best_s.mv[1][0][0];
2848                         s->mv[1][0][1] = best_s.mv[1][0][1];
2849
2850                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2851                         for(; qpi<4; qpi++){
2852                             int dquant= dquant_tab[qpi];
2853                             qp= last_qp + dquant;
2854                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2855                                 continue;
2856                             backup_s.dquant= dquant;
2857                             if(s->mb_intra && s->dc_val[0]){
2858                                 for(i=0; i<6; i++){
2859                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2860                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2861                                 }
2862                             }
2863
2864                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2865                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2866                             if(best_s.qscale != qp){
2867                                 if(s->mb_intra && s->dc_val[0]){
2868                                     for(i=0; i<6; i++){
2869                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2870                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2871                                     }
2872                                 }
2873                             }
2874                         }
2875                     }
2876                 }
2877                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2878                     int mx= s->b_direct_mv_table[xy][0];
2879                     int my= s->b_direct_mv_table[xy][1];
2880
2881                     backup_s.dquant = 0;
2882                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2883                     s->mb_intra= 0;
2884                     ff_mpeg4_set_direct_mv(s, mx, my);
2885                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2886                                  &dmin, &next_block, mx, my);
2887                 }
2888                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2889                     backup_s.dquant = 0;
2890                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2891                     s->mb_intra= 0;
2892                     ff_mpeg4_set_direct_mv(s, 0, 0);
2893                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2894                                  &dmin, &next_block, 0, 0);
2895                 }
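                /* FF_MPV_FLAG_SKIP_RD: if the best mode so far is inter and actually has
                 * coded coefficients, additionally try the same motion with the residual
                 * forced to zero (skipdct=1) and keep it when that is cheaper. */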
2896                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2897                     int coded=0;
2898                     for(i=0; i<6; i++)
2899                         coded |= s->block_last_index[i];
2900                     if(coded){
2901                         int mx,my;
2902                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2903                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2904                             mx=my=0; //FIXME find the one we actually used
2905                             ff_mpeg4_set_direct_mv(s, mx, my);
2906                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2907                             mx= s->mv[1][0][0];
2908                             my= s->mv[1][0][1];
2909                         }else{
2910                             mx= s->mv[0][0][0];
2911                             my= s->mv[0][0][1];
2912                         }
2913
2914                         s->mv_dir= best_s.mv_dir;
2915                         s->mv_type = best_s.mv_type;
2916                         s->mb_intra= 0;
2917 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2918                         s->mv[0][0][1] = best_s.mv[0][0][1];
2919                         s->mv[1][0][0] = best_s.mv[1][0][0];
2920                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2921                         backup_s.dquant= 0;
2922                         s->skipdct=1;
2923                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2924                                         &dmin, &next_block, mx, my);
2925                         s->skipdct=0;
2926                     }
2927                 }
2928
2929                 s->current_picture.qscale_table[xy] = best_s.qscale;
2930
2931                 copy_context_after_encode(s, &best_s, -1);
2932
2933                 pb_bits_count= put_bits_count(&s->pb);
2934                 flush_put_bits(&s->pb);
2935                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2936                 s->pb= backup_s.pb;
2937
2938                 if(s->data_partitioning){
2939                     pb2_bits_count= put_bits_count(&s->pb2);
2940                     flush_put_bits(&s->pb2);
2941                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2942                     s->pb2= backup_s.pb2;
2943
2944                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2945                     flush_put_bits(&s->tex_pb);
2946                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2947                     s->tex_pb= backup_s.tex_pb;
2948                 }
2949                 s->last_bits= put_bits_count(&s->pb);
2950
2951                 if (CONFIG_H263_ENCODER &&
2952                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2953                     ff_h263_update_motion_val(s);
2954
2955                 if(next_block==0){ //FIXME 16 vs linesize16
2956                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2957                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2958                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2959                 }
2960
2961                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2962                     ff_MPV_decode_mb(s, s->block);
2963             } else {
2964                 int motion_x = 0, motion_y = 0;
2965                 s->mv_type=MV_TYPE_16X16;
2966                 // only one MB-Type possible
2967
2968                 switch(mb_type){
2969                 case CANDIDATE_MB_TYPE_INTRA:
2970                     s->mv_dir = 0;
2971                     s->mb_intra= 1;
2972                     motion_x= s->mv[0][0][0] = 0;
2973                     motion_y= s->mv[0][0][1] = 0;
2974                     break;
2975                 case CANDIDATE_MB_TYPE_INTER:
2976                     s->mv_dir = MV_DIR_FORWARD;
2977                     s->mb_intra= 0;
2978                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2979                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2980                     break;
2981                 case CANDIDATE_MB_TYPE_INTER_I:
2982                     s->mv_dir = MV_DIR_FORWARD;
2983                     s->mv_type = MV_TYPE_FIELD;
2984                     s->mb_intra= 0;
2985                     for(i=0; i<2; i++){
2986                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2987                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2988                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2989                     }
2990                     break;
2991                 case CANDIDATE_MB_TYPE_INTER4V:
2992                     s->mv_dir = MV_DIR_FORWARD;
2993                     s->mv_type = MV_TYPE_8X8;
2994                     s->mb_intra= 0;
2995                     for(i=0; i<4; i++){
2996                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2997                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2998                     }
2999                     break;
3000                 case CANDIDATE_MB_TYPE_DIRECT:
3001                     if (CONFIG_MPEG4_ENCODER) {
3002                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3003                         s->mb_intra= 0;
3004                         motion_x=s->b_direct_mv_table[xy][0];
3005                         motion_y=s->b_direct_mv_table[xy][1];
3006                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3007                     }
3008                     break;
3009                 case CANDIDATE_MB_TYPE_DIRECT0:
3010                     if (CONFIG_MPEG4_ENCODER) {
3011                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3012                         s->mb_intra= 0;
3013                         ff_mpeg4_set_direct_mv(s, 0, 0);
3014                     }
3015                     break;
3016                 case CANDIDATE_MB_TYPE_BIDIR:
3017                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3018                     s->mb_intra= 0;
3019                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3020                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3021                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3022                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3023                     break;
3024                 case CANDIDATE_MB_TYPE_BACKWARD:
3025                     s->mv_dir = MV_DIR_BACKWARD;
3026                     s->mb_intra= 0;
3027                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3028                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3029                     break;
3030                 case CANDIDATE_MB_TYPE_FORWARD:
3031                     s->mv_dir = MV_DIR_FORWARD;
3032                     s->mb_intra= 0;
3033                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3034                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3035                     break;
3036                 case CANDIDATE_MB_TYPE_FORWARD_I:
3037                     s->mv_dir = MV_DIR_FORWARD;
3038                     s->mv_type = MV_TYPE_FIELD;
3039                     s->mb_intra= 0;
3040                     for(i=0; i<2; i++){
3041                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3042                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3043                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3044                     }
3045                     break;
3046                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3047                     s->mv_dir = MV_DIR_BACKWARD;
3048                     s->mv_type = MV_TYPE_FIELD;
3049                     s->mb_intra= 0;
3050                     for(i=0; i<2; i++){
3051                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3052                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3053                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3054                     }
3055                     break;
3056                 case CANDIDATE_MB_TYPE_BIDIR_I:
3057                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3058                     s->mv_type = MV_TYPE_FIELD;
3059                     s->mb_intra= 0;
3060                     for(dir=0; dir<2; dir++){
3061                         for(i=0; i<2; i++){
3062                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3063                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3064                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3065                         }
3066                     }
3067                     break;
3068                 default:
3069                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3070                 }
3071
3072                 encode_mb(s, motion_x, motion_y);
3073
3074                 // RAL: Update last macroblock type
3075                 s->last_mv_dir = s->mv_dir;
3076
3077                 if (CONFIG_H263_ENCODER &&
3078                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3079                     ff_h263_update_motion_val(s);
3080
3081                 ff_MPV_decode_mb(s, s->block);
3082             }
3083
3084             /* clean the MV table in I-, P- and S-frames for direct mode in B-frames */
3085             if(s->mb_intra /* && I,P,S_TYPE */){
3086                 s->p_mv_table[xy][0]=0;
3087                 s->p_mv_table[xy][1]=0;
3088             }
3089
3090             if(s->flags&CODEC_FLAG_PSNR){
3091                 int w= 16;
3092                 int h= 16;
3093
3094                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3095                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3096
3097                 s->current_picture.f->error[0] += sse(
3098                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3099                     s->dest[0], w, h, s->linesize);
3100                 s->current_picture.f->error[1] += sse(
3101                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3102                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3103                 s->current_picture.f->error[2] += sse(
3104                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3105                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3106             }
3107             if(s->loop_filter){
3108                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3109                     ff_h263_loop_filter(s);
3110             }
3111             av_dlog(s->avctx, "MB %d %d bits\n",
3112                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3113         }
3114     }
3115
3116     // not beautiful, but the extension header must be written before write_slice_end(), so it has to be here
3117     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3118         ff_msmpeg4_encode_ext_header(s);
3119
3120     write_slice_end(s);
3121
3122     /* Send the last GOB if RTP */
3123     if (s->avctx->rtp_callback) {
3124         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3125         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3126         /* Call the RTP callback to send the last GOB */
3127         emms_c();
3128         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3129     }
3130
3131     return 0;
3132 }
3133
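/* Each slice thread encodes into its own MpegEncContext; the MERGE() helpers below fold
 * the per-thread statistics back into the main context and zero the source fields so
 * nothing is counted twice. */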
3134 #define MERGE(field) dst->field += src->field; src->field=0
3135 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3136     MERGE(me.scene_change_score);
3137     MERGE(me.mc_mb_var_sum_temp);
3138     MERGE(me.mb_var_sum_temp);
3139 }
3140
3141 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3142     int i;
3143
3144     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3145     MERGE(dct_count[1]);
3146     MERGE(mv_bits);
3147     MERGE(i_tex_bits);
3148     MERGE(p_tex_bits);
3149     MERGE(i_count);
3150     MERGE(f_count);
3151     MERGE(b_count);
3152     MERGE(skip_count);
3153     MERGE(misc_bits);
3154     MERGE(er.error_count);
3155     MERGE(padding_bug_score);
3156     MERGE(current_picture.f->error[0]);
3157     MERGE(current_picture.f->error[1]);
3158     MERGE(current_picture.f->error[2]);
3159
3160     if(dst->avctx->noise_reduction){
3161         for(i=0; i<64; i++){
3162             MERGE(dct_error_sum[0][i]);
3163             MERGE(dct_error_sum[1][i]);
3164         }
3165     }
3166
3167     assert(put_bits_count(&src->pb) % 8 ==0);
3168     assert(put_bits_count(&dst->pb) % 8 ==0);
3169     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3170     flush_put_bits(&dst->pb);
3171 }
3172
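/* Pick the frame quantizer: use a forced next_lambda if set, otherwise ask the rate
 * control for an estimate; with adaptive quantization the per-MB qscale table is also
 * cleaned up in a codec-specific way before s->lambda is taken from it. */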
3173 static int estimate_qp(MpegEncContext *s, int dry_run){
3174     if (s->next_lambda){
3175         s->current_picture_ptr->f->quality =
3176         s->current_picture.f->quality = s->next_lambda;
3177         if(!dry_run) s->next_lambda= 0;
3178     } else if (!s->fixed_qscale) {
3179         s->current_picture_ptr->f->quality =
3180         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3181         if (s->current_picture.f->quality < 0)
3182             return -1;
3183     }
3184
3185     if(s->adaptive_quant){
3186         switch(s->codec_id){
3187         case AV_CODEC_ID_MPEG4:
3188             if (CONFIG_MPEG4_ENCODER)
3189                 ff_clean_mpeg4_qscales(s);
3190             break;
3191         case AV_CODEC_ID_H263:
3192         case AV_CODEC_ID_H263P:
3193         case AV_CODEC_ID_FLV1:
3194             if (CONFIG_H263_ENCODER)
3195                 ff_clean_h263_qscales(s);
3196             break;
3197         default:
3198             ff_init_qscale_tab(s);
3199         }
3200
3201         s->lambda= s->lambda_table[0];
3202         //FIXME broken
3203     }else
3204         s->lambda = s->current_picture.f->quality;
3205     update_qscale(s);
3206     return 0;
3207 }
3208
3209 /* must be called before writing the header */
3210 static void set_frame_distances(MpegEncContext * s){
3211     assert(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3212     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3213
3214     if(s->pict_type==AV_PICTURE_TYPE_B){
3215         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3216         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3217     }else{
3218         s->pp_time= s->time - s->last_non_b_time;
3219         s->last_non_b_time= s->time;
3220         assert(s->picture_number==0 || s->pp_time > 0);
3221     }
3222 }
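/* Example, in time_base units: with reference frames at t=0 and t=3 and a B-frame at t=1,
 * encoding the t=3 reference gives pp_time = 3 - 0 = 3, and encoding the B-frame then
 * gives pb_time = pp_time - (last_non_b_time - time) = 3 - (3 - 1) = 1, i.e. the distance
 * from the previous reference to the B-frame. */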
3223
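/* Encode one frame: set frame distances, run (threaded) motion estimation or the I-frame
 * complexity pass, possibly retype the frame as I on a scene change, choose f_code/b_code
 * and clamp over-long MVs, estimate the quantizer, write the picture header and finally
 * run encode_thread() over all slice contexts, merging their results. */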
3224 static int encode_picture(MpegEncContext *s, int picture_number)
3225 {
3226     int i, ret;
3227     int bits;
3228     int context_count = s->slice_context_count;
3229
3230     s->picture_number = picture_number;
3231
3232     /* Reset the average MB variance */
3233     s->me.mb_var_sum_temp    =
3234     s->me.mc_mb_var_sum_temp = 0;
3235
3236     /* we need to initialize some time vars before we can encode b-frames */
3237     // RAL: Condition added for MPEG1VIDEO
3238     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3239         set_frame_distances(s);
3240     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3241         ff_set_mpeg4_time(s);
3242
3243     s->me.scene_change_score=0;
3244
3245 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3246
3247     if(s->pict_type==AV_PICTURE_TYPE_I){
3248         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3249         else                        s->no_rounding=0;
3250     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3251         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3252             s->no_rounding ^= 1;
3253     }
3254
3255     if(s->flags & CODEC_FLAG_PASS2){
3256         if (estimate_qp(s,1) < 0)
3257             return -1;
3258         ff_get_2pass_fcode(s);
3259     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3260         if(s->pict_type==AV_PICTURE_TYPE_B)
3261             s->lambda= s->last_lambda_for[s->pict_type];
3262         else
3263             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3264         update_qscale(s);
3265     }
3266
3267     s->mb_intra=0; //for the rate distortion & bit compare functions
3268     for(i=1; i<context_count; i++){
3269         ret = ff_update_duplicate_context(s->thread_context[i], s);
3270         if (ret < 0)
3271             return ret;
3272     }
3273
3274     if(ff_init_me(s)<0)
3275         return -1;
3276
3277     /* Estimate motion for every MB */
3278     if(s->pict_type != AV_PICTURE_TYPE_I){
3279         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3280         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3281         if (s->pict_type != AV_PICTURE_TYPE_B) {
3282             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3283                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3284             }
3285         }
3286
3287         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3288     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3289         /* I-Frame */
3290         for(i=0; i<s->mb_stride*s->mb_height; i++)
3291             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3292
3293         if(!s->fixed_qscale){
3294             /* finding spatial complexity for I-frame rate control */
3295             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3296         }
3297     }
3298     for(i=1; i<context_count; i++){
3299         merge_context_after_me(s, s->thread_context[i]);
3300     }
3301     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3302     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3303     emms_c();
3304
3305     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3306         s->pict_type= AV_PICTURE_TYPE_I;
3307         for(i=0; i<s->mb_stride*s->mb_height; i++)
3308             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3309         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3310                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3311     }
3312
3313     if(!s->umvplus){
3314         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3315             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3316
3317             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3318                 int a,b;
3319                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3320                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3321                 s->f_code= FFMAX3(s->f_code, a, b);
3322             }
3323
3324             ff_fix_long_p_mvs(s);
3325             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3326             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3327                 int j;
3328                 for(i=0; i<2; i++){
3329                     for(j=0; j<2; j++)
3330                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3331                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3332                 }
3333             }
3334         }
3335
3336         if(s->pict_type==AV_PICTURE_TYPE_B){
3337             int a, b;
3338
3339             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3340             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3341             s->f_code = FFMAX(a, b);
3342
3343             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3344             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3345             s->b_code = FFMAX(a, b);
3346
3347             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3348             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3349             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3350             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3351             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3352                 int dir, j;
3353                 for(dir=0; dir<2; dir++){
3354                     for(i=0; i<2; i++){
3355                         for(j=0; j<2; j++){
3356                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3357                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3358                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3359                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3360                         }
3361                     }
3362                 }
3363             }
3364         }
3365     }
3366
3367     if (estimate_qp(s, 0) < 0)
3368         return -1;
3369
3370     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3371         s->qscale= 3; //reduce clipping problems
3372
3373     if (s->out_format == FMT_MJPEG) {
3374         /* for mjpeg, we do include qscale in the matrix */
3375         for(i=1;i<64;i++){
3376             int j = s->idsp.idct_permutation[i];
3377
3378             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3379         }
3380         s->y_dc_scale_table=
3381         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3382         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3383         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3384                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3385         s->qscale= 8;
3386     }
3387
3388     //FIXME var duplication
3389     s->current_picture_ptr->f->key_frame =
3390     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3391     s->current_picture_ptr->f->pict_type =
3392     s->current_picture.f->pict_type = s->pict_type;
3393
3394     if (s->current_picture.f->key_frame)
3395         s->picture_in_gop_number=0;
3396
3397     s->last_bits= put_bits_count(&s->pb);
3398     switch(s->out_format) {
3399     case FMT_MJPEG:
3400         if (CONFIG_MJPEG_ENCODER)
3401             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3402                                            s->intra_matrix);
3403         break;
3404     case FMT_H261:
3405         if (CONFIG_H261_ENCODER)
3406             ff_h261_encode_picture_header(s, picture_number);
3407         break;
3408     case FMT_H263:
3409         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3410             ff_wmv2_encode_picture_header(s, picture_number);
3411         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3412             ff_msmpeg4_encode_picture_header(s, picture_number);
3413         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3414             ff_mpeg4_encode_picture_header(s, picture_number);
3415         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3416             ff_rv10_encode_picture_header(s, picture_number);
3417         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3418             ff_rv20_encode_picture_header(s, picture_number);
3419         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3420             ff_flv_encode_picture_header(s, picture_number);
3421         else if (CONFIG_H263_ENCODER)
3422             ff_h263_encode_picture_header(s, picture_number);
3423         break;
3424     case FMT_MPEG1:
3425         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3426             ff_mpeg1_encode_picture_header(s, picture_number);
3427         break;
3428     default:
3429         assert(0);
3430     }
3431     bits= put_bits_count(&s->pb);
3432     s->header_bits= bits - s->last_bits;
3433
3434     for(i=1; i<context_count; i++){
3435         update_duplicate_context_after_me(s->thread_context[i], s);
3436     }
3437     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3438     for(i=1; i<context_count; i++){
3439         merge_context_after_encode(s, s->thread_context[i]);
3440     }
3441     emms_c();
3442     return 0;
3443 }
3444
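/* Noise reduction applied to the DCT coefficients before quantization: accumulate the
 * absolute coefficient magnitudes in dct_error_sum[] and shrink every coefficient towards
 * zero by dct_offset[], never letting it change sign. */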
3445 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3446     const int intra= s->mb_intra;
3447     int i;
3448
3449     s->dct_count[intra]++;
3450
3451     for(i=0; i<64; i++){
3452         int level= block[i];
3453
3454         if(level){
3455             if(level>0){
3456                 s->dct_error_sum[intra][i] += level;
3457                 level -= s->dct_offset[intra][i];
3458                 if(level<0) level=0;
3459             }else{
3460                 s->dct_error_sum[intra][i] -= level;
3461                 level += s->dct_offset[intra][i];
3462                 if(level>0) level=0;
3463             }
3464             block[i]= level;
3465         }
3466     }
3467 }
3468
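/* Rate-distortion optimized (trellis) quantization: for each coefficient keep up to two
 * candidate levels (the scaled value and the next one towards zero), then run a
 * Viterbi-style search over run/level survivors that minimizes
 * distortion + lambda * bits, with the bit cost taken from the AC VLC length tables. */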
3469 static int dct_quantize_trellis_c(MpegEncContext *s,
3470                                   int16_t *block, int n,
3471                                   int qscale, int *overflow){
3472     const int *qmat;
3473     const uint8_t *scantable= s->intra_scantable.scantable;
3474     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3475     int max=0;
3476     unsigned int threshold1, threshold2;
3477     int bias=0;
3478     int run_tab[65];
3479     int level_tab[65];
3480     int score_tab[65];
3481     int survivor[65];
3482     int survivor_count;
3483     int last_run=0;
3484     int last_level=0;
3485     int last_score= 0;
3486     int last_i;
3487     int coeff[2][64];
3488     int coeff_count[64];
3489     int qmul, qadd, start_i, last_non_zero, i, dc;
3490     const int esc_length= s->ac_esc_length;
3491     uint8_t * length;
3492     uint8_t * last_length;
3493     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3494
3495     s->fdsp.fdct(block);
3496
3497     if(s->dct_error_sum)
3498         s->denoise_dct(s, block);
3499     qmul= qscale*16;
3500     qadd= ((qscale-1)|1)*8;
3501
3502     if (s->mb_intra) {
3503         int q;
3504         if (!s->h263_aic) {
3505             if (n < 4)
3506                 q = s->y_dc_scale;
3507             else
3508                 q = s->c_dc_scale;
3509             q = q << 3;
3510         } else{
3511             /* For AIC we skip quant/dequant of INTRADC */
3512             q = 1 << 3;
3513             qadd=0;
3514         }
3515
3516         /* note: block[0] is assumed to be positive */
3517         block[0] = (block[0] + (q >> 1)) / q;
3518         start_i = 1;
3519         last_non_zero = 0;
3520         qmat = s->q_intra_matrix[qscale];
3521         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3522             bias= 1<<(QMAT_SHIFT-1);
3523         length     = s->intra_ac_vlc_length;
3524         last_length= s->intra_ac_vlc_last_length;
3525     } else {
3526         start_i = 0;
3527         last_non_zero = -1;
3528         qmat = s->q_inter_matrix[qscale];
3529         length     = s->inter_ac_vlc_length;
3530         last_length= s->inter_ac_vlc_last_length;
3531     }
3532     last_i= start_i;
3533
3534     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3535     threshold2= (threshold1<<1);
3536
3537     for(i=63; i>=start_i; i--) {
3538         const int j = scantable[i];
3539         int level = block[j] * qmat[j];
3540
3541         if(((unsigned)(level+threshold1))>threshold2){
3542             last_non_zero = i;
3543             break;
3544         }
3545     }
3546
3547     for(i=start_i; i<=last_non_zero; i++) {
3548         const int j = scantable[i];
3549         int level = block[j] * qmat[j];
3550
3551 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3552 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3553         if(((unsigned)(level+threshold1))>threshold2){
3554             if(level>0){
3555                 level= (bias + level)>>QMAT_SHIFT;
3556                 coeff[0][i]= level;
3557                 coeff[1][i]= level-1;
3558 //                coeff[2][k]= level-2;
3559             }else{
3560                 level= (bias - level)>>QMAT_SHIFT;
3561                 coeff[0][i]= -level;
3562                 coeff[1][i]= -level+1;
3563 //                coeff[2][k]= -level+2;
3564             }
3565             coeff_count[i]= FFMIN(level, 2);
3566             assert(coeff_count[i]);
3567             max |=level;
3568         }else{
3569             coeff[0][i]= (level>>31)|1;
3570             coeff_count[i]= 1;
3571         }
3572     }
3573
3574     *overflow= s->max_qcoeff < max; //overflow might have happened
3575
3576     if(last_non_zero < start_i){
3577         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3578         return last_non_zero;
3579     }
3580
3581     score_tab[start_i]= 0;
3582     survivor[0]= start_i;
3583     survivor_count= 1;
3584
3585     for(i=start_i; i<=last_non_zero; i++){
3586         int level_index, j, zero_distortion;
3587         int dct_coeff= FFABS(block[ scantable[i] ]);
3588         int best_score=256*256*256*120;
3589
3590         if (s->fdsp.fdct == ff_fdct_ifast)
3591             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3592         zero_distortion= dct_coeff*dct_coeff;
3593
3594         for(level_index=0; level_index < coeff_count[i]; level_index++){
3595             int distortion;
3596             int level= coeff[level_index][i];
3597             const int alevel= FFABS(level);
3598             int unquant_coeff;
3599
3600             assert(level);
3601
3602             if(s->out_format == FMT_H263){
3603                 unquant_coeff= alevel*qmul + qadd;
3604             }else{ //MPEG1
3605                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3606                 if(s->mb_intra){
3607                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3608                         unquant_coeff =   (unquant_coeff - 1) | 1;
3609                 }else{
3610                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3611                         unquant_coeff =   (unquant_coeff - 1) | 1;
3612                 }
3613                 unquant_coeff<<= 3;
3614             }
3615
3616             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3617             level+=64;
3618             if((level&(~127)) == 0){
3619                 for(j=survivor_count-1; j>=0; j--){
3620                     int run= i - survivor[j];
3621                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3622                     score += score_tab[i-run];
3623
3624                     if(score < best_score){
3625                         best_score= score;
3626                         run_tab[i+1]= run;
3627                         level_tab[i+1]= level-64;
3628                     }
3629                 }
3630
3631                 if(s->out_format == FMT_H263){
3632                     for(j=survivor_count-1; j>=0; j--){
3633                         int run= i - survivor[j];
3634                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3635                         score += score_tab[i-run];
3636                         if(score < last_score){
3637                             last_score= score;
3638                             last_run= run;
3639                             last_level= level-64;
3640                             last_i= i+1;
3641                         }
3642                     }
3643                 }
3644             }else{
3645                 distortion += esc_length*lambda;
3646                 for(j=survivor_count-1; j>=0; j--){
3647                     int run= i - survivor[j];
3648                     int score= distortion + score_tab[i-run];
3649
3650                     if(score < best_score){
3651                         best_score= score;
3652                         run_tab[i+1]= run;
3653                         level_tab[i+1]= level-64;
3654                     }
3655                 }
3656
3657                 if(s->out_format == FMT_H263){
3658                   for(j=survivor_count-1; j>=0; j--){
3659                         int run= i - survivor[j];
3660                         int score= distortion + score_tab[i-run];
3661                         if(score < last_score){
3662                             last_score= score;
3663                             last_run= run;
3664                             last_level= level-64;
3665                             last_i= i+1;
3666                         }
3667                     }
3668                 }
3669             }
3670         }
3671
3672         score_tab[i+1]= best_score;
3673
3674                 //Note: MPEG-4 has a VLC code which is 1 bit shorter than another one with a shorter run and the same level
3675         if(last_non_zero <= 27){
3676             for(; survivor_count; survivor_count--){
3677                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3678                     break;
3679             }
3680         }else{
3681             for(; survivor_count; survivor_count--){
3682                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3683                     break;
3684             }
3685         }
3686
3687         survivor[ survivor_count++ ]= i+1;
3688     }
3689
3690     if(s->out_format != FMT_H263){
3691         last_score= 256*256*256*120;
3692         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3693             int score= score_tab[i];
3694             if(i) score += lambda*2; //FIXME could be more exact
3695
3696             if(score < last_score){
3697                 last_score= score;
3698                 last_i= i;
3699                 last_level= level_tab[i];
3700                 last_run= run_tab[i];
3701             }
3702         }
3703     }
3704
3705     s->coded_score[n] = last_score;
3706
3707     dc= FFABS(block[0]);
3708     last_non_zero= last_i - 1;
3709     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3710
3711     if(last_non_zero < start_i)
3712         return last_non_zero;
3713
3714     if(last_non_zero == 0 && start_i == 0){
3715         int best_level= 0;
3716         int best_score= dc * dc;
3717
3718         for(i=0; i<coeff_count[0]; i++){
3719             int level= coeff[i][0];
3720             int alevel= FFABS(level);
3721             int unquant_coeff, score, distortion;
3722
3723             if(s->out_format == FMT_H263){
3724                     unquant_coeff= (alevel*qmul + qadd)>>3;
3725             }else{ //MPEG1
3726                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3727                     unquant_coeff =   (unquant_coeff - 1) | 1;
3728             }
3729             unquant_coeff = (unquant_coeff + 4) >> 3;
3730             unquant_coeff<<= 3 + 3;
3731
3732             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3733             level+=64;
3734             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3735             else                    score= distortion + esc_length*lambda;
3736
3737             if(score < best_score){
3738                 best_score= score;
3739                 best_level= level - 64;
3740             }
3741         }
3742         block[0]= best_level;
3743         s->coded_score[n] = best_score - dc*dc;
3744         if(best_level == 0) return -1;
3745         else                return last_non_zero;
3746     }
3747
3748     i= last_i;
3749     assert(last_level);
3750
3751     block[ perm_scantable[last_non_zero] ]= last_level;
3752     i -= last_run + 1;
3753
3754     for(; i>start_i; i -= run_tab[i] + 1){
3755         block[ perm_scantable[i-1] ]= level_tab[i];
3756     }
3757
3758     return last_non_zero;
3759 }
3760
3761 //#define REFINE_STATS 1
3762 static int16_t basis[64][64];
3763
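/* basis[] caches the 64 8x8 DCT basis patterns, scaled by 1<<BASIS_SHIFT and indexed in
 * IDCT permutation order, so dct_quantize_refine() can evaluate the pixel-domain effect
 * of changing a single coefficient via try_8x8basis()/add_8x8basis(). */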
3764 static void build_basis(uint8_t *perm){
3765     int i, j, x, y;
3766     emms_c();
3767     for(i=0; i<8; i++){
3768         for(j=0; j<8; j++){
3769             for(y=0; y<8; y++){
3770                 for(x=0; x<8; x++){
3771                     double s= 0.25*(1<<BASIS_SHIFT);
3772                     int index= 8*i + j;
3773                     int perm_index= perm[index];
3774                     if(i==0) s*= sqrt(0.5);
3775                     if(j==0) s*= sqrt(0.5);
3776                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3777                 }
3778             }
3779         }
3780     }
3781 }
3782
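/* Refinement pass over an already quantized block: repeatedly try +1/-1 changes on each
 * coefficient, scoring them as the VLC bit-cost difference times lambda plus the change in
 * pixel-domain error (measured against rem[] with the basis functions), and apply the
 * best change until no further improvement is found. */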
3783 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3784                         int16_t *block, int16_t *weight, int16_t *orig,
3785                         int n, int qscale){
3786     int16_t rem[64];
3787     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3788     const uint8_t *scantable= s->intra_scantable.scantable;
3789     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3790 //    unsigned int threshold1, threshold2;
3791 //    int bias=0;
3792     int run_tab[65];
3793     int prev_run=0;
3794     int prev_level=0;
3795     int qmul, qadd, start_i, last_non_zero, i, dc;
3796     uint8_t * length;
3797     uint8_t * last_length;
3798     int lambda;
3799     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3800 #ifdef REFINE_STATS
3801 static int count=0;
3802 static int after_last=0;
3803 static int to_zero=0;
3804 static int from_zero=0;
3805 static int raise=0;
3806 static int lower=0;
3807 static int messed_sign=0;
3808 #endif
3809
3810     if(basis[0][0] == 0)
3811         build_basis(s->idsp.idct_permutation);
3812
3813     qmul= qscale*2;
3814     qadd= (qscale-1)|1;
3815     if (s->mb_intra) {
3816         if (!s->h263_aic) {
3817             if (n < 4)
3818                 q = s->y_dc_scale;
3819             else
3820                 q = s->c_dc_scale;
3821         } else{
3822             /* For AIC we skip quant/dequant of INTRADC */
3823             q = 1;
3824             qadd=0;
3825         }
3826         q <<= RECON_SHIFT-3;
3827         /* note: block[0] is assumed to be positive */
3828         dc= block[0]*q;
3829 //        block[0] = (block[0] + (q >> 1)) / q;
3830         start_i = 1;
3831 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3832 //            bias= 1<<(QMAT_SHIFT-1);
3833         length     = s->intra_ac_vlc_length;
3834         last_length= s->intra_ac_vlc_last_length;
3835     } else {
3836         dc= 0;
3837         start_i = 0;
3838         length     = s->inter_ac_vlc_length;
3839         last_length= s->inter_ac_vlc_last_length;
3840     }
3841     last_non_zero = s->block_last_index[n];
3842
3843 #ifdef REFINE_STATS
3844 {START_TIMER
3845 #endif
3846     dc += (1<<(RECON_SHIFT-1));
3847     for(i=0; i<64; i++){
3848         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME use orig directly instead of copying to rem[]
3849     }
3850 #ifdef REFINE_STATS
3851 STOP_TIMER("memset rem[]")}
3852 #endif
3853     sum=0;
3854     for(i=0; i<64; i++){
3855         int one= 36;
3856         int qns=4;
3857         int w;
3858
3859         w= FFABS(weight[i]) + qns*one;
3860         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3861
3862         weight[i] = w;
3863 //        w=weight[i] = (63*qns + (w/2)) / w;
3864
3865         assert(w>0);
3866         assert(w<(1<<6));
3867         sum += w*w;
3868     }
3869     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3870 #ifdef REFINE_STATS
3871 {START_TIMER
3872 #endif
3873     run=0;
3874     rle_index=0;
3875     for(i=start_i; i<=last_non_zero; i++){
3876         int j= perm_scantable[i];
3877         const int level= block[j];
3878         int coeff;
3879
3880         if(level){
3881             if(level<0) coeff= qmul*level - qadd;
3882             else        coeff= qmul*level + qadd;
3883             run_tab[rle_index++]=run;
3884             run=0;
3885
3886             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
3887         }else{
3888             run++;
3889         }
3890     }
3891 #ifdef REFINE_STATS
3892 if(last_non_zero>0){
3893 STOP_TIMER("init rem[]")
3894 }
3895 }
3896
3897 {START_TIMER
3898 #endif
3899     for(;;){
3900         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
3901         int best_coeff=0;
3902         int best_change=0;
3903         int run2, best_unquant_change=0, analyze_gradient;
3904 #ifdef REFINE_STATS
3905 {START_TIMER
3906 #endif
3907         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3908
3909         if(analyze_gradient){
3910 #ifdef REFINE_STATS
3911 {START_TIMER
3912 #endif
3913             for(i=0; i<64; i++){
3914                 int w= weight[i];
3915
3916                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3917             }
3918 #ifdef REFINE_STATS
3919 STOP_TIMER("rem*w*w")}
3920 {START_TIMER
3921 #endif
3922             s->fdsp.fdct(d1);
3923 #ifdef REFINE_STATS
3924 STOP_TIMER("dct")}
3925 #endif
3926         }
3927
3928         if(start_i){
3929             const int level= block[0];
3930             int change, old_coeff;
3931
3932             assert(s->mb_intra);
3933
3934             old_coeff= q*level;
3935
3936             for(change=-1; change<=1; change+=2){
3937                 int new_level= level + change;
3938                 int score, new_coeff;
3939
3940                 new_coeff= q*new_level;
3941                 if(new_coeff >= 2048 || new_coeff < 0)
3942                     continue;
3943
3944                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
3945                                                   new_coeff - old_coeff);
3946                 if(score<best_score){
3947                     best_score= score;
3948                     best_coeff= 0;
3949                     best_change= change;
3950                     best_unquant_change= new_coeff - old_coeff;
3951                 }
3952             }
3953         }
3954
3955         run=0;
3956         rle_index=0;
3957         run2= run_tab[rle_index++];
3958         prev_level=0;
3959         prev_run=0;
3960
3961         for(i=start_i; i<64; i++){
3962             int j= perm_scantable[i];
3963             const int level= block[j];
3964             int change, old_coeff;
3965
3966             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3967                 break;
3968
3969             if(level){
3970                 if(level<0) old_coeff= qmul*level - qadd;
3971                 else        old_coeff= qmul*level + qadd;
3972                 run2= run_tab[rle_index++]; //FIXME: may read one entry past the last run
3973             }else{
3974                 old_coeff=0;
3975                 run2--;
3976                 assert(run2>=0 || i >= last_non_zero );
3977             }
3978
3979             for(change=-1; change<=1; change+=2){
3980                 int new_level= level + change;
3981                 int score, new_coeff, unquant_change;
3982
3983                 score=0;
3984                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3985                    continue;
3986
3987                 if(new_level){
3988                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3989                     else            new_coeff= qmul*new_level + qadd;
3990                     if(new_coeff >= 2048 || new_coeff <= -2048)
3991                         continue;
3992                     //FIXME check for overflow
3993
3994                     if(level){
3995                         if(level < 63 && level > -63){
3996                             if(i < last_non_zero)
3997                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3998                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3999                             else
4000                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4001                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4002                         }
4003                     }else{
4004                         assert(FFABS(new_level)==1);
4005
4006                         if(analyze_gradient){
4007                             int g= d1[ scantable[i] ];
4008                             if(g && (g^new_level) >= 0)
4009                                 continue;
4010                         }
4011
4012                         if(i < last_non_zero){
4013                             int next_i= i + run2 + 1;
4014                             int next_level= block[ perm_scantable[next_i] ] + 64;
4015
4016                             if(next_level&(~127))
4017                                 next_level= 0;
4018
4019                             if(next_i < last_non_zero)
4020                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4021                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4022                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4023                             else
4024                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4025                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4026                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4027                         }else{
4028                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4029                             if(prev_level){
4030                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4031                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4032                             }
4033                         }
4034                     }
4035                 }else{
4036                     new_coeff=0;
4037                     assert(FFABS(level)==1);
4038
4039                     if(i < last_non_zero){
4040                         int next_i= i + run2 + 1;
4041                         int next_level= block[ perm_scantable[next_i] ] + 64;
4042
4043                         if(next_level&(~127))
4044                             next_level= 0;
4045
4046                         if(next_i < last_non_zero)
4047                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4048                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4049                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4050                         else
4051                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4052                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4053                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4054                     }else{
4055                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4056                         if(prev_level){
4057                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4058                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4059                         }
4060                     }
4061                 }
4062
4063                 score *= lambda;
4064
4065                 unquant_change= new_coeff - old_coeff;
4066                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
4067
4068                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4069                                                    unquant_change);
4070                 if(score<best_score){
4071                     best_score= score;
4072                     best_coeff= i;
4073                     best_change= change;
4074                     best_unquant_change= unquant_change;
4075                 }
4076             }
4077             if(level){
4078                 prev_level= level + 64;
4079                 if(prev_level&(~127))
4080                     prev_level= 0;
4081                 prev_run= run;
4082                 run=0;
4083             }else{
4084                 run++;
4085             }
4086         }
4087 #ifdef REFINE_STATS
4088 STOP_TIMER("iterative step")}
4089 #endif
4090
4091         if(best_change){
4092             int j= perm_scantable[ best_coeff ];
4093
4094             block[j] += best_change;
4095
4096             if(best_coeff > last_non_zero){
4097                 last_non_zero= best_coeff;
4098                 assert(block[j]);
4099 #ifdef REFINE_STATS
4100 after_last++;
4101 #endif
4102             }else{
4103 #ifdef REFINE_STATS
4104 if(block[j]){
4105     if(block[j] - best_change){
4106         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4107             raise++;
4108         }else{
4109             lower++;
4110         }
4111     }else{
4112         from_zero++;
4113     }
4114 }else{
4115     to_zero++;
4116 }
4117 #endif
4118                 for(; last_non_zero>=start_i; last_non_zero--){
4119                     if(block[perm_scantable[last_non_zero]])
4120                         break;
4121                 }
4122             }
4123 #ifdef REFINE_STATS
4124 count++;
4125 if(256*256*256*64 % count == 0){
4126     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4127 }
4128 #endif
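            /* Rebuild the run-length table so it reflects the modified coefficient set
             * before the next search pass. */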
4129             run=0;
4130             rle_index=0;
4131             for(i=start_i; i<=last_non_zero; i++){
4132                 int j= perm_scantable[i];
4133                 const int level= block[j];
4134
4135                 if(level){
4136                     run_tab[rle_index++]=run;
4137                     run=0;
4138                 }else{
4139                     run++;
4140                 }
4141             }
4142
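            /* Fold the accepted change into the spatial-domain residual so later
             * passes score candidates against the updated reconstruction error. */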
4143             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4144         }else{
4145             break;
4146         }
4147     }
4148 #ifdef REFINE_STATS
4149 if(last_non_zero>0){
4150 STOP_TIMER("iterative search")
4151 }
4152 }
4153 #endif
4154
4155     return last_non_zero;
4156 }
4157
4158 int ff_dct_quantize_c(MpegEncContext *s,
4159                         int16_t *block, int n,
4160                         int qscale, int *overflow)
4161 {
4162     int i, j, level, last_non_zero, q, start_i;
4163     const int *qmat;
4164     const uint8_t *scantable= s->intra_scantable.scantable;
4165     int bias;
4166     int max=0;
4167     unsigned int threshold1, threshold2;
4168
4169     s->fdsp.fdct(block);
4170
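    /* optional DCT-domain noise reduction: dct_error_sum holds accumulated quantization
     * error statistics that denoise_dct() uses to attenuate coefficients which are
     * likely dominated by noise */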
4171     if(s->dct_error_sum)
4172         s->denoise_dct(s, block);
4173
4174     if (s->mb_intra) {
4175         if (!s->h263_aic) {
4176             if (n < 4)
4177                 q = s->y_dc_scale;
4178             else
4179                 q = s->c_dc_scale;
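            /* scale by 8: the FDCT implementations used here leave the coefficients
             * scaled up by a factor of 8, so the DC quantizer is scaled to match */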
4180             q = q << 3;
4181         } else
4182             /* For AIC we skip quant/dequant of INTRADC */
4183             q = 1 << 3;
4184
4185         /* note: block[0] is assumed to be positive */
4186         block[0] = (block[0] + (q >> 1)) / q;
4187         start_i = 1;
4188         last_non_zero = 0;
4189         qmat = s->q_intra_matrix[qscale];
4190         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4191     } else {
4192         start_i = 0;
4193         last_non_zero = -1;
4194         qmat = s->q_inter_matrix[qscale];
4195         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4196     }
4197     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4198     threshold2= (threshold1<<1);
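    /* A coefficient survives quantization only if (bias + |block[j]*qmat[j]|) >> QMAT_SHIFT
     * is nonzero, i.e. if |block[j]*qmat[j]| > threshold1.  The unsigned comparison below
     * folds the two-sided check into a single test: values inside the dead zone map into
     * [0, threshold2] after adding threshold1, while surviving positive values exceed
     * threshold2 and surviving negative values wrap around to large unsigned numbers.
     * The first loop scans backwards from the highest-frequency coefficient, clearing
     * everything inside the dead zone until it finds last_non_zero, so the forward
     * quantization loop can stop early. */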
4199     for(i=63;i>=start_i;i--) {
4200         j = scantable[i];
4201         level = block[j] * qmat[j];
4202
4203         if(((unsigned)(level+threshold1))>threshold2){
4204             last_non_zero = i;
4205             break;
4206         }else{
4207             block[j]=0;
4208         }
4209     }
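    /* Quantize the remaining coefficients in scan order; the rounding bias is applied
     * symmetrically to positive and negative levels, and max accumulates the OR of all
     * quantized magnitudes for the overflow check below. */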
4210     for(i=start_i; i<=last_non_zero; i++) {
4211         j = scantable[i];
4212         level = block[j] * qmat[j];
4213
4214 //        if(   bias+level >= (1<<QMAT_SHIFT)
4215 //           || bias-level >= (1<<QMAT_SHIFT)){
4216         if(((unsigned)(level+threshold1))>threshold2){
4217             if(level>0){
4218                 level= (bias + level)>>QMAT_SHIFT;
4219                 block[j]= level;
4220             }else{
4221                 level= (bias - level)>>QMAT_SHIFT;
4222                 block[j]= -level;
4223             }
4224             max |= level;
4225         }else{
4226             block[j]=0;
4227         }
4228     }
4229     *overflow= s->max_qcoeff < max; // flag an overflow if the quantized levels exceed the codec's coefficient range (max is the OR of all magnitudes)
4230
4231     /* We need this permutation so that the coefficients end up in the order the IDCT expects; only the nonzero elements (up to last_non_zero) have to be permuted. */
4232     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4233         ff_block_permute(block, s->idsp.idct_permutation,
4234                          scantable, last_non_zero);
4235
4236     return last_non_zero;
4237 }
4238
4239 #define OFFSET(x) offsetof(MpegEncContext, x)
4240 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4241 static const AVOption h263_options[] = {
4242     { "obmc",         "Use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4243     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4244     { "mb_info",      "Emit macroblock info for RFC 2190 packetization; the parameter value is the maximum payload size.", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4245     FF_MPV_COMMON_OPTS
4246     { NULL },
4247 };
4248
4249 static const AVClass h263_class = {
4250     .class_name = "H.263 encoder",
4251     .item_name  = av_default_item_name,
4252     .option     = h263_options,
4253     .version    = LIBAVUTIL_VERSION_INT,
4254 };
4255
4256 AVCodec ff_h263_encoder = {
4257     .name           = "h263",
4258     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4259     .type           = AVMEDIA_TYPE_VIDEO,
4260     .id             = AV_CODEC_ID_H263,
4261     .priv_data_size = sizeof(MpegEncContext),
4262     .init           = ff_MPV_encode_init,
4263     .encode2        = ff_MPV_encode_picture,
4264     .close          = ff_MPV_encode_end,
4265     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4266     .priv_class     = &h263_class,
4267 };
4268
4269 static const AVOption h263p_options[] = {
4270     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4271     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4272     { "obmc",       "Use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4273     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4274     FF_MPV_COMMON_OPTS
4275     { NULL },
4276 };
4277 static const AVClass h263p_class = {
4278     .class_name = "H.263p encoder",
4279     .item_name  = av_default_item_name,
4280     .option     = h263p_options,
4281     .version    = LIBAVUTIL_VERSION_INT,
4282 };
4283
4284 AVCodec ff_h263p_encoder = {
4285     .name           = "h263p",
4286     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4287     .type           = AVMEDIA_TYPE_VIDEO,
4288     .id             = AV_CODEC_ID_H263P,
4289     .priv_data_size = sizeof(MpegEncContext),
4290     .init           = ff_MPV_encode_init,
4291     .encode2        = ff_MPV_encode_picture,
4292     .close          = ff_MPV_encode_end,
4293     .capabilities   = CODEC_CAP_SLICE_THREADS,
4294     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4295     .priv_class     = &h263p_class,
4296 };
4297
4298 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4299
4300 AVCodec ff_msmpeg4v2_encoder = {
4301     .name           = "msmpeg4v2",
4302     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4303     .type           = AVMEDIA_TYPE_VIDEO,
4304     .id             = AV_CODEC_ID_MSMPEG4V2,
4305     .priv_data_size = sizeof(MpegEncContext),
4306     .init           = ff_MPV_encode_init,
4307     .encode2        = ff_MPV_encode_picture,
4308     .close          = ff_MPV_encode_end,
4309     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4310     .priv_class     = &msmpeg4v2_class,
4311 };
4312
4313 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4314
4315 AVCodec ff_msmpeg4v3_encoder = {
4316     .name           = "msmpeg4",
4317     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4318     .type           = AVMEDIA_TYPE_VIDEO,
4319     .id             = AV_CODEC_ID_MSMPEG4V3,
4320     .priv_data_size = sizeof(MpegEncContext),
4321     .init           = ff_MPV_encode_init,
4322     .encode2        = ff_MPV_encode_picture,
4323     .close          = ff_MPV_encode_end,
4324     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4325     .priv_class     = &msmpeg4v3_class,
4326 };
4327
4328 FF_MPV_GENERIC_CLASS(wmv1)
4329
4330 AVCodec ff_wmv1_encoder = {
4331     .name           = "wmv1",
4332     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4333     .type           = AVMEDIA_TYPE_VIDEO,
4334     .id             = AV_CODEC_ID_WMV1,
4335     .priv_data_size = sizeof(MpegEncContext),
4336     .init           = ff_MPV_encode_init,
4337     .encode2        = ff_MPV_encode_picture,
4338     .close          = ff_MPV_encode_end,
4339     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4340     .priv_class     = &wmv1_class,
4341 };