]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
mpegvideo: Move QMAT_SHIFT* defines to the only place they are used
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "idctdsp.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "h261.h"
44 #include "h263.h"
45 #include "mjpegenc_common.h"
46 #include "mathops.h"
47 #include "mpegutils.h"
48 #include "mjpegenc.h"
49 #include "msmpeg4.h"
50 #include "pixblockdsp.h"
51 #include "qpeldsp.h"
52 #include "faandct.h"
53 #include "thread.h"
54 #include "aandcttab.h"
55 #include "flv.h"
56 #include "mpeg4video.h"
57 #include "internal.h"
58 #include "bytestream.h"
59 #include <limits.h>
60
/* Number of fractional bits used for the quantizer bias values
 * (biases are expressed as "n << (QUANT_BIAS_SHIFT - k)" where they are set). */
#define QUANT_BIAS_SHIFT 8

/* Fixed-point precision of the 16-bit reciprocal tables (qmat16). */
#define QMAT_SHIFT_MMX 16
/* Fixed-point precision of the int reciprocal tables (qmat). */
#define QMAT_SHIFT 22

/* Forward declarations of static helpers used before their definition. */
static int encode_picture(MpegEncContext *s, int picture_number);
static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
static int sse_mb(MpegEncContext *s);
static void denoise_dct_c(MpegEncContext *s, int16_t *block);
static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);

/* Default tables installed into the context by MPV_encode_defaults():
 * default_mv_penalty becomes s->me.mv_penalty and default_fcode_tab
 * becomes s->fcode_tab. */
static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
static uint8_t default_fcode_tab[MAX_MV * 2 + 1];

/* Option table containing only the common mpegvideo encoder options,
 * terminated by a NULL entry. */
const AVOption ff_mpv_generic_options[] = {
    FF_MPV_COMMON_OPTS
    { NULL },
};
79
80 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
81                        uint16_t (*qmat16)[2][64],
82                        const uint16_t *quant_matrix,
83                        int bias, int qmin, int qmax, int intra)
84 {
85     FDCTDSPContext *fdsp = &s->fdsp;
86     int qscale;
87     int shift = 0;
88
89     for (qscale = qmin; qscale <= qmax; qscale++) {
90         int i;
91         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
92             fdsp->fdct == ff_jpeg_fdct_islow_10 ||
93             fdsp->fdct == ff_faandct) {
94             for (i = 0; i < 64; i++) {
95                 const int j = s->idsp.idct_permutation[i];
96                 /* 16 <= qscale * quant_matrix[i] <= 7905
97                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
98                  *             19952 <=              x  <= 249205026
99                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
100                  *           3444240 >= (1 << 36) / (x) >= 275 */
101
102                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
103                                         (qscale * quant_matrix[j]));
104             }
105         } else if (fdsp->fdct == ff_fdct_ifast) {
106             for (i = 0; i < 64; i++) {
107                 const int j = s->idsp.idct_permutation[i];
108                 /* 16 <= qscale * quant_matrix[i] <= 7905
109                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
110                  *             19952 <=              x  <= 249205026
111                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
112                  *           3444240 >= (1 << 36) / (x) >= 275 */
113
114                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
115                                         (ff_aanscales[i] * qscale *
116                                          quant_matrix[j]));
117             }
118         } else {
119             for (i = 0; i < 64; i++) {
120                 const int j = s->idsp.idct_permutation[i];
121                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
122                  * Assume x = qscale * quant_matrix[i]
123                  * So             16 <=              x  <= 7905
124                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
125                  * so          32768 >= (1 << 19) / (x) >= 67 */
126                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
127                                         (qscale * quant_matrix[j]));
128                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
129                 //                    (qscale * quant_matrix[i]);
130                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
131                                        (qscale * quant_matrix[j]);
132
133                 if (qmat16[qscale][0][i] == 0 ||
134                     qmat16[qscale][0][i] == 128 * 256)
135                     qmat16[qscale][0][i] = 128 * 256 - 1;
136                 qmat16[qscale][1][i] =
137                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
138                                 qmat16[qscale][0][i]);
139             }
140         }
141
142         for (i = intra; i < 64; i++) {
143             int64_t max = 8191;
144             if (fdsp->fdct == ff_fdct_ifast) {
145                 max = (8191LL * ff_aanscales[i]) >> 14;
146             }
147             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
148                 shift++;
149             }
150         }
151     }
152     if (shift) {
153         av_log(NULL, AV_LOG_INFO,
154                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
155                QMAT_SHIFT - shift);
156     }
157 }
158
159 static inline void update_qscale(MpegEncContext *s)
160 {
161     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
162                 (FF_LAMBDA_SHIFT + 7);
163     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
164
165     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
166                  FF_LAMBDA_SHIFT;
167 }
168
169 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
170 {
171     int i;
172
173     if (matrix) {
174         put_bits(pb, 1, 1);
175         for (i = 0; i < 64; i++) {
176             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
177         }
178     } else
179         put_bits(pb, 1, 0);
180 }
181
182 /**
183  * init s->current_picture.qscale_table from s->lambda_table
184  */
185 void ff_init_qscale_tab(MpegEncContext *s)
186 {
187     int8_t * const qscale_table = s->current_picture.qscale_table;
188     int i;
189
190     for (i = 0; i < s->mb_num; i++) {
191         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
192         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
193         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
194                                                   s->avctx->qmax);
195     }
196 }
197
198 static void update_duplicate_context_after_me(MpegEncContext *dst,
199                                               MpegEncContext *src)
200 {
201 #define COPY(a) dst->a= src->a
202     COPY(pict_type);
203     COPY(current_picture);
204     COPY(f_code);
205     COPY(b_code);
206     COPY(qscale);
207     COPY(lambda);
208     COPY(lambda2);
209     COPY(picture_in_gop_number);
210     COPY(gop_picture_number);
211     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
212     COPY(progressive_frame);    // FIXME don't set in encode_header
213     COPY(partitioned_frame);    // FIXME don't set in encode_header
214 #undef COPY
215 }
216
217 /**
218  * Set the given MpegEncContext to defaults for encoding.
219  * the changed fields will not depend upon the prior state of the MpegEncContext.
220  */
221 static void MPV_encode_defaults(MpegEncContext *s)
222 {
223     int i;
224     ff_MPV_common_defaults(s);
225
226     for (i = -16; i < 16; i++) {
227         default_fcode_tab[i + MAX_MV] = 1;
228     }
229     s->me.mv_penalty = default_mv_penalty;
230     s->fcode_tab     = default_fcode_tab;
231
232     s->input_picture_number  = 0;
233     s->picture_in_gop_number = 0;
234 }
235
236 /* init video encoder */
237 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
238 {
239     MpegEncContext *s = avctx->priv_data;
240     int i, ret, format_supported;
241
242     MPV_encode_defaults(s);
243
244     switch (avctx->codec_id) {
245     case AV_CODEC_ID_MPEG2VIDEO:
246         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
247             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
248             av_log(avctx, AV_LOG_ERROR,
249                    "only YUV420 and YUV422 are supported\n");
250             return -1;
251         }
252         break;
253     case AV_CODEC_ID_MJPEG:
254         format_supported = 0;
255         /* JPEG color space */
256         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
257             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
258             (avctx->color_range == AVCOL_RANGE_JPEG &&
259              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
260               avctx->pix_fmt == AV_PIX_FMT_YUV422P)))
261             format_supported = 1;
262         /* MPEG color space */
263         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
264                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
265                   avctx->pix_fmt == AV_PIX_FMT_YUV422P))
266             format_supported = 1;
267
268         if (!format_supported) {
269             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
270             return -1;
271         }
272         break;
273     default:
274         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
275             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
276             return -1;
277         }
278     }
279
280     switch (avctx->pix_fmt) {
281     case AV_PIX_FMT_YUVJ422P:
282     case AV_PIX_FMT_YUV422P:
283         s->chroma_format = CHROMA_422;
284         break;
285     case AV_PIX_FMT_YUVJ420P:
286     case AV_PIX_FMT_YUV420P:
287     default:
288         s->chroma_format = CHROMA_420;
289         break;
290     }
291
292     s->bit_rate = avctx->bit_rate;
293     s->width    = avctx->width;
294     s->height   = avctx->height;
295     if (avctx->gop_size > 600 &&
296         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
297         av_log(avctx, AV_LOG_ERROR,
298                "Warning keyframe interval too large! reducing it ...\n");
299         avctx->gop_size = 600;
300     }
301     s->gop_size     = avctx->gop_size;
302     s->avctx        = avctx;
303     s->flags        = avctx->flags;
304     s->flags2       = avctx->flags2;
305     if (avctx->max_b_frames > MAX_B_FRAMES) {
306         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
307                "is %d.\n", MAX_B_FRAMES);
308     }
309     s->max_b_frames = avctx->max_b_frames;
310     s->codec_id     = avctx->codec->id;
311     s->strict_std_compliance = avctx->strict_std_compliance;
312     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
313     s->mpeg_quant         = avctx->mpeg_quant;
314     s->rtp_mode           = !!avctx->rtp_payload_size;
315     s->intra_dc_precision = avctx->intra_dc_precision;
316     s->user_specified_pts = AV_NOPTS_VALUE;
317
318     if (s->gop_size <= 1) {
319         s->intra_only = 1;
320         s->gop_size   = 12;
321     } else {
322         s->intra_only = 0;
323     }
324
325     s->me_method = avctx->me_method;
326
327     /* Fixed QSCALE */
328     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
329
330     s->adaptive_quant = (s->avctx->lumi_masking ||
331                          s->avctx->dark_masking ||
332                          s->avctx->temporal_cplx_masking ||
333                          s->avctx->spatial_cplx_masking  ||
334                          s->avctx->p_masking      ||
335                          s->avctx->border_masking ||
336                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
337                         !s->fixed_qscale;
338
339     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
340
341     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
342         av_log(avctx, AV_LOG_ERROR,
343                "a vbv buffer size is needed, "
344                "for encoding with a maximum bitrate\n");
345         return -1;
346     }
347
348     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
349         av_log(avctx, AV_LOG_INFO,
350                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
351     }
352
353     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
354         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
355         return -1;
356     }
357
358     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
359         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
360         return -1;
361     }
362
363     if (avctx->rc_max_rate &&
364         avctx->rc_max_rate == avctx->bit_rate &&
365         avctx->rc_max_rate != avctx->rc_min_rate) {
366         av_log(avctx, AV_LOG_INFO,
367                "impossible bitrate constraints, this will fail\n");
368     }
369
370     if (avctx->rc_buffer_size &&
371         avctx->bit_rate * (int64_t)avctx->time_base.num >
372             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
373         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
374         return -1;
375     }
376
377     if (!s->fixed_qscale &&
378         avctx->bit_rate * av_q2d(avctx->time_base) >
379             avctx->bit_rate_tolerance) {
380         av_log(avctx, AV_LOG_ERROR,
381                "bitrate tolerance too small for bitrate\n");
382         return -1;
383     }
384
385     if (s->avctx->rc_max_rate &&
386         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
387         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
388          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
389         90000LL * (avctx->rc_buffer_size - 1) >
390             s->avctx->rc_max_rate * 0xFFFFLL) {
391         av_log(avctx, AV_LOG_INFO,
392                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
393                "specified vbv buffer is too large for the given bitrate!\n");
394     }
395
396     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
397         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
398         s->codec_id != AV_CODEC_ID_FLV1) {
399         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
400         return -1;
401     }
402
403     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
404         av_log(avctx, AV_LOG_ERROR,
405                "OBMC is only supported with simple mb decision\n");
406         return -1;
407     }
408
409     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
410         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
411         return -1;
412     }
413
414     if (s->max_b_frames                    &&
415         s->codec_id != AV_CODEC_ID_MPEG4      &&
416         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
417         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
418         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
419         return -1;
420     }
421
422     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
423          s->codec_id == AV_CODEC_ID_H263  ||
424          s->codec_id == AV_CODEC_ID_H263P) &&
425         (avctx->sample_aspect_ratio.num > 255 ||
426          avctx->sample_aspect_ratio.den > 255)) {
427         av_log(avctx, AV_LOG_ERROR,
428                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
429                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
430         return -1;
431     }
432
433     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
434         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
435         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
436         return -1;
437     }
438
439     // FIXME mpeg2 uses that too
440     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
441         av_log(avctx, AV_LOG_ERROR,
442                "mpeg2 style quantization not supported by codec\n");
443         return -1;
444     }
445
446     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
447         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
448         return -1;
449     }
450
451     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
452         s->avctx->mb_decision != FF_MB_DECISION_RD) {
453         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
454         return -1;
455     }
456
457     if (s->avctx->scenechange_threshold < 1000000000 &&
458         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
459         av_log(avctx, AV_LOG_ERROR,
460                "closed gop with scene change detection are not supported yet, "
461                "set threshold to 1000000000\n");
462         return -1;
463     }
464
465     if (s->flags & CODEC_FLAG_LOW_DELAY) {
466         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
467             av_log(avctx, AV_LOG_ERROR,
468                   "low delay forcing is only available for mpeg2\n");
469             return -1;
470         }
471         if (s->max_b_frames != 0) {
472             av_log(avctx, AV_LOG_ERROR,
473                    "b frames cannot be used with low delay\n");
474             return -1;
475         }
476     }
477
478     if (s->q_scale_type == 1) {
479         if (avctx->qmax > 12) {
480             av_log(avctx, AV_LOG_ERROR,
481                    "non linear quant only supports qmax <= 12 currently\n");
482             return -1;
483         }
484     }
485
486     if (s->avctx->thread_count > 1         &&
487         s->codec_id != AV_CODEC_ID_MPEG4      &&
488         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
489         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
490         (s->codec_id != AV_CODEC_ID_H263P)) {
491         av_log(avctx, AV_LOG_ERROR,
492                "multi threaded encoding not supported by codec\n");
493         return -1;
494     }
495
496     if (s->avctx->thread_count < 1) {
497         av_log(avctx, AV_LOG_ERROR,
498                "automatic thread number detection not supported by codec,"
499                "patch welcome\n");
500         return -1;
501     }
502
503     if (s->avctx->thread_count > 1)
504         s->rtp_mode = 1;
505
506     if (!avctx->time_base.den || !avctx->time_base.num) {
507         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
508         return -1;
509     }
510
511     i = (INT_MAX / 2 + 128) >> 8;
512     if (avctx->mb_threshold >= i) {
513         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
514                i - 1);
515         return -1;
516     }
517
518     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
519         av_log(avctx, AV_LOG_INFO,
520                "notice: b_frame_strategy only affects the first pass\n");
521         avctx->b_frame_strategy = 0;
522     }
523
524     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
525     if (i > 1) {
526         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
527         avctx->time_base.den /= i;
528         avctx->time_base.num /= i;
529         //return -1;
530     }
531
532     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
533         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
534         // (a + x * 3 / 8) / x
535         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
536         s->inter_quant_bias = 0;
537     } else {
538         s->intra_quant_bias = 0;
539         // (a - x / 4) / x
540         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
541     }
542
543     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
544         s->intra_quant_bias = avctx->intra_quant_bias;
545     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
546         s->inter_quant_bias = avctx->inter_quant_bias;
547
548     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
549         s->avctx->time_base.den > (1 << 16) - 1) {
550         av_log(avctx, AV_LOG_ERROR,
551                "timebase %d/%d not supported by MPEG 4 standard, "
552                "the maximum admitted value for the timebase denominator "
553                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
554                (1 << 16) - 1);
555         return -1;
556     }
557     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
558
559     switch (avctx->codec->id) {
560     case AV_CODEC_ID_MPEG1VIDEO:
561         s->out_format = FMT_MPEG1;
562         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
563         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
564         break;
565     case AV_CODEC_ID_MPEG2VIDEO:
566         s->out_format = FMT_MPEG1;
567         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
568         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
569         s->rtp_mode   = 1;
570         break;
571     case AV_CODEC_ID_MJPEG:
572         s->out_format = FMT_MJPEG;
573         s->intra_only = 1; /* force intra only for jpeg */
574         if (!CONFIG_MJPEG_ENCODER ||
575             ff_mjpeg_encode_init(s) < 0)
576             return -1;
577         avctx->delay = 0;
578         s->low_delay = 1;
579         break;
580     case AV_CODEC_ID_H261:
581         if (!CONFIG_H261_ENCODER)
582             return -1;
583         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
584             av_log(avctx, AV_LOG_ERROR,
585                    "The specified picture size of %dx%d is not valid for the "
586                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
587                     s->width, s->height);
588             return -1;
589         }
590         s->out_format = FMT_H261;
591         avctx->delay  = 0;
592         s->low_delay  = 1;
593         break;
594     case AV_CODEC_ID_H263:
595         if (!CONFIG_H263_ENCODER)
596         return -1;
597         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
598                              s->width, s->height) == 8) {
599             av_log(avctx, AV_LOG_INFO,
600                    "The specified picture size of %dx%d is not valid for "
601                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
602                    "352x288, 704x576, and 1408x1152."
603                    "Try H.263+.\n", s->width, s->height);
604             return -1;
605         }
606         s->out_format = FMT_H263;
607         avctx->delay  = 0;
608         s->low_delay  = 1;
609         break;
610     case AV_CODEC_ID_H263P:
611         s->out_format = FMT_H263;
612         s->h263_plus  = 1;
613         /* Fx */
614         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
615         s->modified_quant  = s->h263_aic;
616         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
617         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
618
619         /* /Fx */
620         /* These are just to be sure */
621         avctx->delay = 0;
622         s->low_delay = 1;
623         break;
624     case AV_CODEC_ID_FLV1:
625         s->out_format      = FMT_H263;
626         s->h263_flv        = 2; /* format = 1; 11-bit codes */
627         s->unrestricted_mv = 1;
628         s->rtp_mode  = 0; /* don't allow GOB */
629         avctx->delay = 0;
630         s->low_delay = 1;
631         break;
632     case AV_CODEC_ID_RV10:
633         s->out_format = FMT_H263;
634         avctx->delay  = 0;
635         s->low_delay  = 1;
636         break;
637     case AV_CODEC_ID_RV20:
638         s->out_format      = FMT_H263;
639         avctx->delay       = 0;
640         s->low_delay       = 1;
641         s->modified_quant  = 1;
642         s->h263_aic        = 1;
643         s->h263_plus       = 1;
644         s->loop_filter     = 1;
645         s->unrestricted_mv = 0;
646         break;
647     case AV_CODEC_ID_MPEG4:
648         s->out_format      = FMT_H263;
649         s->h263_pred       = 1;
650         s->unrestricted_mv = 1;
651         s->low_delay       = s->max_b_frames ? 0 : 1;
652         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
653         break;
654     case AV_CODEC_ID_MSMPEG4V2:
655         s->out_format      = FMT_H263;
656         s->h263_pred       = 1;
657         s->unrestricted_mv = 1;
658         s->msmpeg4_version = 2;
659         avctx->delay       = 0;
660         s->low_delay       = 1;
661         break;
662     case AV_CODEC_ID_MSMPEG4V3:
663         s->out_format        = FMT_H263;
664         s->h263_pred         = 1;
665         s->unrestricted_mv   = 1;
666         s->msmpeg4_version   = 3;
667         s->flipflop_rounding = 1;
668         avctx->delay         = 0;
669         s->low_delay         = 1;
670         break;
671     case AV_CODEC_ID_WMV1:
672         s->out_format        = FMT_H263;
673         s->h263_pred         = 1;
674         s->unrestricted_mv   = 1;
675         s->msmpeg4_version   = 4;
676         s->flipflop_rounding = 1;
677         avctx->delay         = 0;
678         s->low_delay         = 1;
679         break;
680     case AV_CODEC_ID_WMV2:
681         s->out_format        = FMT_H263;
682         s->h263_pred         = 1;
683         s->unrestricted_mv   = 1;
684         s->msmpeg4_version   = 5;
685         s->flipflop_rounding = 1;
686         avctx->delay         = 0;
687         s->low_delay         = 1;
688         break;
689     default:
690         return -1;
691     }
692
693     avctx->has_b_frames = !s->low_delay;
694
695     s->encoding = 1;
696
697     s->progressive_frame    =
698     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
699                                                 CODEC_FLAG_INTERLACED_ME) ||
700                                 s->alternate_scan);
701
702     /* init */
703     if (ff_MPV_common_init(s) < 0)
704         return -1;
705
706     if (ARCH_X86)
707         ff_MPV_encode_init_x86(s);
708
709     ff_fdctdsp_init(&s->fdsp, avctx);
710     ff_me_cmp_init(&s->mecc, avctx);
711     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
712     ff_pixblockdsp_init(&s->pdsp, avctx);
713     ff_qpeldsp_init(&s->qdsp);
714
715     s->avctx->coded_frame = s->current_picture.f;
716
717     if (s->msmpeg4_version) {
718         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
719                           2 * 2 * (MAX_LEVEL + 1) *
720                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
721     }
722     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
723
724     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
725     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
726     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
727     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
728     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
729                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
730     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
731                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
732
733     if (s->avctx->noise_reduction) {
734         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
735                           2 * 64 * sizeof(uint16_t), fail);
736     }
737
738     if (CONFIG_H263_ENCODER)
739         ff_h263dsp_init(&s->h263dsp);
740     if (!s->dct_quantize)
741         s->dct_quantize = ff_dct_quantize_c;
742     if (!s->denoise_dct)
743         s->denoise_dct  = denoise_dct_c;
744     s->fast_dct_quantize = s->dct_quantize;
745     if (avctx->trellis)
746         s->dct_quantize  = dct_quantize_trellis_c;
747
748     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
749         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
750
751     s->quant_precision = 5;
752
753     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
754     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
755
756     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
757         ff_h261_encode_init(s);
758     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
759         ff_h263_encode_init(s);
760     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
761         ff_msmpeg4_encode_init(s);
762     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
763         && s->out_format == FMT_MPEG1)
764         ff_mpeg1_encode_init(s);
765
766     /* init q matrix */
767     for (i = 0; i < 64; i++) {
768         int j = s->idsp.idct_permutation[i];
769         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
770             s->mpeg_quant) {
771             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
772             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
773         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
774             s->intra_matrix[j] =
775             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
776         } else {
777             /* mpeg1/2 */
778             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
779             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
780         }
781         if (s->avctx->intra_matrix)
782             s->intra_matrix[j] = s->avctx->intra_matrix[i];
783         if (s->avctx->inter_matrix)
784             s->inter_matrix[j] = s->avctx->inter_matrix[i];
785     }
786
787     /* precompute matrix */
788     /* for mjpeg, we do include qscale in the matrix */
789     if (s->out_format != FMT_MJPEG) {
790         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
791                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
792                           31, 1);
793         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
794                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
795                           31, 0);
796     }
797
798     if (ff_rate_control_init(s) < 0)
799         return -1;
800
801 #if FF_API_ERROR_RATE
802     FF_DISABLE_DEPRECATION_WARNINGS
803     if (avctx->error_rate)
804         s->error_rate = avctx->error_rate;
805     FF_ENABLE_DEPRECATION_WARNINGS;
806 #endif
807
808 #if FF_API_NORMALIZE_AQP
809     FF_DISABLE_DEPRECATION_WARNINGS
810     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
811         s->mpv_flags |= FF_MPV_FLAG_NAQ;
812     FF_ENABLE_DEPRECATION_WARNINGS;
813 #endif
814
815 #if FF_API_MV0
816     FF_DISABLE_DEPRECATION_WARNINGS
817     if (avctx->flags & CODEC_FLAG_MV0)
818         s->mpv_flags |= FF_MPV_FLAG_MV0;
819     FF_ENABLE_DEPRECATION_WARNINGS
820 #endif
821
822     if (avctx->b_frame_strategy == 2) {
823         for (i = 0; i < s->max_b_frames + 2; i++) {
824             s->tmp_frames[i] = av_frame_alloc();
825             if (!s->tmp_frames[i])
826                 return AVERROR(ENOMEM);
827
828             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
829             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
830             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
831
832             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
833             if (ret < 0)
834                 return ret;
835         }
836     }
837
838     return 0;
839 fail:
840     ff_MPV_encode_end(avctx);
841     return AVERROR_UNKNOWN;
842 }
843
844 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
845 {
846     MpegEncContext *s = avctx->priv_data;
847     int i;
848
849     ff_rate_control_uninit(s);
850
851     ff_MPV_common_end(s);
852     if (CONFIG_MJPEG_ENCODER &&
853         s->out_format == FMT_MJPEG)
854         ff_mjpeg_encode_close(s);
855
856     av_freep(&avctx->extradata);
857
858     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
859         av_frame_free(&s->tmp_frames[i]);
860
861     ff_free_picture_tables(&s->new_picture);
862     ff_mpeg_unref_picture(s, &s->new_picture);
863
864     av_freep(&s->avctx->stats_out);
865     av_freep(&s->ac_stats);
866
867     av_freep(&s->q_intra_matrix);
868     av_freep(&s->q_inter_matrix);
869     av_freep(&s->q_intra_matrix16);
870     av_freep(&s->q_inter_matrix16);
871     av_freep(&s->input_picture);
872     av_freep(&s->reordered_input_picture);
873     av_freep(&s->dct_offset);
874
875     return 0;
876 }
877
/**
 * Sum of absolute errors of a 16x16 block against a constant value.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared with
 * @param stride distance in bytes between two consecutive rows of src
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
891
892 static int get_intra_count(MpegEncContext *s, uint8_t *src,
893                            uint8_t *ref, int stride)
894 {
895     int x, y, w, h;
896     int acc = 0;
897
898     w = s->width  & ~15;
899     h = s->height & ~15;
900
901     for (y = 0; y < h; y += 16) {
902         for (x = 0; x < w; x += 16) {
903             int offset = x + y * stride;
904             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
905                                       stride, 16);
906             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
907             int sae  = get_sae(src + offset, mean, stride);
908
909             acc += sae + 500 < sad;
910         }
911     }
912     return acc;
913 }
914
915
916 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
917 {
918     Picture *pic = NULL;
919     int64_t pts;
920     int i, display_picture_number = 0, ret;
921     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
922                                                  (s->low_delay ? 0 : 1);
923     int direct = 1;
924
925     if (pic_arg) {
926         pts = pic_arg->pts;
927         display_picture_number = s->input_picture_number++;
928
929         if (pts != AV_NOPTS_VALUE) {
930             if (s->user_specified_pts != AV_NOPTS_VALUE) {
931                 int64_t time = pts;
932                 int64_t last = s->user_specified_pts;
933
934                 if (time <= last) {
935                     av_log(s->avctx, AV_LOG_ERROR,
936                            "Error, Invalid timestamp=%"PRId64", "
937                            "last=%"PRId64"\n", pts, s->user_specified_pts);
938                     return -1;
939                 }
940
941                 if (!s->low_delay && display_picture_number == 1)
942                     s->dts_delta = time - last;
943             }
944             s->user_specified_pts = pts;
945         } else {
946             if (s->user_specified_pts != AV_NOPTS_VALUE) {
947                 s->user_specified_pts =
948                 pts = s->user_specified_pts + 1;
949                 av_log(s->avctx, AV_LOG_INFO,
950                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
951                        pts);
952             } else {
953                 pts = display_picture_number;
954             }
955         }
956     }
957
958     if (pic_arg) {
959         if (!pic_arg->buf[0]);
960             direct = 0;
961         if (pic_arg->linesize[0] != s->linesize)
962             direct = 0;
963         if (pic_arg->linesize[1] != s->uvlinesize)
964             direct = 0;
965         if (pic_arg->linesize[2] != s->uvlinesize)
966             direct = 0;
967
968         av_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
969                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
970
971         if (direct) {
972             i = ff_find_unused_picture(s, 1);
973             if (i < 0)
974                 return i;
975
976             pic = &s->picture[i];
977             pic->reference = 3;
978
979             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
980                 return ret;
981             if (ff_alloc_picture(s, pic, 1) < 0) {
982                 return -1;
983             }
984         } else {
985             i = ff_find_unused_picture(s, 0);
986             if (i < 0)
987                 return i;
988
989             pic = &s->picture[i];
990             pic->reference = 3;
991
992             if (ff_alloc_picture(s, pic, 0) < 0) {
993                 return -1;
994             }
995
996             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
997                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
998                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
999                 // empty
1000             } else {
1001                 int h_chroma_shift, v_chroma_shift;
1002                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1003                                                  &h_chroma_shift,
1004                                                  &v_chroma_shift);
1005
1006                 for (i = 0; i < 3; i++) {
1007                     int src_stride = pic_arg->linesize[i];
1008                     int dst_stride = i ? s->uvlinesize : s->linesize;
1009                     int h_shift = i ? h_chroma_shift : 0;
1010                     int v_shift = i ? v_chroma_shift : 0;
1011                     int w = s->width  >> h_shift;
1012                     int h = s->height >> v_shift;
1013                     uint8_t *src = pic_arg->data[i];
1014                     uint8_t *dst = pic->f->data[i];
1015
1016                     if (!s->avctx->rc_buffer_size)
1017                         dst += INPLACE_OFFSET;
1018
1019                     if (src_stride == dst_stride)
1020                         memcpy(dst, src, src_stride * h);
1021                     else {
1022                         while (h--) {
1023                             memcpy(dst, src, w);
1024                             dst += dst_stride;
1025                             src += src_stride;
1026                         }
1027                     }
1028                 }
1029             }
1030         }
1031         ret = av_frame_copy_props(pic->f, pic_arg);
1032         if (ret < 0)
1033             return ret;
1034
1035         pic->f->display_picture_number = display_picture_number;
1036         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1037     }
1038
1039     /* shift buffer entries */
1040     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1041         s->input_picture[i - 1] = s->input_picture[i];
1042
1043     s->input_picture[encoding_delay] = (Picture*) pic;
1044
1045     return 0;
1046 }
1047
1048 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1049 {
1050     int x, y, plane;
1051     int score = 0;
1052     int64_t score64 = 0;
1053
1054     for (plane = 0; plane < 3; plane++) {
1055         const int stride = p->f->linesize[plane];
1056         const int bw = plane ? 1 : 2;
1057         for (y = 0; y < s->mb_height * bw; y++) {
1058             for (x = 0; x < s->mb_width * bw; x++) {
1059                 int off = p->shared ? 0 : 16;
1060                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1061                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1062                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1063
1064                 switch (s->avctx->frame_skip_exp) {
1065                 case 0: score    =  FFMAX(score, v);          break;
1066                 case 1: score   += FFABS(v);                  break;
1067                 case 2: score   += v * v;                     break;
1068                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1069                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1070                 }
1071             }
1072         }
1073     }
1074
1075     if (score)
1076         score64 = score;
1077
1078     if (score64 < s->avctx->frame_skip_threshold)
1079         return 1;
1080     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1081         return 1;
1082     return 0;
1083 }
1084
1085 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1086 {
1087     AVPacket pkt = { 0 };
1088     int ret, got_output;
1089
1090     av_init_packet(&pkt);
1091     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1092     if (ret < 0)
1093         return ret;
1094
1095     ret = pkt.size;
1096     av_free_packet(&pkt);
1097     return ret;
1098 }
1099
1100 static int estimate_best_b_count(MpegEncContext *s)
1101 {
1102     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1103     AVCodecContext *c = avcodec_alloc_context3(NULL);
1104     const int scale = s->avctx->brd_scale;
1105     int i, j, out_size, p_lambda, b_lambda, lambda2;
1106     int64_t best_rd  = INT64_MAX;
1107     int best_b_count = -1;
1108
1109     assert(scale >= 0 && scale <= 3);
1110
1111     //emms_c();
1112     //s->next_picture_ptr->quality;
1113     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1114     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1115     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1116     if (!b_lambda) // FIXME we should do this somewhere else
1117         b_lambda = p_lambda;
1118     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1119                FF_LAMBDA_SHIFT;
1120
1121     c->width        = s->width  >> scale;
1122     c->height       = s->height >> scale;
1123     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1124     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1125     c->mb_decision  = s->avctx->mb_decision;
1126     c->me_cmp       = s->avctx->me_cmp;
1127     c->mb_cmp       = s->avctx->mb_cmp;
1128     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1129     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1130     c->time_base    = s->avctx->time_base;
1131     c->max_b_frames = s->max_b_frames;
1132
1133     if (avcodec_open2(c, codec, NULL) < 0)
1134         return -1;
1135
1136     for (i = 0; i < s->max_b_frames + 2; i++) {
1137         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1138                                                 s->next_picture_ptr;
1139
1140         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1141             pre_input = *pre_input_ptr;
1142
1143             if (!pre_input.shared && i) {
1144                 pre_input.f->data[0] += INPLACE_OFFSET;
1145                 pre_input.f->data[1] += INPLACE_OFFSET;
1146                 pre_input.f->data[2] += INPLACE_OFFSET;
1147             }
1148
1149             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1150                                        s->tmp_frames[i]->linesize[0],
1151                                        pre_input.f->data[0],
1152                                        pre_input.f->linesize[0],
1153                                        c->width, c->height);
1154             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1155                                        s->tmp_frames[i]->linesize[1],
1156                                        pre_input.f->data[1],
1157                                        pre_input.f->linesize[1],
1158                                        c->width >> 1, c->height >> 1);
1159             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1160                                        s->tmp_frames[i]->linesize[2],
1161                                        pre_input.f->data[2],
1162                                        pre_input.f->linesize[2],
1163                                        c->width >> 1, c->height >> 1);
1164         }
1165     }
1166
1167     for (j = 0; j < s->max_b_frames + 1; j++) {
1168         int64_t rd = 0;
1169
1170         if (!s->input_picture[j])
1171             break;
1172
1173         c->error[0] = c->error[1] = c->error[2] = 0;
1174
1175         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1176         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1177
1178         out_size = encode_frame(c, s->tmp_frames[0]);
1179
1180         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1181
1182         for (i = 0; i < s->max_b_frames + 1; i++) {
1183             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1184
1185             s->tmp_frames[i + 1]->pict_type = is_p ?
1186                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1187             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1188
1189             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1190
1191             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1192         }
1193
1194         /* get the delayed frames */
1195         while (out_size) {
1196             out_size = encode_frame(c, NULL);
1197             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1198         }
1199
1200         rd += c->error[0] + c->error[1] + c->error[2];
1201
1202         if (rd < best_rd) {
1203             best_rd = rd;
1204             best_b_count = j;
1205         }
1206     }
1207
1208     avcodec_close(c);
1209     av_freep(&c);
1210
1211     return best_b_count;
1212 }
1213
1214 static int select_input_picture(MpegEncContext *s)
1215 {
1216     int i, ret;
1217
1218     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1219         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1220     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1221
1222     /* set next picture type & ordering */
1223     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1224         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1225             s->next_picture_ptr == NULL || s->intra_only) {
1226             s->reordered_input_picture[0] = s->input_picture[0];
1227             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1228             s->reordered_input_picture[0]->f->coded_picture_number =
1229                 s->coded_picture_number++;
1230         } else {
1231             int b_frames;
1232
1233             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1234                 if (s->picture_in_gop_number < s->gop_size &&
1235                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1236                     // FIXME check that te gop check above is +-1 correct
1237                     av_frame_unref(s->input_picture[0]->f);
1238
1239                     emms_c();
1240                     ff_vbv_update(s, 0);
1241
1242                     goto no_output_pic;
1243                 }
1244             }
1245
1246             if (s->flags & CODEC_FLAG_PASS2) {
1247                 for (i = 0; i < s->max_b_frames + 1; i++) {
1248                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1249
1250                     if (pict_num >= s->rc_context.num_entries)
1251                         break;
1252                     if (!s->input_picture[i]) {
1253                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1254                         break;
1255                     }
1256
1257                     s->input_picture[i]->f->pict_type =
1258                         s->rc_context.entry[pict_num].new_pict_type;
1259                 }
1260             }
1261
1262             if (s->avctx->b_frame_strategy == 0) {
1263                 b_frames = s->max_b_frames;
1264                 while (b_frames && !s->input_picture[b_frames])
1265                     b_frames--;
1266             } else if (s->avctx->b_frame_strategy == 1) {
1267                 for (i = 1; i < s->max_b_frames + 1; i++) {
1268                     if (s->input_picture[i] &&
1269                         s->input_picture[i]->b_frame_score == 0) {
1270                         s->input_picture[i]->b_frame_score =
1271                             get_intra_count(s,
1272                                             s->input_picture[i    ]->f->data[0],
1273                                             s->input_picture[i - 1]->f->data[0],
1274                                             s->linesize) + 1;
1275                     }
1276                 }
1277                 for (i = 0; i < s->max_b_frames + 1; i++) {
1278                     if (s->input_picture[i] == NULL ||
1279                         s->input_picture[i]->b_frame_score - 1 >
1280                             s->mb_num / s->avctx->b_sensitivity)
1281                         break;
1282                 }
1283
1284                 b_frames = FFMAX(0, i - 1);
1285
1286                 /* reset scores */
1287                 for (i = 0; i < b_frames + 1; i++) {
1288                     s->input_picture[i]->b_frame_score = 0;
1289                 }
1290             } else if (s->avctx->b_frame_strategy == 2) {
1291                 b_frames = estimate_best_b_count(s);
1292             } else {
1293                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1294                 b_frames = 0;
1295             }
1296
1297             emms_c();
1298
1299             for (i = b_frames - 1; i >= 0; i--) {
1300                 int type = s->input_picture[i]->f->pict_type;
1301                 if (type && type != AV_PICTURE_TYPE_B)
1302                     b_frames = i;
1303             }
1304             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1305                 b_frames == s->max_b_frames) {
1306                 av_log(s->avctx, AV_LOG_ERROR,
1307                        "warning, too many b frames in a row\n");
1308             }
1309
1310             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1311                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1312                     s->gop_size > s->picture_in_gop_number) {
1313                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1314                 } else {
1315                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1316                         b_frames = 0;
1317                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1318                 }
1319             }
1320
1321             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1322                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1323                 b_frames--;
1324
1325             s->reordered_input_picture[0] = s->input_picture[b_frames];
1326             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1327                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1328             s->reordered_input_picture[0]->f->coded_picture_number =
1329                 s->coded_picture_number++;
1330             for (i = 0; i < b_frames; i++) {
1331                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1332                 s->reordered_input_picture[i + 1]->f->pict_type =
1333                     AV_PICTURE_TYPE_B;
1334                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1335                     s->coded_picture_number++;
1336             }
1337         }
1338     }
1339 no_output_pic:
1340     if (s->reordered_input_picture[0]) {
1341         s->reordered_input_picture[0]->reference =
1342            s->reordered_input_picture[0]->f->pict_type !=
1343                AV_PICTURE_TYPE_B ? 3 : 0;
1344
1345         ff_mpeg_unref_picture(s, &s->new_picture);
1346         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1347             return ret;
1348
1349         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1350             // input is a shared pix, so we can't modifiy it -> alloc a new
1351             // one & ensure that the shared one is reuseable
1352
1353             Picture *pic;
1354             int i = ff_find_unused_picture(s, 0);
1355             if (i < 0)
1356                 return i;
1357             pic = &s->picture[i];
1358
1359             pic->reference = s->reordered_input_picture[0]->reference;
1360             if (ff_alloc_picture(s, pic, 0) < 0) {
1361                 return -1;
1362             }
1363
1364             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1365             if (ret < 0)
1366                 return ret;
1367
1368             /* mark us unused / free shared pic */
1369             av_frame_unref(s->reordered_input_picture[0]->f);
1370             s->reordered_input_picture[0]->shared = 0;
1371
1372             s->current_picture_ptr = pic;
1373         } else {
1374             // input is not a shared pix -> reuse buffer for current_pix
1375             s->current_picture_ptr = s->reordered_input_picture[0];
1376             for (i = 0; i < 4; i++) {
1377                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1378             }
1379         }
1380         ff_mpeg_unref_picture(s, &s->current_picture);
1381         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1382                                        s->current_picture_ptr)) < 0)
1383             return ret;
1384
1385         s->picture_number = s->new_picture.f->display_picture_number;
1386     } else {
1387         ff_mpeg_unref_picture(s, &s->new_picture);
1388     }
1389     return 0;
1390 }
1391
1392 static void frame_end(MpegEncContext *s)
1393 {
1394     int i;
1395
1396     if (s->unrestricted_mv &&
1397         s->current_picture.reference &&
1398         !s->intra_only) {
1399         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1400         int hshift = desc->log2_chroma_w;
1401         int vshift = desc->log2_chroma_h;
1402         s->mpvencdsp.draw_edges(s->current_picture.f->data[0], s->linesize,
1403                                 s->h_edge_pos, s->v_edge_pos,
1404                                 EDGE_WIDTH, EDGE_WIDTH,
1405                                 EDGE_TOP | EDGE_BOTTOM);
1406         s->mpvencdsp.draw_edges(s->current_picture.f->data[1], s->uvlinesize,
1407                                 s->h_edge_pos >> hshift,
1408                                 s->v_edge_pos >> vshift,
1409                                 EDGE_WIDTH >> hshift,
1410                                 EDGE_WIDTH >> vshift,
1411                                 EDGE_TOP | EDGE_BOTTOM);
1412         s->mpvencdsp.draw_edges(s->current_picture.f->data[2], s->uvlinesize,
1413                                 s->h_edge_pos >> hshift,
1414                                 s->v_edge_pos >> vshift,
1415                                 EDGE_WIDTH >> hshift,
1416                                 EDGE_WIDTH >> vshift,
1417                                 EDGE_TOP | EDGE_BOTTOM);
1418     }
1419
1420     emms_c();
1421
1422     s->last_pict_type                 = s->pict_type;
1423     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1424     if (s->pict_type!= AV_PICTURE_TYPE_B)
1425         s->last_non_b_pict_type = s->pict_type;
1426
1427     if (s->encoding) {
1428         /* release non-reference frames */
1429         for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1430             if (!s->picture[i].reference)
1431                 ff_mpeg_unref_picture(s, &s->picture[i]);
1432         }
1433     }
1434
1435     s->avctx->coded_frame = s->current_picture_ptr->f;
1436
1437 }
1438
1439 static void update_noise_reduction(MpegEncContext *s)
1440 {
1441     int intra, i;
1442
1443     for (intra = 0; intra < 2; intra++) {
1444         if (s->dct_count[intra] > (1 << 16)) {
1445             for (i = 0; i < 64; i++) {
1446                 s->dct_error_sum[intra][i] >>= 1;
1447             }
1448             s->dct_count[intra] >>= 1;
1449         }
1450
1451         for (i = 0; i < 64; i++) {
1452             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1453                                        s->dct_count[intra] +
1454                                        s->dct_error_sum[intra][i] / 2) /
1455                                       (s->dct_error_sum[intra][i] + 1);
1456         }
1457     }
1458 }
1459
1460 static int frame_start(MpegEncContext *s)
1461 {
1462     int ret;
1463
1464     /* mark & release old frames */
1465     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1466         s->last_picture_ptr != s->next_picture_ptr &&
1467         s->last_picture_ptr->f->buf[0]) {
1468         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1469     }
1470
1471     s->current_picture_ptr->f->pict_type = s->pict_type;
1472     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1473
1474     ff_mpeg_unref_picture(s, &s->current_picture);
1475     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1476                                    s->current_picture_ptr)) < 0)
1477         return ret;
1478
1479     if (s->pict_type != AV_PICTURE_TYPE_B) {
1480         s->last_picture_ptr = s->next_picture_ptr;
1481         if (!s->droppable)
1482             s->next_picture_ptr = s->current_picture_ptr;
1483     }
1484
1485     if (s->last_picture_ptr) {
1486         ff_mpeg_unref_picture(s, &s->last_picture);
1487         if (s->last_picture_ptr->f->buf[0] &&
1488             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1489                                        s->last_picture_ptr)) < 0)
1490             return ret;
1491     }
1492     if (s->next_picture_ptr) {
1493         ff_mpeg_unref_picture(s, &s->next_picture);
1494         if (s->next_picture_ptr->f->buf[0] &&
1495             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1496                                        s->next_picture_ptr)) < 0)
1497             return ret;
1498     }
1499
1500     if (s->picture_structure!= PICT_FRAME) {
1501         int i;
1502         for (i = 0; i < 4; i++) {
1503             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1504                 s->current_picture.f->data[i] +=
1505                     s->current_picture.f->linesize[i];
1506             }
1507             s->current_picture.f->linesize[i] *= 2;
1508             s->last_picture.f->linesize[i]    *= 2;
1509             s->next_picture.f->linesize[i]    *= 2;
1510         }
1511     }
1512
1513     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1514         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1515         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1516     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1517         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1518         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1519     } else {
1520         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1521         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1522     }
1523
1524     if (s->dct_error_sum) {
1525         assert(s->avctx->noise_reduction && s->encoding);
1526         update_noise_reduction(s);
1527     }
1528
1529     return 0;
1530 }
1531
1532 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1533                           const AVFrame *pic_arg, int *got_packet)
1534 {
1535     MpegEncContext *s = avctx->priv_data;
1536     int i, stuffing_count, ret;
1537     int context_count = s->slice_context_count;
1538
1539     s->picture_in_gop_number++;
1540
1541     if (load_input_picture(s, pic_arg) < 0)
1542         return -1;
1543
1544     if (select_input_picture(s) < 0) {
1545         return -1;
1546     }
1547
1548     /* output? */
1549     if (s->new_picture.f->data[0]) {
1550         if (!pkt->data &&
1551             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1552             return ret;
1553         if (s->mb_info) {
1554             s->mb_info_ptr = av_packet_new_side_data(pkt,
1555                                  AV_PKT_DATA_H263_MB_INFO,
1556                                  s->mb_width*s->mb_height*12);
1557             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1558         }
1559
1560         for (i = 0; i < context_count; i++) {
1561             int start_y = s->thread_context[i]->start_mb_y;
1562             int   end_y = s->thread_context[i]->  end_mb_y;
1563             int h       = s->mb_height;
1564             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1565             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1566
1567             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1568         }
1569
1570         s->pict_type = s->new_picture.f->pict_type;
1571         //emms_c();
1572         ret = frame_start(s);
1573         if (ret < 0)
1574             return ret;
1575 vbv_retry:
1576         if (encode_picture(s, s->picture_number) < 0)
1577             return -1;
1578
1579         avctx->header_bits = s->header_bits;
1580         avctx->mv_bits     = s->mv_bits;
1581         avctx->misc_bits   = s->misc_bits;
1582         avctx->i_tex_bits  = s->i_tex_bits;
1583         avctx->p_tex_bits  = s->p_tex_bits;
1584         avctx->i_count     = s->i_count;
1585         // FIXME f/b_count in avctx
1586         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1587         avctx->skip_count  = s->skip_count;
1588
1589         frame_end(s);
1590
1591         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1592             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1593
1594         if (avctx->rc_buffer_size) {
1595             RateControlContext *rcc = &s->rc_context;
1596             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1597
1598             if (put_bits_count(&s->pb) > max_size &&
1599                 s->lambda < s->avctx->lmax) {
1600                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1601                                        (s->qscale + 1) / s->qscale);
1602                 if (s->adaptive_quant) {
1603                     int i;
1604                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1605                         s->lambda_table[i] =
1606                             FFMAX(s->lambda_table[i] + 1,
1607                                   s->lambda_table[i] * (s->qscale + 1) /
1608                                   s->qscale);
1609                 }
1610                 s->mb_skipped = 0;        // done in frame_start()
1611                 // done in encode_picture() so we must undo it
1612                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1613                     if (s->flipflop_rounding          ||
1614                         s->codec_id == AV_CODEC_ID_H263P ||
1615                         s->codec_id == AV_CODEC_ID_MPEG4)
1616                         s->no_rounding ^= 1;
1617                 }
1618                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1619                     s->time_base       = s->last_time_base;
1620                     s->last_non_b_time = s->time - s->pp_time;
1621                 }
1622                 for (i = 0; i < context_count; i++) {
1623                     PutBitContext *pb = &s->thread_context[i]->pb;
1624                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1625                 }
1626                 goto vbv_retry;
1627             }
1628
1629             assert(s->avctx->rc_max_rate);
1630         }
1631
1632         if (s->flags & CODEC_FLAG_PASS1)
1633             ff_write_pass1_stats(s);
1634
1635         for (i = 0; i < 4; i++) {
1636             s->current_picture_ptr->f->error[i] = s->current_picture.f->error[i];
1637             avctx->error[i] += s->current_picture_ptr->f->error[i];
1638         }
1639
1640         if (s->flags & CODEC_FLAG_PASS1)
1641             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1642                    avctx->i_tex_bits + avctx->p_tex_bits ==
1643                        put_bits_count(&s->pb));
1644         flush_put_bits(&s->pb);
1645         s->frame_bits  = put_bits_count(&s->pb);
1646
1647         stuffing_count = ff_vbv_update(s, s->frame_bits);
1648         if (stuffing_count) {
1649             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1650                     stuffing_count + 50) {
1651                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1652                 return -1;
1653             }
1654
1655             switch (s->codec_id) {
1656             case AV_CODEC_ID_MPEG1VIDEO:
1657             case AV_CODEC_ID_MPEG2VIDEO:
1658                 while (stuffing_count--) {
1659                     put_bits(&s->pb, 8, 0);
1660                 }
1661             break;
1662             case AV_CODEC_ID_MPEG4:
1663                 put_bits(&s->pb, 16, 0);
1664                 put_bits(&s->pb, 16, 0x1C3);
1665                 stuffing_count -= 4;
1666                 while (stuffing_count--) {
1667                     put_bits(&s->pb, 8, 0xFF);
1668                 }
1669             break;
1670             default:
1671                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1672             }
1673             flush_put_bits(&s->pb);
1674             s->frame_bits  = put_bits_count(&s->pb);
1675         }
1676
1677         /* update mpeg1/2 vbv_delay for CBR */
1678         if (s->avctx->rc_max_rate                          &&
1679             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1680             s->out_format == FMT_MPEG1                     &&
1681             90000LL * (avctx->rc_buffer_size - 1) <=
1682                 s->avctx->rc_max_rate * 0xFFFFLL) {
1683             int vbv_delay, min_delay;
1684             double inbits  = s->avctx->rc_max_rate *
1685                              av_q2d(s->avctx->time_base);
1686             int    minbits = s->frame_bits - 8 *
1687                              (s->vbv_delay_ptr - s->pb.buf - 1);
1688             double bits    = s->rc_context.buffer_index + minbits - inbits;
1689
1690             if (bits < 0)
1691                 av_log(s->avctx, AV_LOG_ERROR,
1692                        "Internal error, negative bits\n");
1693
1694             assert(s->repeat_first_field == 0);
1695
1696             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1697             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1698                         s->avctx->rc_max_rate;
1699
1700             vbv_delay = FFMAX(vbv_delay, min_delay);
1701
1702             assert(vbv_delay < 0xFFFF);
1703
1704             s->vbv_delay_ptr[0] &= 0xF8;
1705             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1706             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1707             s->vbv_delay_ptr[2] &= 0x07;
1708             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1709             avctx->vbv_delay     = vbv_delay * 300;
1710         }
1711         s->total_bits     += s->frame_bits;
1712         avctx->frame_bits  = s->frame_bits;
1713
1714         pkt->pts = s->current_picture.f->pts;
1715         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1716             if (!s->current_picture.f->coded_picture_number)
1717                 pkt->dts = pkt->pts - s->dts_delta;
1718             else
1719                 pkt->dts = s->reordered_pts;
1720             s->reordered_pts = pkt->pts;
1721         } else
1722             pkt->dts = pkt->pts;
1723         if (s->current_picture.f->key_frame)
1724             pkt->flags |= AV_PKT_FLAG_KEY;
1725         if (s->mb_info)
1726             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1727     } else {
1728         s->frame_bits = 0;
1729     }
1730     assert((s->frame_bits & 7) == 0);
1731
1732     pkt->size = s->frame_bits / 8;
1733     *got_packet = !!pkt->size;
1734     return 0;
1735 }
1736
1737 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1738                                                 int n, int threshold)
1739 {
1740     static const char tab[64] = {
1741         3, 2, 2, 1, 1, 1, 1, 1,
1742         1, 1, 1, 1, 1, 1, 1, 1,
1743         1, 1, 1, 1, 1, 1, 1, 1,
1744         0, 0, 0, 0, 0, 0, 0, 0,
1745         0, 0, 0, 0, 0, 0, 0, 0,
1746         0, 0, 0, 0, 0, 0, 0, 0,
1747         0, 0, 0, 0, 0, 0, 0, 0,
1748         0, 0, 0, 0, 0, 0, 0, 0
1749     };
1750     int score = 0;
1751     int run = 0;
1752     int i;
1753     int16_t *block = s->block[n];
1754     const int last_index = s->block_last_index[n];
1755     int skip_dc;
1756
1757     if (threshold < 0) {
1758         skip_dc = 0;
1759         threshold = -threshold;
1760     } else
1761         skip_dc = 1;
1762
1763     /* Are all we could set to zero already zero? */
1764     if (last_index <= skip_dc - 1)
1765         return;
1766
1767     for (i = 0; i <= last_index; i++) {
1768         const int j = s->intra_scantable.permutated[i];
1769         const int level = FFABS(block[j]);
1770         if (level == 1) {
1771             if (skip_dc && i == 0)
1772                 continue;
1773             score += tab[run];
1774             run = 0;
1775         } else if (level > 1) {
1776             return;
1777         } else {
1778             run++;
1779         }
1780     }
1781     if (score >= threshold)
1782         return;
1783     for (i = skip_dc; i <= last_index; i++) {
1784         const int j = s->intra_scantable.permutated[i];
1785         block[j] = 0;
1786     }
1787     if (block[0])
1788         s->block_last_index[n] = 0;
1789     else
1790         s->block_last_index[n] = -1;
1791 }
1792
1793 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1794                                int last_index)
1795 {
1796     int i;
1797     const int maxlevel = s->max_qcoeff;
1798     const int minlevel = s->min_qcoeff;
1799     int overflow = 0;
1800
1801     if (s->mb_intra) {
1802         i = 1; // skip clipping of intra dc
1803     } else
1804         i = 0;
1805
1806     for (; i <= last_index; i++) {
1807         const int j = s->intra_scantable.permutated[i];
1808         int level = block[j];
1809
1810         if (level > maxlevel) {
1811             level = maxlevel;
1812             overflow++;
1813         } else if (level < minlevel) {
1814             level = minlevel;
1815             overflow++;
1816         }
1817
1818         block[j] = level;
1819     }
1820
1821     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1822         av_log(s->avctx, AV_LOG_INFO,
1823                "warning, clipping %d dct coefficients to %d..%d\n",
1824                overflow, minlevel, maxlevel);
1825 }
1826
/**
 * Fill an 8x8 weight table from the local activity of an 8x8 pixel block.
 *
 * For every pixel the sum and sum of squares over its neighbourhood (up to
 * 3x3, cropped at the block border) are accumulated and the weight is set to
 * 36 * ff_sqrt(count * sqr - sum * sum) / count.
 *
 * @param weight output, 64 entries stored row by row
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance in bytes between two pixel rows
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            int total    = 0;
            int total_sq = 0;
            int samples  = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                for (nx = left; nx < right; nx++) {
                    const int pix = ptr[nx + ny * stride];

                    total    += pix;
                    total_sq += pix * pix;
                    samples++;
                }
            }
            weight[col + 8 * row] =
                (36 * ff_sqrt(samples * total_sq - total * total)) / samples;
        }
    }
}
1850
/**
 * Encode the macroblock at (s->mb_x, s->mb_y).
 *
 * Steps, in order: select the quantizer, locate the source pixels, load
 * either the pixels themselves (intra) or the difference against the
 * motion-compensated prediction written to s->dest (non-intra), optionally
 * decide between frame and field DCT, DCT + quantize every block that is not
 * skipped, optionally refine / eliminate coefficients, and finally call the
 * macroblock writer matching s->codec_id.
 *
 * @param s               encoder context
 * @param motion_x        passed through to the codec-specific macroblock writer
 * @param motion_y        passed through to the codec-specific macroblock writer
 * @param mb_block_height number of chroma lines per macroblock; used for the
 *                        chroma row offset and the chroma edge-emulation
 *                        height (encode_mb() passes 8 or 16)
 * @param mb_block_count  number of 8x8 blocks per macroblock
 *                        (encode_mb() passes 6 or 8)
 */
1851 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1852                                                 int motion_x, int motion_y,
1853                                                 int mb_block_height,
1854                                                 int mb_block_count)
1855 {
1856     int16_t weight[8][64];
1857     int16_t orig[8][64];
1858     const int mb_x = s->mb_x;
1859     const int mb_y = s->mb_y;
1860     int i;
1861     int skip_dct[8];
1862     int dct_offset = s->linesize * 8; // default for progressive frames
1863     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1864     ptrdiff_t wrap_y, wrap_c;
1865
1866     for (i = 0; i < mb_block_count; i++)
1867         skip_dct[i] = s->skipdct;
1868
         /* Per-macroblock quantizer: lambda comes from lambda_table; unless
          * QP_RD is enabled the target qscale is read from qscale_table and
          * dquant is the step from the previous qscale, clipped to +-2 for
          * FMT_H263 and forced to 0 in the MPEG-4 cases tested below. */
1869     if (s->adaptive_quant) {
1870         const int last_qp = s->qscale;
1871         const int mb_xy = mb_x + mb_y * s->mb_stride;
1872
1873         s->lambda = s->lambda_table[mb_xy];
1874         update_qscale(s);
1875
1876         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1877             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1878             s->dquant = s->qscale - last_qp;
1879
1880             if (s->out_format == FMT_H263) {
1881                 s->dquant = av_clip(s->dquant, -2, 2);
1882
1883                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1884                     if (!s->mb_intra) {
1885                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1886                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1887                                 s->dquant = 0;
1888                         }
1889                         if (s->mv_type == MV_TYPE_8X8)
1890                             s->dquant = 0;
1891                     }
1892                 }
1893             }
1894         }
1895         ff_set_qscale(s, last_qp + s->dquant);
1896     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1897         ff_set_qscale(s, s->qscale + s->dquant);
1898
         /* Source pointers for the luma and the two chroma planes. */
1899     wrap_y = s->linesize;
1900     wrap_c = s->uvlinesize;
1901     ptr_y  = s->new_picture.f->data[0] +
1902              (mb_y * 16 * wrap_y)              + mb_x * 16;
1903     ptr_cb = s->new_picture.f->data[1] +
1904              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1905     ptr_cr = s->new_picture.f->data[2] +
1906              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1907
         /* The macroblock reaches past the picture width or height: have
          * emulated_edge_mc() produce copies inside edge_emu_buffer and read
          * the source from there instead.
          * NOTE(review): the chroma calls pass mb_y * 8 and s->height >> 1
          * even when mb_block_height is 16 - confirm this is intended. */
1908     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1909         uint8_t *ebuf = s->edge_emu_buffer + 32;
1910         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
1911                                  wrap_y, wrap_y,
1912                                  16, 16, mb_x * 16, mb_y * 16,
1913                                  s->width, s->height);
1914         ptr_y = ebuf;
1915         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
1916                                  wrap_c, wrap_c,
1917                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1918                                  s->width >> 1, s->height >> 1);
1919         ptr_cb = ebuf + 18 * wrap_y;
1920         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
1921                                  wrap_c, wrap_c,
1922                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1923                                  s->width >> 1, s->height >> 1);
1924         ptr_cr = ebuf + 18 * wrap_y + 8;
1925     }
1926
1927     if (s->mb_intra) {
             /* Frame vs field DCT: the score of the upper and lower 8 lines
              * (minus a bias of 400) is compared with the score of the even
              * and odd lines; field DCT is used only when it scores lower. */
1928         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1929             int progressive_score, interlaced_score;
1930
1931             s->interlaced_dct = 0;
1932             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
1933                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1934                                                      NULL, wrap_y, 8) - 400;
1935
1936             if (progressive_score > 0) {
1937                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
1938                                                         NULL, wrap_y * 2, 8) +
1939                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
1940                                                         NULL, wrap_y * 2, 8);
1941                 if (progressive_score > interlaced_score) {
1942                     s->interlaced_dct = 1;
1943
                         /* field DCT: the lower blocks start one line down
                          * and every block reads every second line */
1944                     dct_offset = wrap_y;
1945                     wrap_y <<= 1;
1946                     if (s->chroma_format == CHROMA_422)
1947                         wrap_c <<= 1;
1948                 }
1949             }
1950         }
1951
1952         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
1953         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
1954         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
1955         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
1956
1957         if (s->flags & CODEC_FLAG_GRAY) {
1958             skip_dct[4] = 1;
1959             skip_dct[5] = 1;
1960         } else {
1961             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1962             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1963             if (!s->chroma_y_shift) { /* 422 */
1964                 s->pdsp.get_pixels(s->block[6],
1965                                    ptr_cb + (dct_offset >> 1), wrap_c);
1966                 s->pdsp.get_pixels(s->block[7],
1967                                    ptr_cr + (dct_offset >> 1), wrap_c);
1968             }
1969         }
1970     } else {
1971         op_pixels_func (*op_pix)[4];
1972         qpel_mc_func (*op_qpix)[16];
1973         uint8_t *dest_y, *dest_cb, *dest_cr;
1974
1975         dest_y  = s->dest[0];
1976         dest_cb = s->dest[1];
1977         dest_cr = s->dest[2];
1978
1979         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1980             op_pix  = s->hdsp.put_pixels_tab;
1981             op_qpix = s->qdsp.put_qpel_pixels_tab;
1982         } else {
1983             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
1984             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
1985         }
1986
             /* Build the prediction in s->dest; after a forward prediction
              * the op tables are switched to the avg variants before the
              * backward prediction is applied. */
1987         if (s->mv_dir & MV_DIR_FORWARD) {
1988             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
1989                           s->last_picture.f->data,
1990                           op_pix, op_qpix);
1991             op_pix  = s->hdsp.avg_pixels_tab;
1992             op_qpix = s->qdsp.avg_qpel_pixels_tab;
1993         }
1994         if (s->mv_dir & MV_DIR_BACKWARD) {
1995             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
1996                           s->next_picture.f->data,
1997                           op_pix, op_qpix);
1998         }
1999
             /* Same frame/field decision as in the intra branch, but scored
              * on source vs prediction; FF_CMP_VSSE gets an extra bias of 400. */
2000         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2001             int progressive_score, interlaced_score;
2002
2003             s->interlaced_dct = 0;
2004             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2005                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2006                                                      ptr_y + wrap_y * 8,
2007                                                      wrap_y, 8) - 400;
2008
2009             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2010                 progressive_score -= 400;
2011
2012             if (progressive_score > 0) {
2013                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2014                                                         wrap_y * 2, 8) +
2015                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2016                                                         ptr_y + wrap_y,
2017                                                         wrap_y * 2, 8);
2018
2019                 if (progressive_score > interlaced_score) {
2020                     s->interlaced_dct = 1;
2021
2022                     dct_offset = wrap_y;
2023                     wrap_y <<= 1;
2024                     if (s->chroma_format == CHROMA_422)
2025                         wrap_c <<= 1;
2026                 }
2027             }
2028         }
2029
2030         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2031         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2032         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2033                             dest_y + dct_offset, wrap_y);
2034         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2035                             dest_y + dct_offset + 8, wrap_y);
2036
2037         if (s->flags & CODEC_FLAG_GRAY) {
2038             skip_dct[4] = 1;
2039             skip_dct[5] = 1;
2040         } else {
2041             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2042             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2043             if (!s->chroma_y_shift) { /* 422 */
2044                 s->pdsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
2045                                     dest_cb + (dct_offset >> 1), wrap_c);
2046                 s->pdsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
2047                                     dest_cr + (dct_offset >> 1), wrap_c);
2048             }
2049         }
2050         /* pre quantization */
             /* When mc_mb_var of this macroblock is below 2 * qscale^2, every
              * block whose SAD against the prediction is below 20 * qscale
              * is marked to skip DCT and quantization. */
2051         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2052                 2 * s->qscale * s->qscale) {
2053             // FIXME optimize
2054             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2055                 skip_dct[0] = 1;
2056             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2057                 skip_dct[1] = 1;
2058             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2059                                wrap_y, 8) < 20 * s->qscale)
2060                 skip_dct[2] = 1;
2061             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2062                                wrap_y, 8) < 20 * s->qscale)
2063                 skip_dct[3] = 1;
2064             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2065                 skip_dct[4] = 1;
2066             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2067                 skip_dct[5] = 1;
2068             if (!s->chroma_y_shift) { /* 422 */
2069                 if (s->mecc.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2070                                    dest_cb + (dct_offset >> 1),
2071                                    wrap_c, 8) < 20 * s->qscale)
2072                     skip_dct[6] = 1;
2073                 if (s->mecc.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2074                                    dest_cr + (dct_offset >> 1),
2075                                    wrap_c, 8) < 20 * s->qscale)
2076                     skip_dct[7] = 1;
2077             }
2078         }
2079     }
2080
         /* Noise shaping needs a weight table per coded block and a copy of
          * the untransformed block data; both are consumed by
          * dct_quantize_refine() further down. */
2081     if (s->quantizer_noise_shaping) {
2082         if (!skip_dct[0])
2083             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2084         if (!skip_dct[1])
2085             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2086         if (!skip_dct[2])
2087             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2088         if (!skip_dct[3])
2089             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2090         if (!skip_dct[4])
2091             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2092         if (!skip_dct[5])
2093             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2094         if (!s->chroma_y_shift) { /* 422 */
2095             if (!skip_dct[6])
2096                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2097                                   wrap_c);
2098             if (!skip_dct[7])
2099                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2100                                   wrap_c);
2101         }
2102         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2103     }
2104
2105     /* DCT & quantize */
2106     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
2107     {
2108         for (i = 0; i < mb_block_count; i++) {
2109             if (!skip_dct[i]) {
2110                 int overflow;
2111                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2112                 // FIXME we could decide to change to quantizer instead of
2113                 // clipping
2114                 // JS: I don't think that would be a good idea it could lower
2115                 //     quality instead of improve it. Just INTRADC clipping
2116                 //     deserves changes in quantizer
2117                 if (overflow)
2118                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2119             } else
2120                 s->block_last_index[i] = -1;
2121         }
2122         if (s->quantizer_noise_shaping) {
2123             for (i = 0; i < mb_block_count; i++) {
2124                 if (!skip_dct[i]) {
2125                     s->block_last_index[i] =
2126                         dct_quantize_refine(s, s->block[i], weight[i],
2127                                             orig[i], i, s->qscale);
2128                 }
2129             }
2130         }
2131
             /* single-coefficient elimination is applied to non-intra
              * macroblocks only */
2132         if (s->luma_elim_threshold && !s->mb_intra)
2133             for (i = 0; i < 4; i++)
2134                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2135         if (s->chroma_elim_threshold && !s->mb_intra)
2136             for (i = 4; i < mb_block_count; i++)
2137                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2138
2139         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2140             for (i = 0; i < mb_block_count; i++) {
2141                 if (s->block_last_index[i] == -1)
2142                     s->coded_score[i] = INT_MAX / 256;
2143             }
2144         }
2145     }
2146
         /* Gray-only intra macroblock: blocks 4 and 5 are replaced by a
          * single DC coefficient of (1024 + c_dc_scale / 2) / c_dc_scale. */
2147     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2148         s->block_last_index[4] =
2149         s->block_last_index[5] = 0;
2150         s->block[4][0] =
2151         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2152     }
2153
2154     // non c quantize code returns incorrect block_last_index FIXME
         /* so with alternate scan the last non-zero position is searched
          * again, from the end of the scan table */
2155     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2156         for (i = 0; i < mb_block_count; i++) {
2157             int j;
2158             if (s->block_last_index[i] > 0) {
2159                 for (j = 63; j > 0; j--) {
2160                     if (s->block[i][s->intra_scantable.permutated[j]])
2161                         break;
2162                 }
2163                 s->block_last_index[i] = j;
2164             }
2165         }
2166     }
2167
2168     /* huffman encode */
2169     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2170     case AV_CODEC_ID_MPEG1VIDEO:
2171     case AV_CODEC_ID_MPEG2VIDEO:
2172         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2173             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2174         break;
2175     case AV_CODEC_ID_MPEG4:
2176         if (CONFIG_MPEG4_ENCODER)
2177             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2178         break;
2179     case AV_CODEC_ID_MSMPEG4V2:
2180     case AV_CODEC_ID_MSMPEG4V3:
2181     case AV_CODEC_ID_WMV1:
2182         if (CONFIG_MSMPEG4_ENCODER)
2183             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2184         break;
2185     case AV_CODEC_ID_WMV2:
2186         if (CONFIG_WMV2_ENCODER)
2187             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2188         break;
2189     case AV_CODEC_ID_H261:
2190         if (CONFIG_H261_ENCODER)
2191             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2192         break;
2193     case AV_CODEC_ID_H263:
2194     case AV_CODEC_ID_H263P:
2195     case AV_CODEC_ID_FLV1:
2196     case AV_CODEC_ID_RV10:
2197     case AV_CODEC_ID_RV20:
2198         if (CONFIG_H263_ENCODER)
2199             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2200         break;
2201     case AV_CODEC_ID_MJPEG:
2202         if (CONFIG_MJPEG_ENCODER)
2203             ff_mjpeg_encode_mb(s, s->block);
2204         break;
2205     default:
2206         assert(0);
2207     }
2208 }
2209
2210 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2211 {
2212     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2213     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2214 }
2215
2216 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2217     int i;
2218
2219     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2220
2221     /* mpeg1 */
2222     d->mb_skip_run= s->mb_skip_run;
2223     for(i=0; i<3; i++)
2224         d->last_dc[i] = s->last_dc[i];
2225
2226     /* statistics */
2227     d->mv_bits= s->mv_bits;
2228     d->i_tex_bits= s->i_tex_bits;
2229     d->p_tex_bits= s->p_tex_bits;
2230     d->i_count= s->i_count;
2231     d->f_count= s->f_count;
2232     d->b_count= s->b_count;
2233     d->skip_count= s->skip_count;
2234     d->misc_bits= s->misc_bits;
2235     d->last_bits= 0;
2236
2237     d->mb_skipped= 0;
2238     d->qscale= s->qscale;
2239     d->dquant= s->dquant;
2240
2241     d->esc3_level_length= s->esc3_level_length;
2242 }
2243
2244 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2245     int i;
2246
2247     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2248     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2249
2250     /* mpeg1 */
2251     d->mb_skip_run= s->mb_skip_run;
2252     for(i=0; i<3; i++)
2253         d->last_dc[i] = s->last_dc[i];
2254
2255     /* statistics */
2256     d->mv_bits= s->mv_bits;
2257     d->i_tex_bits= s->i_tex_bits;
2258     d->p_tex_bits= s->p_tex_bits;
2259     d->i_count= s->i_count;
2260     d->f_count= s->f_count;
2261     d->b_count= s->b_count;
2262     d->skip_count= s->skip_count;
2263     d->misc_bits= s->misc_bits;
2264
2265     d->mb_intra= s->mb_intra;
2266     d->mb_skipped= s->mb_skipped;
2267     d->mv_type= s->mv_type;
2268     d->mv_dir= s->mv_dir;
2269     d->pb= s->pb;
2270     if(s->data_partitioning){
2271         d->pb2= s->pb2;
2272         d->tex_pb= s->tex_pb;
2273     }
2274     d->block= s->block;
2275     for(i=0; i<8; i++)
2276         d->block_last_index[i]= s->block_last_index[i];
2277     d->interlaced_dct= s->interlaced_dct;
2278     d->qscale= s->qscale;
2279
2280     d->esc3_level_length= s->esc3_level_length;
2281 }
2282
2283 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2284                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2285                            int *dmin, int *next_block, int motion_x, int motion_y)
2286 {
2287     int score;
2288     uint8_t *dest_backup[3];
2289
2290     copy_context_before_encode(s, backup, type);
2291
2292     s->block= s->blocks[*next_block];
2293     s->pb= pb[*next_block];
2294     if(s->data_partitioning){
2295         s->pb2   = pb2   [*next_block];
2296         s->tex_pb= tex_pb[*next_block];
2297     }
2298
2299     if(*next_block){
2300         memcpy(dest_backup, s->dest, sizeof(s->dest));
2301         s->dest[0] = s->rd_scratchpad;
2302         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2303         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2304         assert(s->linesize >= 32); //FIXME
2305     }
2306
2307     encode_mb(s, motion_x, motion_y);
2308
2309     score= put_bits_count(&s->pb);
2310     if(s->data_partitioning){
2311         score+= put_bits_count(&s->pb2);
2312         score+= put_bits_count(&s->tex_pb);
2313     }
2314
2315     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2316         ff_MPV_decode_mb(s, s->block);
2317
2318         score *= s->lambda2;
2319         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2320     }
2321
2322     if(*next_block){
2323         memcpy(s->dest, dest_backup, sizeof(s->dest));
2324     }
2325
2326     if(score<*dmin){
2327         *dmin= score;
2328         *next_block^=1;
2329
2330         copy_context_after_encode(best, s, type);
2331     }
2332 }
2333
2334 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2335     uint32_t *sq = ff_square_tab + 256;
2336     int acc=0;
2337     int x,y;
2338
2339     if(w==16 && h==16)
2340         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2341     else if(w==8 && h==8)
2342         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2343
2344     for(y=0; y<h; y++){
2345         for(x=0; x<w; x++){
2346             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2347         }
2348     }
2349
2350     assert(acc>=0);
2351
2352     return acc;
2353 }
2354
2355 static int sse_mb(MpegEncContext *s){
2356     int w= 16;
2357     int h= 16;
2358
2359     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2360     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2361
2362     if(w==16 && h==16)
2363       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2364         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2365                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2366                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2367       }else{
2368         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2369                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2370                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2371       }
2372     else
2373         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2374                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2375                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2376 }
2377
2378 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2379     MpegEncContext *s= *(void**)arg;
2380
2381
2382     s->me.pre_pass=1;
2383     s->me.dia_size= s->avctx->pre_dia_size;
2384     s->first_slice_line=1;
2385     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2386         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2387             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2388         }
2389         s->first_slice_line=0;
2390     }
2391
2392     s->me.pre_pass=0;
2393
2394     return 0;
2395 }
2396
2397 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2398     MpegEncContext *s= *(void**)arg;
2399
2400     s->me.dia_size= s->avctx->dia_size;
2401     s->first_slice_line=1;
2402     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2403         s->mb_x=0; //for block init below
2404         ff_init_block_index(s);
2405         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2406             s->block_index[0]+=2;
2407             s->block_index[1]+=2;
2408             s->block_index[2]+=2;
2409             s->block_index[3]+=2;
2410
2411             /* compute motion vector & mb_type and store in context */
2412             if(s->pict_type==AV_PICTURE_TYPE_B)
2413                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2414             else
2415                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2416         }
2417         s->first_slice_line=0;
2418     }
2419     return 0;
2420 }
2421
2422 static int mb_var_thread(AVCodecContext *c, void *arg){
2423     MpegEncContext *s= *(void**)arg;
2424     int mb_x, mb_y;
2425
2426     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2427         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2428             int xx = mb_x * 16;
2429             int yy = mb_y * 16;
2430             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2431             int varc;
2432             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2433
2434             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2435                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2436
2437             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2438             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2439             s->me.mb_var_sum_temp    += varc;
2440         }
2441     }
2442     return 0;
2443 }
2444
2445 static void write_slice_end(MpegEncContext *s){
2446     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2447         if(s->partitioned_frame){
2448             ff_mpeg4_merge_partitions(s);
2449         }
2450
2451         ff_mpeg4_stuffing(&s->pb);
2452     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2453         ff_mjpeg_encode_stuffing(&s->pb);
2454     }
2455
2456     avpriv_align_put_bits(&s->pb);
2457     flush_put_bits(&s->pb);
2458
2459     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2460         s->misc_bits+= get_bits_diff(s);
2461 }
2462
2463 static void write_mb_info(MpegEncContext *s)
2464 {
2465     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2466     int offset = put_bits_count(&s->pb);
2467     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2468     int gobn = s->mb_y / s->gob_index;
2469     int pred_x, pred_y;
2470     if (CONFIG_H263_ENCODER)
2471         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2472     bytestream_put_le32(&ptr, offset);
2473     bytestream_put_byte(&ptr, s->qscale);
2474     bytestream_put_byte(&ptr, gobn);
2475     bytestream_put_le16(&ptr, mba);
2476     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2477     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2478     /* 4MV not implemented */
2479     bytestream_put_byte(&ptr, 0); /* hmv2 */
2480     bytestream_put_byte(&ptr, 0); /* vmv2 */
2481 }
2482
2483 static void update_mb_info(MpegEncContext *s, int startcode)
2484 {
2485     if (!s->mb_info)
2486         return;
2487     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2488         s->mb_info_size += 12;
2489         s->prev_mb_info = s->last_mb_info;
2490     }
2491     if (startcode) {
2492         s->prev_mb_info = put_bits_count(&s->pb)/8;
2493         /* This might have incremented mb_info_size above, and we return without
2494          * actually writing any info into that slot yet. But in that case,
2495          * this will be called again at the start of the after writing the
2496          * start code, actually writing the mb info. */
2497         return;
2498     }
2499
2500     s->last_mb_info = put_bits_count(&s->pb)/8;
2501     if (!s->mb_info_size)
2502         s->mb_info_size += 12;
2503     write_mb_info(s);
2504 }
2505
2506 static int encode_thread(AVCodecContext *c, void *arg){
2507     MpegEncContext *s= *(void**)arg;
2508     int mb_x, mb_y, pdif = 0;
2509     int chr_h= 16>>s->chroma_y_shift;
2510     int i, j;
2511     MpegEncContext best_s, backup_s;
2512     uint8_t bit_buf[2][MAX_MB_BYTES];
2513     uint8_t bit_buf2[2][MAX_MB_BYTES];
2514     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2515     PutBitContext pb[2], pb2[2], tex_pb[2];
2516
2517     for(i=0; i<2; i++){
2518         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2519         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2520         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2521     }
2522
2523     s->last_bits= put_bits_count(&s->pb);
2524     s->mv_bits=0;
2525     s->misc_bits=0;
2526     s->i_tex_bits=0;
2527     s->p_tex_bits=0;
2528     s->i_count=0;
2529     s->f_count=0;
2530     s->b_count=0;
2531     s->skip_count=0;
2532
2533     for(i=0; i<3; i++){
2534         /* init last dc values */
2535         /* note: quant matrix value (8) is implied here */
2536         s->last_dc[i] = 128 << s->intra_dc_precision;
2537
2538         s->current_picture.f->error[i] = 0;
2539     }
2540     s->mb_skip_run = 0;
2541     memset(s->last_mv, 0, sizeof(s->last_mv));
2542
2543     s->last_mv_dir = 0;
2544
2545     switch(s->codec_id){
2546     case AV_CODEC_ID_H263:
2547     case AV_CODEC_ID_H263P:
2548     case AV_CODEC_ID_FLV1:
2549         if (CONFIG_H263_ENCODER)
2550             s->gob_index = ff_h263_get_gob_height(s);
2551         break;
2552     case AV_CODEC_ID_MPEG4:
2553         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2554             ff_mpeg4_init_partitions(s);
2555         break;
2556     }
2557
2558     s->resync_mb_x=0;
2559     s->resync_mb_y=0;
2560     s->first_slice_line = 1;
2561     s->ptr_lastgob = s->pb.buf;
2562     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2563         s->mb_x=0;
2564         s->mb_y= mb_y;
2565
2566         ff_set_qscale(s, s->qscale);
2567         ff_init_block_index(s);
2568
2569         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2570             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2571             int mb_type= s->mb_type[xy];
2572 //            int d;
2573             int dmin= INT_MAX;
2574             int dir;
2575
2576             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2577                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2578                 return -1;
2579             }
2580             if(s->data_partitioning){
2581                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2582                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2583                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2584                     return -1;
2585                 }
2586             }
2587
2588             s->mb_x = mb_x;
2589             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2590             ff_update_block_index(s);
2591
2592             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2593                 ff_h261_reorder_mb_index(s);
2594                 xy= s->mb_y*s->mb_stride + s->mb_x;
2595                 mb_type= s->mb_type[xy];
2596             }
2597
2598             /* write gob / video packet header  */
2599             if(s->rtp_mode){
2600                 int current_packet_size, is_gob_start;
2601
2602                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2603
2604                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2605
2606                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2607
2608                 switch(s->codec_id){
2609                 case AV_CODEC_ID_H263:
2610                 case AV_CODEC_ID_H263P:
2611                     if(!s->h263_slice_structured)
2612                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2613                     break;
2614                 case AV_CODEC_ID_MPEG2VIDEO:
2615                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2616                 case AV_CODEC_ID_MPEG1VIDEO:
2617                     if(s->mb_skip_run) is_gob_start=0;
2618                     break;
2619                 }
2620
2621                 if(is_gob_start){
2622                     if(s->start_mb_y != mb_y || mb_x!=0){
2623                         write_slice_end(s);
2624
2625                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2626                             ff_mpeg4_init_partitions(s);
2627                         }
2628                     }
2629
2630                     assert((put_bits_count(&s->pb)&7) == 0);
2631                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2632
2633                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2634                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2635                         int d = 100 / s->error_rate;
2636                         if(r % d == 0){
2637                             current_packet_size=0;
2638                             s->pb.buf_ptr= s->ptr_lastgob;
2639                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2640                         }
2641                     }
2642
2643                     if (s->avctx->rtp_callback){
2644                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2645                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2646                     }
2647                     update_mb_info(s, 1);
2648
2649                     switch(s->codec_id){
2650                     case AV_CODEC_ID_MPEG4:
2651                         if (CONFIG_MPEG4_ENCODER) {
2652                             ff_mpeg4_encode_video_packet_header(s);
2653                             ff_mpeg4_clean_buffers(s);
2654                         }
2655                     break;
2656                     case AV_CODEC_ID_MPEG1VIDEO:
2657                     case AV_CODEC_ID_MPEG2VIDEO:
2658                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2659                             ff_mpeg1_encode_slice_header(s);
2660                             ff_mpeg1_clean_buffers(s);
2661                         }
2662                     break;
2663                     case AV_CODEC_ID_H263:
2664                     case AV_CODEC_ID_H263P:
2665                         if (CONFIG_H263_ENCODER)
2666                             ff_h263_encode_gob_header(s, mb_y);
2667                     break;
2668                     }
2669
2670                     if(s->flags&CODEC_FLAG_PASS1){
2671                         int bits= put_bits_count(&s->pb);
2672                         s->misc_bits+= bits - s->last_bits;
2673                         s->last_bits= bits;
2674                     }
2675
2676                     s->ptr_lastgob += current_packet_size;
2677                     s->first_slice_line=1;
2678                     s->resync_mb_x=mb_x;
2679                     s->resync_mb_y=mb_y;
2680                 }
2681             }
2682
2683             if(  (s->resync_mb_x   == s->mb_x)
2684                && s->resync_mb_y+1 == s->mb_y){
2685                 s->first_slice_line=0;
2686             }
2687
2688             s->mb_skipped=0;
2689             s->dquant=0; //only for QP_RD
2690
2691             update_mb_info(s, 0);
2692
2693             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2694                 int next_block=0;
2695                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2696
2697                 copy_context_before_encode(&backup_s, s, -1);
2698                 backup_s.pb= s->pb;
2699                 best_s.data_partitioning= s->data_partitioning;
2700                 best_s.partitioned_frame= s->partitioned_frame;
2701                 if(s->data_partitioning){
2702                     backup_s.pb2= s->pb2;
2703                     backup_s.tex_pb= s->tex_pb;
2704                 }
2705
2706                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2707                     s->mv_dir = MV_DIR_FORWARD;
2708                     s->mv_type = MV_TYPE_16X16;
2709                     s->mb_intra= 0;
2710                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2711                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2712                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2713                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2714                 }
2715                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2716                     s->mv_dir = MV_DIR_FORWARD;
2717                     s->mv_type = MV_TYPE_FIELD;
2718                     s->mb_intra= 0;
2719                     for(i=0; i<2; i++){
2720                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2721                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2722                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2723                     }
2724                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2725                                  &dmin, &next_block, 0, 0);
2726                 }
2727                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2728                     s->mv_dir = MV_DIR_FORWARD;
2729                     s->mv_type = MV_TYPE_16X16;
2730                     s->mb_intra= 0;
2731                     s->mv[0][0][0] = 0;
2732                     s->mv[0][0][1] = 0;
2733                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2734                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2735                 }
2736                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2737                     s->mv_dir = MV_DIR_FORWARD;
2738                     s->mv_type = MV_TYPE_8X8;
2739                     s->mb_intra= 0;
2740                     for(i=0; i<4; i++){
2741                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2742                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2743                     }
2744                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2745                                  &dmin, &next_block, 0, 0);
2746                 }
2747                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2748                     s->mv_dir = MV_DIR_FORWARD;
2749                     s->mv_type = MV_TYPE_16X16;
2750                     s->mb_intra= 0;
2751                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2752                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2753                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2754                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2755                 }
2756                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2757                     s->mv_dir = MV_DIR_BACKWARD;
2758                     s->mv_type = MV_TYPE_16X16;
2759                     s->mb_intra= 0;
2760                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2761                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2762                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2763                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2764                 }
2765                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2766                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2767                     s->mv_type = MV_TYPE_16X16;
2768                     s->mb_intra= 0;
2769                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2770                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2771                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2772                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2773                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2774                                  &dmin, &next_block, 0, 0);
2775                 }
2776                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2777                     s->mv_dir = MV_DIR_FORWARD;
2778                     s->mv_type = MV_TYPE_FIELD;
2779                     s->mb_intra= 0;
2780                     for(i=0; i<2; i++){
2781                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2782                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2783                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2784                     }
2785                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2786                                  &dmin, &next_block, 0, 0);
2787                 }
2788                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2789                     s->mv_dir = MV_DIR_BACKWARD;
2790                     s->mv_type = MV_TYPE_FIELD;
2791                     s->mb_intra= 0;
2792                     for(i=0; i<2; i++){
2793                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2794                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2795                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2796                     }
2797                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2798                                  &dmin, &next_block, 0, 0);
2799                 }
2800                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2801                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2802                     s->mv_type = MV_TYPE_FIELD;
2803                     s->mb_intra= 0;
2804                     for(dir=0; dir<2; dir++){
2805                         for(i=0; i<2; i++){
2806                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2807                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2808                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2809                         }
2810                     }
2811                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2812                                  &dmin, &next_block, 0, 0);
2813                 }
2814                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2815                     s->mv_dir = 0;
2816                     s->mv_type = MV_TYPE_16X16;
2817                     s->mb_intra= 1;
2818                     s->mv[0][0][0] = 0;
2819                     s->mv[0][0][1] = 0;
2820                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2821                                  &dmin, &next_block, 0, 0);
2822                     if(s->h263_pred || s->h263_aic){
2823                         if(best_s.mb_intra)
2824                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2825                         else
2826                             ff_clean_intra_table_entries(s); //old mode?
2827                     }
2828                 }
2829
2830                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2831                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2832                         const int last_qp= backup_s.qscale;
2833                         int qpi, qp, dc[6];
2834                         int16_t ac[6][16];
2835                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2836                         static const int dquant_tab[4]={-1,1,-2,2};
2837
2838                         assert(backup_s.dquant == 0);
2839
2840                         //FIXME intra
2841                         s->mv_dir= best_s.mv_dir;
2842                         s->mv_type = MV_TYPE_16X16;
2843                         s->mb_intra= best_s.mb_intra;
2844                         s->mv[0][0][0] = best_s.mv[0][0][0];
2845                         s->mv[0][0][1] = best_s.mv[0][0][1];
2846                         s->mv[1][0][0] = best_s.mv[1][0][0];
2847                         s->mv[1][0][1] = best_s.mv[1][0][1];
2848
2849                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2850                         for(; qpi<4; qpi++){
2851                             int dquant= dquant_tab[qpi];
2852                             qp= last_qp + dquant;
2853                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2854                                 continue;
2855                             backup_s.dquant= dquant;
2856                             if(s->mb_intra && s->dc_val[0]){
2857                                 for(i=0; i<6; i++){
2858                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2859                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2860                                 }
2861                             }
2862
2863                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2864                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2865                             if(best_s.qscale != qp){
2866                                 if(s->mb_intra && s->dc_val[0]){
2867                                     for(i=0; i<6; i++){
2868                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2869                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2870                                     }
2871                                 }
2872                             }
2873                         }
2874                     }
2875                 }
2876                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2877                     int mx= s->b_direct_mv_table[xy][0];
2878                     int my= s->b_direct_mv_table[xy][1];
2879
2880                     backup_s.dquant = 0;
2881                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2882                     s->mb_intra= 0;
2883                     ff_mpeg4_set_direct_mv(s, mx, my);
2884                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2885                                  &dmin, &next_block, mx, my);
2886                 }
2887                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2888                     backup_s.dquant = 0;
2889                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2890                     s->mb_intra= 0;
2891                     ff_mpeg4_set_direct_mv(s, 0, 0);
2892                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2893                                  &dmin, &next_block, 0, 0);
2894                 }
2895                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2896                     int coded=0;
2897                     for(i=0; i<6; i++)
2898                         coded |= s->block_last_index[i];
2899                     if(coded){
2900                         int mx,my;
2901                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2902                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2903                             mx=my=0; //FIXME find the one we actually used
2904                             ff_mpeg4_set_direct_mv(s, mx, my);
2905                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2906                             mx= s->mv[1][0][0];
2907                             my= s->mv[1][0][1];
2908                         }else{
2909                             mx= s->mv[0][0][0];
2910                             my= s->mv[0][0][1];
2911                         }
2912
2913                         s->mv_dir= best_s.mv_dir;
2914                         s->mv_type = best_s.mv_type;
2915                         s->mb_intra= 0;
2916 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2917                         s->mv[0][0][1] = best_s.mv[0][0][1];
2918                         s->mv[1][0][0] = best_s.mv[1][0][0];
2919                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2920                         backup_s.dquant= 0;
2921                         s->skipdct=1;
2922                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2923                                         &dmin, &next_block, mx, my);
2924                         s->skipdct=0;
2925                     }
2926                 }
2927
2928                 s->current_picture.qscale_table[xy] = best_s.qscale;
2929
2930                 copy_context_after_encode(s, &best_s, -1);
2931
2932                 pb_bits_count= put_bits_count(&s->pb);
2933                 flush_put_bits(&s->pb);
2934                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2935                 s->pb= backup_s.pb;
2936
2937                 if(s->data_partitioning){
2938                     pb2_bits_count= put_bits_count(&s->pb2);
2939                     flush_put_bits(&s->pb2);
2940                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2941                     s->pb2= backup_s.pb2;
2942
2943                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2944                     flush_put_bits(&s->tex_pb);
2945                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2946                     s->tex_pb= backup_s.tex_pb;
2947                 }
2948                 s->last_bits= put_bits_count(&s->pb);
2949
2950                 if (CONFIG_H263_ENCODER &&
2951                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2952                     ff_h263_update_motion_val(s);
2953
2954                 if(next_block==0){ //FIXME 16 vs linesize16
2955                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2956                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2957                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2958                 }
2959
2960                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2961                     ff_MPV_decode_mb(s, s->block);
2962             } else {
2963                 int motion_x = 0, motion_y = 0;
2964                 s->mv_type=MV_TYPE_16X16;
2965                 // only one MB-Type possible
2966
2967                 switch(mb_type){
2968                 case CANDIDATE_MB_TYPE_INTRA:
2969                     s->mv_dir = 0;
2970                     s->mb_intra= 1;
2971                     motion_x= s->mv[0][0][0] = 0;
2972                     motion_y= s->mv[0][0][1] = 0;
2973                     break;
2974                 case CANDIDATE_MB_TYPE_INTER:
2975                     s->mv_dir = MV_DIR_FORWARD;
2976                     s->mb_intra= 0;
2977                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2978                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2979                     break;
2980                 case CANDIDATE_MB_TYPE_INTER_I:
2981                     s->mv_dir = MV_DIR_FORWARD;
2982                     s->mv_type = MV_TYPE_FIELD;
2983                     s->mb_intra= 0;
2984                     for(i=0; i<2; i++){
2985                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2986                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2987                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2988                     }
2989                     break;
2990                 case CANDIDATE_MB_TYPE_INTER4V:
2991                     s->mv_dir = MV_DIR_FORWARD;
2992                     s->mv_type = MV_TYPE_8X8;
2993                     s->mb_intra= 0;
2994                     for(i=0; i<4; i++){
2995                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2996                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2997                     }
2998                     break;
2999                 case CANDIDATE_MB_TYPE_DIRECT:
3000                     if (CONFIG_MPEG4_ENCODER) {
3001                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3002                         s->mb_intra= 0;
3003                         motion_x=s->b_direct_mv_table[xy][0];
3004                         motion_y=s->b_direct_mv_table[xy][1];
3005                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3006                     }
3007                     break;
3008                 case CANDIDATE_MB_TYPE_DIRECT0:
3009                     if (CONFIG_MPEG4_ENCODER) {
3010                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3011                         s->mb_intra= 0;
3012                         ff_mpeg4_set_direct_mv(s, 0, 0);
3013                     }
3014                     break;
3015                 case CANDIDATE_MB_TYPE_BIDIR:
3016                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3017                     s->mb_intra= 0;
3018                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3019                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3020                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3021                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3022                     break;
3023                 case CANDIDATE_MB_TYPE_BACKWARD:
3024                     s->mv_dir = MV_DIR_BACKWARD;
3025                     s->mb_intra= 0;
3026                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3027                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3028                     break;
3029                 case CANDIDATE_MB_TYPE_FORWARD:
3030                     s->mv_dir = MV_DIR_FORWARD;
3031                     s->mb_intra= 0;
3032                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3033                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3034                     break;
3035                 case CANDIDATE_MB_TYPE_FORWARD_I:
3036                     s->mv_dir = MV_DIR_FORWARD;
3037                     s->mv_type = MV_TYPE_FIELD;
3038                     s->mb_intra= 0;
3039                     for(i=0; i<2; i++){
3040                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3041                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3042                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3043                     }
3044                     break;
3045                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3046                     s->mv_dir = MV_DIR_BACKWARD;
3047                     s->mv_type = MV_TYPE_FIELD;
3048                     s->mb_intra= 0;
3049                     for(i=0; i<2; i++){
3050                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3051                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3052                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3053                     }
3054                     break;
3055                 case CANDIDATE_MB_TYPE_BIDIR_I:
3056                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3057                     s->mv_type = MV_TYPE_FIELD;
3058                     s->mb_intra= 0;
3059                     for(dir=0; dir<2; dir++){
3060                         for(i=0; i<2; i++){
3061                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3062                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3063                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3064                         }
3065                     }
3066                     break;
3067                 default:
3068                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3069                 }
3070
3071                 encode_mb(s, motion_x, motion_y);
3072
3073                 // RAL: Update last macroblock type
3074                 s->last_mv_dir = s->mv_dir;
3075
3076                 if (CONFIG_H263_ENCODER &&
3077                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3078                     ff_h263_update_motion_val(s);
3079
3080                 ff_MPV_decode_mb(s, s->block);
3081             }
3082
3083             /* clean the MV table in IPS frames for direct mode in B frames */
3084             if(s->mb_intra /* && I,P,S_TYPE */){
3085                 s->p_mv_table[xy][0]=0;
3086                 s->p_mv_table[xy][1]=0;
3087             }
3088
3089             if(s->flags&CODEC_FLAG_PSNR){
3090                 int w= 16;
3091                 int h= 16;
3092
3093                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3094                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3095
3096                 s->current_picture.f->error[0] += sse(
3097                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3098                     s->dest[0], w, h, s->linesize);
3099                 s->current_picture.f->error[1] += sse(
3100                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3101                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3102                 s->current_picture.f->error[2] += sse(
3103                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3104                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3105             }
3106             if(s->loop_filter){
3107                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3108                     ff_h263_loop_filter(s);
3109             }
3110             av_dlog(s->avctx, "MB %d %d bits\n",
3111                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3112         }
3113     }
3114
3115     //not beautiful here but we must write it before flushing so it has to be here
3116     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3117         ff_msmpeg4_encode_ext_header(s);
3118
3119     write_slice_end(s);
3120
3121     /* Send the last GOB if RTP */
3122     if (s->avctx->rtp_callback) {
3123         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3124         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3125         /* Call the RTP callback to send the last GOB */
3126         emms_c();
3127         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3128     }
3129
3130     return 0;
3131 }
3132
3133 #define MERGE(field) dst->field += src->field; src->field=0
3134 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3135     MERGE(me.scene_change_score);
3136     MERGE(me.mc_mb_var_sum_temp);
3137     MERGE(me.mb_var_sum_temp);
3138 }
3139
3140 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3141     int i;
3142
3143     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3144     MERGE(dct_count[1]);
3145     MERGE(mv_bits);
3146     MERGE(i_tex_bits);
3147     MERGE(p_tex_bits);
3148     MERGE(i_count);
3149     MERGE(f_count);
3150     MERGE(b_count);
3151     MERGE(skip_count);
3152     MERGE(misc_bits);
3153     MERGE(er.error_count);
3154     MERGE(padding_bug_score);
3155     MERGE(current_picture.f->error[0]);
3156     MERGE(current_picture.f->error[1]);
3157     MERGE(current_picture.f->error[2]);
3158
3159     if(dst->avctx->noise_reduction){
3160         for(i=0; i<64; i++){
3161             MERGE(dct_error_sum[0][i]);
3162             MERGE(dct_error_sum[1][i]);
3163         }
3164     }
3165
3166     assert(put_bits_count(&src->pb) % 8 ==0);
3167     assert(put_bits_count(&dst->pb) % 8 ==0);
3168     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3169     flush_put_bits(&dst->pb);
3170 }
3171
3172 static int estimate_qp(MpegEncContext *s, int dry_run){
3173     if (s->next_lambda){
3174         s->current_picture_ptr->f->quality =
3175         s->current_picture.f->quality = s->next_lambda;
3176         if(!dry_run) s->next_lambda= 0;
3177     } else if (!s->fixed_qscale) {
3178         s->current_picture_ptr->f->quality =
3179         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3180         if (s->current_picture.f->quality < 0)
3181             return -1;
3182     }
3183
3184     if(s->adaptive_quant){
3185         switch(s->codec_id){
3186         case AV_CODEC_ID_MPEG4:
3187             if (CONFIG_MPEG4_ENCODER)
3188                 ff_clean_mpeg4_qscales(s);
3189             break;
3190         case AV_CODEC_ID_H263:
3191         case AV_CODEC_ID_H263P:
3192         case AV_CODEC_ID_FLV1:
3193             if (CONFIG_H263_ENCODER)
3194                 ff_clean_h263_qscales(s);
3195             break;
3196         default:
3197             ff_init_qscale_tab(s);
3198         }
3199
3200         s->lambda= s->lambda_table[0];
3201         //FIXME broken
3202     }else
3203         s->lambda = s->current_picture.f->quality;
3204     update_qscale(s);
3205     return 0;
3206 }
3207
3208 /* must be called before writing the header */
3209 static void set_frame_distances(MpegEncContext * s){
3210     assert(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3211     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3212
3213     if(s->pict_type==AV_PICTURE_TYPE_B){
3214         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3215         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3216     }else{
3217         s->pp_time= s->time - s->last_non_b_time;
3218         s->last_non_b_time= s->time;
3219         assert(s->picture_number==0 || s->pp_time > 0);
3220     }
3221 }
3222
/**
 * Encode one picture.
 *
 * In order: prepares timing and rounding state, runs motion estimation (or,
 * for I pictures, marks every macroblock intra and optionally measures
 * spatial complexity), may turn a P picture into an I picture on a scene
 * change, chooses f_code/b_code, picks the quantizer through estimate_qp(),
 * writes the format-specific picture header and finally runs encode_thread
 * over all slice contexts and merges their results into s.
 *
 * @param s              encoder context; thread_context[1..] are refreshed
 *                       from it before motion estimation and before encoding
 * @param picture_number stored in s->picture_number and passed on to the
 *                       picture header writers
 * @return 0 on success, -1 if estimate_qp() or ff_init_me() fails, or the
 *         negative value returned by ff_update_duplicate_context()
 */
3223 static int encode_picture(MpegEncContext *s, int picture_number)
3224 {
3225     int i, ret;
3226     int bits;
3227     int context_count = s->slice_context_count;
3228
3229     s->picture_number = picture_number;
3230
3231     /* Reset the average MB variance */
3232     s->me.mb_var_sum_temp    =
3233     s->me.mc_mb_var_sum_temp = 0;
3234
3235     /* we need to initialize some time vars before we can encode b-frames */
3236     // RAL: Condition added for MPEG1VIDEO
3237     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3238         set_frame_distances(s);
3239     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3240         ff_set_mpeg4_time(s);
3241
3242     s->me.scene_change_score=0;
3243
3244 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3245
         /* I pictures set no_rounding from the msmpeg4 version; other non-B
          * pictures toggle it for the codecs/flag tested below; B pictures
          * leave it unchanged */
3246     if(s->pict_type==AV_PICTURE_TYPE_I){
3247         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3248         else                        s->no_rounding=0;
3249     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3250         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3251             s->no_rounding ^= 1;
3252     }
3253
         /* provisional lambda/qscale for motion estimation; the final value
          * is chosen by the estimate_qp(s, 0) call further down */
3254     if(s->flags & CODEC_FLAG_PASS2){
3255         if (estimate_qp(s,1) < 0)
3256             return -1;
3257         ff_get_2pass_fcode(s);
3258     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3259         if(s->pict_type==AV_PICTURE_TYPE_B)
3260             s->lambda= s->last_lambda_for[s->pict_type];
3261         else
3262             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3263         update_qscale(s);
3264     }
3265
3266     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* index 0 is skipped in this and the following per-context loops */
3267     for(i=1; i<context_count; i++){
3268         ret = ff_update_duplicate_context(s->thread_context[i], s);
3269         if (ret < 0)
3270             return ret;
3271     }
3272
3273     if(ff_init_me(s)<0)
3274         return -1;
3275
3276     /* Estimate motion for every MB */
3277     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda/lambda2 by me_penalty_compensation/256, rounded */
3278         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3279         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3280         if (s->pict_type != AV_PICTURE_TYPE_B) {
3281             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3282                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3283             }
3284         }
3285
3286         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3287     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3288         /* I-Frame */
3289         for(i=0; i<s->mb_stride*s->mb_height; i++)
3290             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3291
3292         if(!s->fixed_qscale){
3293             /* finding spatial complexity for I-frame rate control */
3294             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3295         }
3296     }
         /* collect the per-context ME statistics into s */
3297     for(i=1; i<context_count; i++){
3298         merge_context_after_me(s, s->thread_context[i]);
3299     }
3300     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3301     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3302     emms_c();
3303
         /* scene change: re-type the P picture as I and force all MBs intra */
3304     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3305         s->pict_type= AV_PICTURE_TYPE_I;
3306         for(i=0; i<s->mb_stride*s->mb_height; i++)
3307             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3308         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3309                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3310     }
3311
         /* choose f_code/b_code from the estimated vectors, then let
          * ff_fix_long_mvs() handle vectors that do not fit them
          * (NOTE(review): its exact behaviour is not visible here) */
3312     if(!s->umvplus){
3313         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3314             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3315
3316             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3317                 int a,b;
3318                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3319                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3320                 s->f_code= FFMAX3(s->f_code, a, b);
3321             }
3322
3323             ff_fix_long_p_mvs(s);
3324             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3325             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3326                 int j;
3327                 for(i=0; i<2; i++){
3328                     for(j=0; j<2; j++)
3329                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3330                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3331                 }
3332             }
3333         }
3334
3335         if(s->pict_type==AV_PICTURE_TYPE_B){
3336             int a, b;
3337
                 /* f_code covers the forward tables, b_code the backward ones */
3338             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3339             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3340             s->f_code = FFMAX(a, b);
3341
3342             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3343             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3344             s->b_code = FFMAX(a, b);
3345
3346             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3347             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3348             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3349             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3350             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3351                 int dir, j;
3352                 for(dir=0; dir<2; dir++){
3353                     for(i=0; i<2; i++){
3354                         for(j=0; j<2; j++){
3355                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3356                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3357                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3358                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3359                         }
3360                     }
3361                 }
3362             }
3363         }
3364     }
3365
         /* final quantizer decision for this picture */
3366     if (estimate_qp(s, 0) < 0)
3367         return -1;
3368
3369     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3370         s->qscale= 3; //reduce clipping problems
3371
3372     if (s->out_format == FMT_MJPEG) {
3373         /* for mjpeg, we do include qscale in the matrix */
3374         for(i=1;i<64;i++){
3375             int j = s->idsp.idct_permutation[i];
3376
3377             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3378         }
3379         s->y_dc_scale_table=
3380         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3381         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3382         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3383                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
             /* qscale is already folded into the matrix above, so the block
              * coder runs with the fixed value 8 */
3384         s->qscale= 8;
3385     }
3386
3387     //FIXME var duplication
3388     s->current_picture_ptr->f->key_frame =
3389     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3390     s->current_picture_ptr->f->pict_type =
3391     s->current_picture.f->pict_type = s->pict_type;
3392
3393     if (s->current_picture.f->key_frame)
3394         s->picture_in_gop_number=0;
3395
         /* write the picture header and record its size in header_bits */
3396     s->last_bits= put_bits_count(&s->pb);
3397     switch(s->out_format) {
3398     case FMT_MJPEG:
3399         if (CONFIG_MJPEG_ENCODER)
3400             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3401                                            s->intra_matrix);
3402         break;
3403     case FMT_H261:
3404         if (CONFIG_H261_ENCODER)
3405             ff_h261_encode_picture_header(s, picture_number);
3406         break;
3407     case FMT_H263:
3408         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3409             ff_wmv2_encode_picture_header(s, picture_number);
3410         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3411             ff_msmpeg4_encode_picture_header(s, picture_number);
3412         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3413             ff_mpeg4_encode_picture_header(s, picture_number);
3414         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3415             ff_rv10_encode_picture_header(s, picture_number);
3416         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3417             ff_rv20_encode_picture_header(s, picture_number);
3418         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3419             ff_flv_encode_picture_header(s, picture_number);
3420         else if (CONFIG_H263_ENCODER)
3421             ff_h263_encode_picture_header(s, picture_number);
3422         break;
3423     case FMT_MPEG1:
3424         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3425             ff_mpeg1_encode_picture_header(s, picture_number);
3426         break;
3427     default:
3428         assert(0);
3429     }
3430     bits= put_bits_count(&s->pb);
3431     s->header_bits= bits - s->last_bits;
3432
         /* refresh the other contexts from s, encode all slices, then merge
          * the per-context statistics and bitstreams back into s */
3433     for(i=1; i<context_count; i++){
3434         update_duplicate_context_after_me(s->thread_context[i], s);
3435     }
3436     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3437     for(i=1; i<context_count; i++){
3438         merge_context_after_encode(s, s->thread_context[i]);
3439     }
3440     emms_c();
3441     return 0;
3442 }
3443
3444 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3445     const int intra= s->mb_intra;
3446     int i;
3447
3448     s->dct_count[intra]++;
3449
3450     for(i=0; i<64; i++){
3451         int level= block[i];
3452
3453         if(level){
3454             if(level>0){
3455                 s->dct_error_sum[intra][i] += level;
3456                 level -= s->dct_offset[intra][i];
3457                 if(level<0) level=0;
3458             }else{
3459                 s->dct_error_sum[intra][i] -= level;
3460                 level += s->dct_offset[intra][i];
3461                 if(level>0) level=0;
3462             }
3463             block[i]= level;
3464         }
3465     }
3466 }
3467
/**
 * Transform and quantize one block, choosing the coded levels by a
 * rate-distortion search (trellis) instead of plain rounding.
 *
 * After s->fdsp.fdct() (and the optional denoiser) every coefficient up to
 * the last one that quantizes to non-zero gets up to two candidate levels:
 * the quantized level and the level one step closer to zero. Coefficients
 * below the quantization threshold get the single candidate +1/-1. For each
 * position the candidates are scored as
 *     distortion + (VLC length or escape length) * lambda
 * on top of the best score of every surviving earlier position; run_tab[]
 * and level_tab[] remember the best choice so the final run/level chain can
 * be written back into block[].
 *
 * @param s        encoder context (matrices, VLC length tables, lambda2)
 * @param block    input to s->fdsp.fdct(); on return holds the chosen levels
 *                 at the positions given by intra_scantable.permutated
 * @param n        block number: n < 4 uses y_dc_scale, otherwise c_dc_scale;
 *                 also the index into s->coded_score[]
 * @param qscale   quantizer; selects the row of q_intra_matrix/q_inter_matrix
 * @param overflow set to non-zero when a candidate level may exceed
 *                 s->max_qcoeff
 * @return scan index of the last non-zero coefficient; a value below the
 *         first coded index (0 for intra blocks, -1 otherwise) means no
 *         AC coefficient is coded
 */
3468 static int dct_quantize_trellis_c(MpegEncContext *s,
3469                                   int16_t *block, int n,
3470                                   int qscale, int *overflow){
3471     const int *qmat;
3472     const uint8_t *scantable= s->intra_scantable.scantable;
3473     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3474     int max=0;
3475     unsigned int threshold1, threshold2;
3476     int bias=0;
3477     int run_tab[65];
3478     int level_tab[65];
3479     int score_tab[65];
3480     int survivor[65];
3481     int survivor_count;
3482     int last_run=0;
3483     int last_level=0;
3484     int last_score= 0;
3485     int last_i;
3486     int coeff[2][64];
3487     int coeff_count[64];
3488     int qmul, qadd, start_i, last_non_zero, i, dc;
3489     const int esc_length= s->ac_esc_length;
3490     uint8_t * length;
3491     uint8_t * last_length;
3492     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3493
3494     s->fdsp.fdct(block);
3495
3496     if(s->dct_error_sum)
3497         s->denoise_dct(s, block);
         /* dequantizer terms for the H.263-style reconstruction, scaled by 8 */
3498     qmul= qscale*16;
3499     qadd= ((qscale-1)|1)*8;
3500
3501     if (s->mb_intra) {
3502         int q;
3503         if (!s->h263_aic) {
3504             if (n < 4)
3505                 q = s->y_dc_scale;
3506             else
3507                 q = s->c_dc_scale;
3508             q = q << 3;
3509         } else{
3510             /* For AIC we skip quant/dequant of INTRADC */
3511             q = 1 << 3;
3512             qadd=0;
3513         }
3514
3515         /* note: block[0] is assumed to be positive */
3516         block[0] = (block[0] + (q >> 1)) / q;
             /* DC is quantized above, so the search starts at index 1 */
3517         start_i = 1;
3518         last_non_zero = 0;
3519         qmat = s->q_intra_matrix[qscale];
3520         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3521             bias= 1<<(QMAT_SHIFT-1);
3522         length     = s->intra_ac_vlc_length;
3523         last_length= s->intra_ac_vlc_last_length;
3524     } else {
3525         start_i = 0;
3526         last_non_zero = -1;
3527         qmat = s->q_inter_matrix[qscale];
3528         length     = s->inter_ac_vlc_length;
3529         last_length= s->inter_ac_vlc_last_length;
3530     }
3531     last_i= start_i;
3532
         /* products in [-threshold1, threshold1] quantize to zero; the
          * unsigned compare used below tests both signs at once */
3533     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3534     threshold2= (threshold1<<1);
3535
         /* scan backwards for the last coefficient that quantizes to non-zero */
3536     for(i=63; i>=start_i; i--) {
3537         const int j = scantable[i];
3538         int level = block[j] * qmat[j];
3539
3540         if(((unsigned)(level+threshold1))>threshold2){
3541             last_non_zero = i;
3542             break;
3543         }
3544     }
3545
         /* build the candidate levels for every position up to last_non_zero */
3546     for(i=start_i; i<=last_non_zero; i++) {
3547         const int j = scantable[i];
3548         int level = block[j] * qmat[j];
3549
3550 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3551 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3552         if(((unsigned)(level+threshold1))>threshold2){
3553             if(level>0){
3554                 level= (bias + level)>>QMAT_SHIFT;
3555                 coeff[0][i]= level;
3556                 coeff[1][i]= level-1;
3557 //                coeff[2][k]= level-2;
3558             }else{
3559                 level= (bias - level)>>QMAT_SHIFT;
3560                 coeff[0][i]= -level;
3561                 coeff[1][i]= -level+1;
3562 //                coeff[2][k]= -level+2;
3563             }
                 /* magnitude 1 has only one candidate, since the second would be 0 */
3564             coeff_count[i]= FFMIN(level, 2);
3565             assert(coeff_count[i]);
3566             max |=level;
3567         }else{
                 /* below threshold: single candidate, +1 or -1 by the sign of level */
3568             coeff[0][i]= (level>>31)|1;
3569             coeff_count[i]= 1;
3570         }
3571     }
3572
3573     *overflow= s->max_qcoeff < max; //overflow might have happened
3574
3575     if(last_non_zero < start_i){
3576         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3577         return last_non_zero;
3578     }
3579
3580     score_tab[start_i]= 0;
3581     survivor[0]= start_i;
3582     survivor_count= 1;
3583
         /* score_tab[i+1] receives the best score of a path that codes a
          * non-zero level at position i; survivor[] lists the earlier
          * positions still worth continuing from */
3584     for(i=start_i; i<=last_non_zero; i++){
3585         int level_index, j, zero_distortion;
3586         int dct_coeff= FFABS(block[ scantable[i] ]);
3587         int best_score=256*256*256*120;
3588
3589         if (s->fdsp.fdct == ff_fdct_ifast)
3590             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3591         zero_distortion= dct_coeff*dct_coeff;
3592
3593         for(level_index=0; level_index < coeff_count[i]; level_index++){
3594             int distortion;
3595             int level= coeff[level_index][i];
3596             const int alevel= FFABS(level);
3597             int unquant_coeff;
3598
3599             assert(level);
3600
                 /* reconstruct the candidate the way the dequantizer would */
3601             if(s->out_format == FMT_H263){
3602                 unquant_coeff= alevel*qmul + qadd;
3603             }else{ //MPEG1
3604                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3605                 if(s->mb_intra){
3606                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3607                         unquant_coeff =   (unquant_coeff - 1) | 1;
3608                 }else{
3609                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3610                         unquant_coeff =   (unquant_coeff - 1) | 1;
3611                 }
3612                 unquant_coeff<<= 3;
3613             }
3614
                 /* distortion is measured relative to coding a zero here */
3615             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* offset by 64: levels in [-64, 63] index the VLC length
                  * tables, anything else is costed as an escape code */
3616             level+=64;
3617             if((level&(~127)) == 0){
3618                 for(j=survivor_count-1; j>=0; j--){
3619                     int run= i - survivor[j];
3620                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3621                     score += score_tab[i-run];
3622
3623                     if(score < best_score){
3624                         best_score= score;
3625                         run_tab[i+1]= run;
3626                         level_tab[i+1]= level-64;
3627                     }
3628                 }
3629
                     /* for FMT_H263 the "last" variant of the code has its own
                      * length table, so the best end point is tracked here */
3630                 if(s->out_format == FMT_H263){
3631                     for(j=survivor_count-1; j>=0; j--){
3632                         int run= i - survivor[j];
3633                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3634                         score += score_tab[i-run];
3635                         if(score < last_score){
3636                             last_score= score;
3637                             last_run= run;
3638                             last_level= level-64;
3639                             last_i= i+1;
3640                         }
3641                     }
3642                 }
3643             }else{
3644                 distortion += esc_length*lambda;
3645                 for(j=survivor_count-1; j>=0; j--){
3646                     int run= i - survivor[j];
3647                     int score= distortion + score_tab[i-run];
3648
3649                     if(score < best_score){
3650                         best_score= score;
3651                         run_tab[i+1]= run;
3652                         level_tab[i+1]= level-64;
3653                     }
3654                 }
3655
3656                 if(s->out_format == FMT_H263){
3657                   for(j=survivor_count-1; j>=0; j--){
3658                         int run= i - survivor[j];
3659                         int score= distortion + score_tab[i-run];
3660                         if(score < last_score){
3661                             last_score= score;
3662                             last_run= run;
3663                             last_level= level-64;
3664                             last_i= i+1;
3665                         }
3666                     }
3667                 }
3668             }
3669         }
3670
3671         score_tab[i+1]= best_score;
3672
             /* drop trailing survivors that score worse than the path ending
              * here (with a margin of lambda when last_non_zero > 27) */
3673         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3674         if(last_non_zero <= 27){
3675             for(; survivor_count; survivor_count--){
3676                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3677                     break;
3678             }
3679         }else{
3680             for(; survivor_count; survivor_count--){
3681                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3682                     break;
3683             }
3684         }
3685
3686         survivor[ survivor_count++ ]= i+1;
3687     }
3688
         /* formats other than FMT_H263 pick the end position afterwards,
          * charging 2*lambda for every end position except index 0 */
3689     if(s->out_format != FMT_H263){
3690         last_score= 256*256*256*120;
3691         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3692             int score= score_tab[i];
3693             if(i) score += lambda*2; //FIXME exacter?
3694
3695             if(score < last_score){
3696                 last_score= score;
3697                 last_i= i;
3698                 last_level= level_tab[i];
3699                 last_run= run_tab[i];
3700             }
3701         }
3702     }
3703
3704     s->coded_score[n] = last_score;
3705
         /* keep |block[0]| before the coefficients are cleared; it is only
          * used by the single-coefficient case below */
3706     dc= FFABS(block[0]);
3707     last_non_zero= last_i - 1;
3708     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3709
3710     if(last_non_zero < start_i)
3711         return last_non_zero;
3712
         /* non-intra block with only coefficient 0 left: re-decide its level,
          * including the option of coding nothing at all */
3713     if(last_non_zero == 0 && start_i == 0){
3714         int best_level= 0;
3715         int best_score= dc * dc;
3716
3717         for(i=0; i<coeff_count[0]; i++){
3718             int level= coeff[i][0];
3719             int alevel= FFABS(level);
3720             int unquant_coeff, score, distortion;
3721
3722             if(s->out_format == FMT_H263){
3723                     unquant_coeff= (alevel*qmul + qadd)>>3;
3724             }else{ //MPEG1
3725                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3726                     unquant_coeff =   (unquant_coeff - 1) | 1;
3727             }
3728             unquant_coeff = (unquant_coeff + 4) >> 3;
3729             unquant_coeff<<= 3 + 3;
3730
3731             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3732             level+=64;
3733             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3734             else                    score= distortion + esc_length*lambda;
3735
3736             if(score < best_score){
3737                 best_score= score;
3738                 best_level= level - 64;
3739             }
3740         }
3741         block[0]= best_level;
3742         s->coded_score[n] = best_score - dc*dc;
3743         if(best_level == 0) return -1;
3744         else                return last_non_zero;
3745     }
3746
         /* write the last level, then follow run_tab[]/level_tab[] backwards
          * to store the remaining levels of the chosen path */
3747     i= last_i;
3748     assert(last_level);
3749
3750     block[ perm_scantable[last_non_zero] ]= last_level;
3751     i -= last_run + 1;
3752
3753     for(; i>start_i; i -= run_tab[i] + 1){
3754         block[ perm_scantable[i-1] ]= level_tab[i];
3755     }
3756
3757     return last_non_zero;
3758 }
3759
3760 //#define REFINE_STATS 1
3761 static int16_t basis[64][64];
3762
3763 static void build_basis(uint8_t *perm){
3764     int i, j, x, y;
3765     emms_c();
3766     for(i=0; i<8; i++){
3767         for(j=0; j<8; j++){
3768             for(y=0; y<8; y++){
3769                 for(x=0; x<8; x++){
3770                     double s= 0.25*(1<<BASIS_SHIFT);
3771                     int index= 8*i + j;
3772                     int perm_index= perm[index];
3773                     if(i==0) s*= sqrt(0.5);
3774                     if(j==0) s*= sqrt(0.5);
3775                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3776                 }
3777             }
3778         }
3779     }
3780 }
3781
3782 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3783                         int16_t *block, int16_t *weight, int16_t *orig,
3784                         int n, int qscale){
3785     int16_t rem[64];
3786     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3787     const uint8_t *scantable= s->intra_scantable.scantable;
3788     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3789 //    unsigned int threshold1, threshold2;
3790 //    int bias=0;
3791     int run_tab[65];
3792     int prev_run=0;
3793     int prev_level=0;
3794     int qmul, qadd, start_i, last_non_zero, i, dc;
3795     uint8_t * length;
3796     uint8_t * last_length;
3797     int lambda;
3798     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3799 #ifdef REFINE_STATS
3800 static int count=0;
3801 static int after_last=0;
3802 static int to_zero=0;
3803 static int from_zero=0;
3804 static int raise=0;
3805 static int lower=0;
3806 static int messed_sign=0;
3807 #endif
3808
3809     if(basis[0][0] == 0)
3810         build_basis(s->idsp.idct_permutation);
3811
3812     qmul= qscale*2;
3813     qadd= (qscale-1)|1;
3814     if (s->mb_intra) {
3815         if (!s->h263_aic) {
3816             if (n < 4)
3817                 q = s->y_dc_scale;
3818             else
3819                 q = s->c_dc_scale;
3820         } else{
3821             /* For AIC we skip quant/dequant of INTRADC */
3822             q = 1;
3823             qadd=0;
3824         }
3825         q <<= RECON_SHIFT-3;
3826         /* note: block[0] is assumed to be positive */
3827         dc= block[0]*q;
3828 //        block[0] = (block[0] + (q >> 1)) / q;
3829         start_i = 1;
3830 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3831 //            bias= 1<<(QMAT_SHIFT-1);
3832         length     = s->intra_ac_vlc_length;
3833         last_length= s->intra_ac_vlc_last_length;
3834     } else {
3835         dc= 0;
3836         start_i = 0;
3837         length     = s->inter_ac_vlc_length;
3838         last_length= s->inter_ac_vlc_last_length;
3839     }
3840     last_non_zero = s->block_last_index[n];
3841
3842 #ifdef REFINE_STATS
3843 {START_TIMER
3844 #endif
3845     dc += (1<<(RECON_SHIFT-1));
3846     for(i=0; i<64; i++){
3847         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3848     }
3849 #ifdef REFINE_STATS
3850 STOP_TIMER("memset rem[]")}
3851 #endif
3852     sum=0;
3853     for(i=0; i<64; i++){
3854         int one= 36;
3855         int qns=4;
3856         int w;
3857
3858         w= FFABS(weight[i]) + qns*one;
3859         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3860
3861         weight[i] = w;
3862 //        w=weight[i] = (63*qns + (w/2)) / w;
3863
3864         assert(w>0);
3865         assert(w<(1<<6));
3866         sum += w*w;
3867     }
3868     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3869 #ifdef REFINE_STATS
3870 {START_TIMER
3871 #endif
3872     run=0;
3873     rle_index=0;
3874     for(i=start_i; i<=last_non_zero; i++){
3875         int j= perm_scantable[i];
3876         const int level= block[j];
3877         int coeff;
3878
3879         if(level){
3880             if(level<0) coeff= qmul*level - qadd;
3881             else        coeff= qmul*level + qadd;
3882             run_tab[rle_index++]=run;
3883             run=0;
3884
3885             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
3886         }else{
3887             run++;
3888         }
3889     }
3890 #ifdef REFINE_STATS
3891 if(last_non_zero>0){
3892 STOP_TIMER("init rem[]")
3893 }
3894 }
3895
3896 {START_TIMER
3897 #endif
3898     for(;;){
3899         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
3900         int best_coeff=0;
3901         int best_change=0;
3902         int run2, best_unquant_change=0, analyze_gradient;
3903 #ifdef REFINE_STATS
3904 {START_TIMER
3905 #endif
3906         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3907
3908         if(analyze_gradient){
3909 #ifdef REFINE_STATS
3910 {START_TIMER
3911 #endif
3912             for(i=0; i<64; i++){
3913                 int w= weight[i];
3914
3915                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3916             }
3917 #ifdef REFINE_STATS
3918 STOP_TIMER("rem*w*w")}
3919 {START_TIMER
3920 #endif
3921             s->fdsp.fdct(d1);
3922 #ifdef REFINE_STATS
3923 STOP_TIMER("dct")}
3924 #endif
3925         }
3926
3927         if(start_i){
3928             const int level= block[0];
3929             int change, old_coeff;
3930
3931             assert(s->mb_intra);
3932
3933             old_coeff= q*level;
3934
3935             for(change=-1; change<=1; change+=2){
3936                 int new_level= level + change;
3937                 int score, new_coeff;
3938
3939                 new_coeff= q*new_level;
3940                 if(new_coeff >= 2048 || new_coeff < 0)
3941                     continue;
3942
3943                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
3944                                                   new_coeff - old_coeff);
3945                 if(score<best_score){
3946                     best_score= score;
3947                     best_coeff= 0;
3948                     best_change= change;
3949                     best_unquant_change= new_coeff - old_coeff;
3950                 }
3951             }
3952         }
3953
3954         run=0;
3955         rle_index=0;
3956         run2= run_tab[rle_index++];
3957         prev_level=0;
3958         prev_run=0;
3959
3960         for(i=start_i; i<64; i++){
3961             int j= perm_scantable[i];
3962             const int level= block[j];
3963             int change, old_coeff;
3964
3965             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3966                 break;
3967
3968             if(level){
3969                 if(level<0) old_coeff= qmul*level - qadd;
3970                 else        old_coeff= qmul*level + qadd;
3971                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3972             }else{
3973                 old_coeff=0;
3974                 run2--;
3975                 assert(run2>=0 || i >= last_non_zero );
3976             }
3977
3978             for(change=-1; change<=1; change+=2){
3979                 int new_level= level + change;
3980                 int score, new_coeff, unquant_change;
3981
3982                 score=0;
3983                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3984                    continue;
3985
3986                 if(new_level){
3987                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3988                     else            new_coeff= qmul*new_level + qadd;
3989                     if(new_coeff >= 2048 || new_coeff <= -2048)
3990                         continue;
3991                     //FIXME check for overflow
3992
3993                     if(level){
3994                         if(level < 63 && level > -63){
3995                             if(i < last_non_zero)
3996                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3997                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3998                             else
3999                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4000                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4001                         }
4002                     }else{
4003                         assert(FFABS(new_level)==1);
4004
4005                         if(analyze_gradient){
4006                             int g= d1[ scantable[i] ];
4007                             if(g && (g^new_level) >= 0)
4008                                 continue;
4009                         }
4010
4011                         if(i < last_non_zero){
4012                             int next_i= i + run2 + 1;
4013                             int next_level= block[ perm_scantable[next_i] ] + 64;
4014
4015                             if(next_level&(~127))
4016                                 next_level= 0;
4017
4018                             if(next_i < last_non_zero)
4019                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4020                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4021                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4022                             else
4023                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4024                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4025                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4026                         }else{
4027                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4028                             if(prev_level){
4029                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4030                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4031                             }
4032                         }
4033                     }
4034                 }else{
4035                     new_coeff=0;
4036                     assert(FFABS(level)==1);
4037
4038                     if(i < last_non_zero){
4039                         int next_i= i + run2 + 1;
4040                         int next_level= block[ perm_scantable[next_i] ] + 64;
4041
4042                         if(next_level&(~127))
4043                             next_level= 0;
4044
4045                         if(next_i < last_non_zero)
4046                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4047                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4048                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4049                         else
4050                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4051                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4052                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4053                     }else{
4054                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4055                         if(prev_level){
4056                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4057                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4058                         }
4059                     }
4060                 }
4061
4062                 score *= lambda;
4063
4064                 unquant_change= new_coeff - old_coeff;
4065                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
4066
4067                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4068                                                    unquant_change);
4069                 if(score<best_score){
4070                     best_score= score;
4071                     best_coeff= i;
4072                     best_change= change;
4073                     best_unquant_change= unquant_change;
4074                 }
4075             }
4076             if(level){
4077                 prev_level= level + 64;
4078                 if(prev_level&(~127))
4079                     prev_level= 0;
4080                 prev_run= run;
4081                 run=0;
4082             }else{
4083                 run++;
4084             }
4085         }
4086 #ifdef REFINE_STATS
4087 STOP_TIMER("iterative step")}
4088 #endif
4089
4090         if(best_change){
4091             int j= perm_scantable[ best_coeff ];
4092
4093             block[j] += best_change;
4094
4095             if(best_coeff > last_non_zero){
4096                 last_non_zero= best_coeff;
4097                 assert(block[j]);
4098 #ifdef REFINE_STATS
4099 after_last++;
4100 #endif
4101             }else{
4102 #ifdef REFINE_STATS
4103 if(block[j]){
4104     if(block[j] - best_change){
4105         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4106             raise++;
4107         }else{
4108             lower++;
4109         }
4110     }else{
4111         from_zero++;
4112     }
4113 }else{
4114     to_zero++;
4115 }
4116 #endif
4117                 for(; last_non_zero>=start_i; last_non_zero--){
4118                     if(block[perm_scantable[last_non_zero]])
4119                         break;
4120                 }
4121             }
4122 #ifdef REFINE_STATS
4123 count++;
4124 if(256*256*256*64 % count == 0){
4125     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4126 }
4127 #endif
4128             run=0;
4129             rle_index=0;
4130             for(i=start_i; i<=last_non_zero; i++){
4131                 int j= perm_scantable[i];
4132                 const int level= block[j];
4133
4134                  if(level){
4135                      run_tab[rle_index++]=run;
4136                      run=0;
4137                  }else{
4138                      run++;
4139                  }
4140             }
4141
4142             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4143         }else{
4144             break;
4145         }
4146     }
4147 #ifdef REFINE_STATS
4148 if(last_non_zero>0){
4149 STOP_TIMER("iterative search")
4150 }
4151 }
4152 #endif
4153
4154     return last_non_zero;
4155 }
4156
4157 int ff_dct_quantize_c(MpegEncContext *s,
4158                         int16_t *block, int n,
4159                         int qscale, int *overflow)
4160 {
4161     int i, j, level, last_non_zero, q, start_i;
4162     const int *qmat;
4163     const uint8_t *scantable= s->intra_scantable.scantable;
4164     int bias;
4165     int max=0;
4166     unsigned int threshold1, threshold2;
4167
4168     s->fdsp.fdct(block);
4169
4170     if(s->dct_error_sum)
4171         s->denoise_dct(s, block);
4172
4173     if (s->mb_intra) {
4174         if (!s->h263_aic) {
4175             if (n < 4)
4176                 q = s->y_dc_scale;
4177             else
4178                 q = s->c_dc_scale;
4179             q = q << 3;
4180         } else
4181             /* For AIC we skip quant/dequant of INTRADC */
4182             q = 1 << 3;
4183
4184         /* note: block[0] is assumed to be positive */
4185         block[0] = (block[0] + (q >> 1)) / q;
4186         start_i = 1;
4187         last_non_zero = 0;
4188         qmat = s->q_intra_matrix[qscale];
4189         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4190     } else {
4191         start_i = 0;
4192         last_non_zero = -1;
4193         qmat = s->q_inter_matrix[qscale];
4194         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4195     }
4196     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4197     threshold2= (threshold1<<1);
4198     for(i=63;i>=start_i;i--) {
4199         j = scantable[i];
4200         level = block[j] * qmat[j];
4201
4202         if(((unsigned)(level+threshold1))>threshold2){
4203             last_non_zero = i;
4204             break;
4205         }else{
4206             block[j]=0;
4207         }
4208     }
4209     for(i=start_i; i<=last_non_zero; i++) {
4210         j = scantable[i];
4211         level = block[j] * qmat[j];
4212
4213 //        if(   bias+level >= (1<<QMAT_SHIFT)
4214 //           || bias-level >= (1<<QMAT_SHIFT)){
4215         if(((unsigned)(level+threshold1))>threshold2){
4216             if(level>0){
4217                 level= (bias + level)>>QMAT_SHIFT;
4218                 block[j]= level;
4219             }else{
4220                 level= (bias - level)>>QMAT_SHIFT;
4221                 block[j]= -level;
4222             }
4223             max |=level;
4224         }else{
4225             block[j]=0;
4226         }
4227     }
4228     *overflow= s->max_qcoeff < max; //overflow might have happened
4229
4230     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4231     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4232         ff_block_permute(block, s->idsp.idct_permutation,
4233                          scantable, last_non_zero);
4234
4235     return last_non_zero;
4236 }
4237
/* Byte offset of a MpegEncContext field, for use in the AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Option flags shared by every entry below. Parenthesized so the macro
 * stays one operand when used next to operators that bind tighter than '|'. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4240 static const AVOption h263_options[] = {
4241     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4242     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4243     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4244     FF_MPV_COMMON_OPTS
4245     { NULL },
4246 };
4247
4248 static const AVClass h263_class = {
4249     .class_name = "H.263 encoder",
4250     .item_name  = av_default_item_name,
4251     .option     = h263_options,
4252     .version    = LIBAVUTIL_VERSION_INT,
4253 };
4254
4255 AVCodec ff_h263_encoder = {
4256     .name           = "h263",
4257     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4258     .type           = AVMEDIA_TYPE_VIDEO,
4259     .id             = AV_CODEC_ID_H263,
4260     .priv_data_size = sizeof(MpegEncContext),
4261     .init           = ff_MPV_encode_init,
4262     .encode2        = ff_MPV_encode_picture,
4263     .close          = ff_MPV_encode_end,
4264     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4265     .priv_class     = &h263_class,
4266 };
4267
4268 static const AVOption h263p_options[] = {
4269     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4270     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4271     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4272     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4273     FF_MPV_COMMON_OPTS
4274     { NULL },
4275 };
4276 static const AVClass h263p_class = {
4277     .class_name = "H.263p encoder",
4278     .item_name  = av_default_item_name,
4279     .option     = h263p_options,
4280     .version    = LIBAVUTIL_VERSION_INT,
4281 };
4282
4283 AVCodec ff_h263p_encoder = {
4284     .name           = "h263p",
4285     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4286     .type           = AVMEDIA_TYPE_VIDEO,
4287     .id             = AV_CODEC_ID_H263P,
4288     .priv_data_size = sizeof(MpegEncContext),
4289     .init           = ff_MPV_encode_init,
4290     .encode2        = ff_MPV_encode_picture,
4291     .close          = ff_MPV_encode_end,
4292     .capabilities   = CODEC_CAP_SLICE_THREADS,
4293     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4294     .priv_class     = &h263p_class,
4295 };
4296
4297 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4298
4299 AVCodec ff_msmpeg4v2_encoder = {
4300     .name           = "msmpeg4v2",
4301     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4302     .type           = AVMEDIA_TYPE_VIDEO,
4303     .id             = AV_CODEC_ID_MSMPEG4V2,
4304     .priv_data_size = sizeof(MpegEncContext),
4305     .init           = ff_MPV_encode_init,
4306     .encode2        = ff_MPV_encode_picture,
4307     .close          = ff_MPV_encode_end,
4308     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4309     .priv_class     = &msmpeg4v2_class,
4310 };
4311
4312 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4313
4314 AVCodec ff_msmpeg4v3_encoder = {
4315     .name           = "msmpeg4",
4316     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4317     .type           = AVMEDIA_TYPE_VIDEO,
4318     .id             = AV_CODEC_ID_MSMPEG4V3,
4319     .priv_data_size = sizeof(MpegEncContext),
4320     .init           = ff_MPV_encode_init,
4321     .encode2        = ff_MPV_encode_picture,
4322     .close          = ff_MPV_encode_end,
4323     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4324     .priv_class     = &msmpeg4v3_class,
4325 };
4326
4327 FF_MPV_GENERIC_CLASS(wmv1)
4328
4329 AVCodec ff_wmv1_encoder = {
4330     .name           = "wmv1",
4331     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4332     .type           = AVMEDIA_TYPE_VIDEO,
4333     .id             = AV_CODEC_ID_WMV1,
4334     .priv_data_size = sizeof(MpegEncContext),
4335     .init           = ff_MPV_encode_init,
4336     .encode2        = ff_MPV_encode_picture,
4337     .close          = ff_MPV_encode_end,
4338     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4339     .priv_class     = &wmv1_class,
4340 };