]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
mpegvideo_enc: factor out denominator and explicitly cast operands
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "idctdsp.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "h261.h"
44 #include "h263.h"
45 #include "mjpegenc_common.h"
46 #include "mathops.h"
47 #include "mpegutils.h"
48 #include "mjpegenc.h"
49 #include "msmpeg4.h"
50 #include "pixblockdsp.h"
51 #include "qpeldsp.h"
52 #include "faandct.h"
53 #include "thread.h"
54 #include "aandcttab.h"
55 #include "flv.h"
56 #include "mpeg4video.h"
57 #include "internal.h"
58 #include "bytestream.h"
59 #include <limits.h>
60
61 #define QUANT_BIAS_SHIFT 8
62
63 #define QMAT_SHIFT_MMX 16
64 #define QMAT_SHIFT 22
65
66 static int encode_picture(MpegEncContext *s, int picture_number);
67 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
68 static int sse_mb(MpegEncContext *s);
69 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
70 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
71
72 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
73 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
74
75 const AVOption ff_mpv_generic_options[] = {
76     FF_MPV_COMMON_OPTS
77     { NULL },
78 };
79
80 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
81                        uint16_t (*qmat16)[2][64],
82                        const uint16_t *quant_matrix,
83                        int bias, int qmin, int qmax, int intra)
84 {
85     FDCTDSPContext *fdsp = &s->fdsp;
86     int qscale;
87     int shift = 0;
88
89     for (qscale = qmin; qscale <= qmax; qscale++) {
90         int i;
91         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
92 #if CONFIG_FAANDCT
93             fdsp->fdct == ff_faandct            ||
94 #endif /* CONFIG_FAANDCT */
95             fdsp->fdct == ff_jpeg_fdct_islow_10) {
96             for (i = 0; i < 64; i++) {
97                 const int j = s->idsp.idct_permutation[i];
98                 int64_t den = (int64_t) qscale * quant_matrix[j];
99                 /* 16 <= qscale * quant_matrix[i] <= 7905
100                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
101                  *             19952 <=              x  <= 249205026
102                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
103                  *           3444240 >= (1 << 36) / (x) >= 275 */
104
105                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
106             }
107         } else if (fdsp->fdct == ff_fdct_ifast) {
108             for (i = 0; i < 64; i++) {
109                 const int j = s->idsp.idct_permutation[i];
110                 int64_t den = ff_aanscales[i] * (int64_t) qscale * quant_matrix[j];
111                 /* 16 <= qscale * quant_matrix[i] <= 7905
112                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
113                  *             19952 <=              x  <= 249205026
114                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
115                  *           3444240 >= (1 << 36) / (x) >= 275 */
116
117                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / den);
118             }
119         } else {
120             for (i = 0; i < 64; i++) {
121                 const int j = s->idsp.idct_permutation[i];
122                 int64_t den = (int64_t) qscale * quant_matrix[j];
123                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
124                  * Assume x = qscale * quant_matrix[i]
125                  * So             16 <=              x  <= 7905
126                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
127                  * so          32768 >= (1 << 19) / (x) >= 67 */
128                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
129                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
130                 //                    (qscale * quant_matrix[i]);
131                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / den;
132
133                 if (qmat16[qscale][0][i] == 0 ||
134                     qmat16[qscale][0][i] == 128 * 256)
135                     qmat16[qscale][0][i] = 128 * 256 - 1;
136                 qmat16[qscale][1][i] =
137                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
138                                 qmat16[qscale][0][i]);
139             }
140         }
141
142         for (i = intra; i < 64; i++) {
143             int64_t max = 8191;
144             if (fdsp->fdct == ff_fdct_ifast) {
145                 max = (8191LL * ff_aanscales[i]) >> 14;
146             }
147             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
148                 shift++;
149             }
150         }
151     }
152     if (shift) {
153         av_log(NULL, AV_LOG_INFO,
154                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
155                QMAT_SHIFT - shift);
156     }
157 }
158
159 static inline void update_qscale(MpegEncContext *s)
160 {
161     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
162                 (FF_LAMBDA_SHIFT + 7);
163     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
164
165     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
166                  FF_LAMBDA_SHIFT;
167 }
168
169 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
170 {
171     int i;
172
173     if (matrix) {
174         put_bits(pb, 1, 1);
175         for (i = 0; i < 64; i++) {
176             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
177         }
178     } else
179         put_bits(pb, 1, 0);
180 }
181
182 /**
183  * init s->current_picture.qscale_table from s->lambda_table
184  */
185 void ff_init_qscale_tab(MpegEncContext *s)
186 {
187     int8_t * const qscale_table = s->current_picture.qscale_table;
188     int i;
189
190     for (i = 0; i < s->mb_num; i++) {
191         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
192         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
193         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
194                                                   s->avctx->qmax);
195     }
196 }
197
198 static void update_duplicate_context_after_me(MpegEncContext *dst,
199                                               MpegEncContext *src)
200 {
201 #define COPY(a) dst->a= src->a
202     COPY(pict_type);
203     COPY(current_picture);
204     COPY(f_code);
205     COPY(b_code);
206     COPY(qscale);
207     COPY(lambda);
208     COPY(lambda2);
209     COPY(picture_in_gop_number);
210     COPY(gop_picture_number);
211     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
212     COPY(progressive_frame);    // FIXME don't set in encode_header
213     COPY(partitioned_frame);    // FIXME don't set in encode_header
214 #undef COPY
215 }
216
217 /**
218  * Set the given MpegEncContext to defaults for encoding.
219  * the changed fields will not depend upon the prior state of the MpegEncContext.
220  */
221 static void mpv_encode_defaults(MpegEncContext *s)
222 {
223     int i;
224     ff_mpv_common_defaults(s);
225
226     for (i = -16; i < 16; i++) {
227         default_fcode_tab[i + MAX_MV] = 1;
228     }
229     s->me.mv_penalty = default_mv_penalty;
230     s->fcode_tab     = default_fcode_tab;
231
232     s->input_picture_number  = 0;
233     s->picture_in_gop_number = 0;
234 }
235
236 /* init video encoder */
237 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
238 {
239     MpegEncContext *s = avctx->priv_data;
240     int i, ret, format_supported;
241
242     mpv_encode_defaults(s);
243
244     switch (avctx->codec_id) {
245     case AV_CODEC_ID_MPEG2VIDEO:
246         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
247             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
248             av_log(avctx, AV_LOG_ERROR,
249                    "only YUV420 and YUV422 are supported\n");
250             return -1;
251         }
252         break;
253     case AV_CODEC_ID_MJPEG:
254         format_supported = 0;
255         /* JPEG color space */
256         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
257             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
258             (avctx->color_range == AVCOL_RANGE_JPEG &&
259              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
260               avctx->pix_fmt == AV_PIX_FMT_YUV422P)))
261             format_supported = 1;
262         /* MPEG color space */
263         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
264                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
265                   avctx->pix_fmt == AV_PIX_FMT_YUV422P))
266             format_supported = 1;
267
268         if (!format_supported) {
269             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
270             return -1;
271         }
272         break;
273     default:
274         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
275             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
276             return -1;
277         }
278     }
279
280     switch (avctx->pix_fmt) {
281     case AV_PIX_FMT_YUVJ422P:
282     case AV_PIX_FMT_YUV422P:
283         s->chroma_format = CHROMA_422;
284         break;
285     case AV_PIX_FMT_YUVJ420P:
286     case AV_PIX_FMT_YUV420P:
287     default:
288         s->chroma_format = CHROMA_420;
289         break;
290     }
291
292     s->bit_rate = avctx->bit_rate;
293     s->width    = avctx->width;
294     s->height   = avctx->height;
295     if (avctx->gop_size > 600 &&
296         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
297         av_log(avctx, AV_LOG_ERROR,
298                "Warning keyframe interval too large! reducing it ...\n");
299         avctx->gop_size = 600;
300     }
301     s->gop_size     = avctx->gop_size;
302     s->avctx        = avctx;
303     s->flags        = avctx->flags;
304     s->flags2       = avctx->flags2;
305     if (avctx->max_b_frames > MAX_B_FRAMES) {
306         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
307                "is %d.\n", MAX_B_FRAMES);
308     }
309     s->max_b_frames = avctx->max_b_frames;
310     s->codec_id     = avctx->codec->id;
311     s->strict_std_compliance = avctx->strict_std_compliance;
312     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
313     s->mpeg_quant         = avctx->mpeg_quant;
314     s->rtp_mode           = !!avctx->rtp_payload_size;
315     s->intra_dc_precision = avctx->intra_dc_precision;
316     s->user_specified_pts = AV_NOPTS_VALUE;
317
318     if (s->gop_size <= 1) {
319         s->intra_only = 1;
320         s->gop_size   = 12;
321     } else {
322         s->intra_only = 0;
323     }
324
325     s->me_method = avctx->me_method;
326
327     /* Fixed QSCALE */
328     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
329
330 #if FF_API_MPV_OPT
331     FF_DISABLE_DEPRECATION_WARNINGS
332     if (avctx->border_masking != 0.0)
333         s->border_masking = avctx->border_masking;
334     FF_ENABLE_DEPRECATION_WARNINGS
335 #endif
336
337     s->adaptive_quant = (s->avctx->lumi_masking ||
338                          s->avctx->dark_masking ||
339                          s->avctx->temporal_cplx_masking ||
340                          s->avctx->spatial_cplx_masking  ||
341                          s->avctx->p_masking      ||
342                          s->border_masking ||
343                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
344                         !s->fixed_qscale;
345
346     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
347
348     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
349         av_log(avctx, AV_LOG_ERROR,
350                "a vbv buffer size is needed, "
351                "for encoding with a maximum bitrate\n");
352         return -1;
353     }
354
355     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
356         av_log(avctx, AV_LOG_INFO,
357                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
358     }
359
360     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
361         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
362         return -1;
363     }
364
365     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
366         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
367         return -1;
368     }
369
370     if (avctx->rc_max_rate &&
371         avctx->rc_max_rate == avctx->bit_rate &&
372         avctx->rc_max_rate != avctx->rc_min_rate) {
373         av_log(avctx, AV_LOG_INFO,
374                "impossible bitrate constraints, this will fail\n");
375     }
376
377     if (avctx->rc_buffer_size &&
378         avctx->bit_rate * (int64_t)avctx->time_base.num >
379             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
380         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
381         return -1;
382     }
383
384     if (!s->fixed_qscale &&
385         avctx->bit_rate * av_q2d(avctx->time_base) >
386             avctx->bit_rate_tolerance) {
387         av_log(avctx, AV_LOG_ERROR,
388                "bitrate tolerance too small for bitrate\n");
389         return -1;
390     }
391
392     if (s->avctx->rc_max_rate &&
393         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
394         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
395          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
396         90000LL * (avctx->rc_buffer_size - 1) >
397             s->avctx->rc_max_rate * 0xFFFFLL) {
398         av_log(avctx, AV_LOG_INFO,
399                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
400                "specified vbv buffer is too large for the given bitrate!\n");
401     }
402
403     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
404         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
405         s->codec_id != AV_CODEC_ID_FLV1) {
406         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
407         return -1;
408     }
409
410     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
411         av_log(avctx, AV_LOG_ERROR,
412                "OBMC is only supported with simple mb decision\n");
413         return -1;
414     }
415
416     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
417         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
418         return -1;
419     }
420
421     if (s->max_b_frames                    &&
422         s->codec_id != AV_CODEC_ID_MPEG4      &&
423         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
424         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
425         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
426         return -1;
427     }
428
429     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
430          s->codec_id == AV_CODEC_ID_H263  ||
431          s->codec_id == AV_CODEC_ID_H263P) &&
432         (avctx->sample_aspect_ratio.num > 255 ||
433          avctx->sample_aspect_ratio.den > 255)) {
434         av_log(avctx, AV_LOG_ERROR,
435                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
436                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
437         return -1;
438     }
439
440     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
441         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
442         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
443         return -1;
444     }
445
446     // FIXME mpeg2 uses that too
447     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
448         av_log(avctx, AV_LOG_ERROR,
449                "mpeg2 style quantization not supported by codec\n");
450         return -1;
451     }
452
453     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
454         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
455         return -1;
456     }
457
458     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
459         s->avctx->mb_decision != FF_MB_DECISION_RD) {
460         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
461         return -1;
462     }
463
464     if (s->avctx->scenechange_threshold < 1000000000 &&
465         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
466         av_log(avctx, AV_LOG_ERROR,
467                "closed gop with scene change detection are not supported yet, "
468                "set threshold to 1000000000\n");
469         return -1;
470     }
471
472     if (s->flags & CODEC_FLAG_LOW_DELAY) {
473         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
474             av_log(avctx, AV_LOG_ERROR,
475                   "low delay forcing is only available for mpeg2\n");
476             return -1;
477         }
478         if (s->max_b_frames != 0) {
479             av_log(avctx, AV_LOG_ERROR,
480                    "b frames cannot be used with low delay\n");
481             return -1;
482         }
483     }
484
485     if (s->q_scale_type == 1) {
486         if (avctx->qmax > 12) {
487             av_log(avctx, AV_LOG_ERROR,
488                    "non linear quant only supports qmax <= 12 currently\n");
489             return -1;
490         }
491     }
492
493     if (s->avctx->thread_count > 1         &&
494         s->codec_id != AV_CODEC_ID_MPEG4      &&
495         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
496         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
497         (s->codec_id != AV_CODEC_ID_H263P)) {
498         av_log(avctx, AV_LOG_ERROR,
499                "multi threaded encoding not supported by codec\n");
500         return -1;
501     }
502
503     if (s->avctx->thread_count < 1) {
504         av_log(avctx, AV_LOG_ERROR,
505                "automatic thread number detection not supported by codec,"
506                "patch welcome\n");
507         return -1;
508     }
509
510     if (s->avctx->thread_count > 1)
511         s->rtp_mode = 1;
512
513     if (!avctx->time_base.den || !avctx->time_base.num) {
514         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
515         return -1;
516     }
517
518     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
519         av_log(avctx, AV_LOG_INFO,
520                "notice: b_frame_strategy only affects the first pass\n");
521         avctx->b_frame_strategy = 0;
522     }
523
524     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
525     if (i > 1) {
526         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
527         avctx->time_base.den /= i;
528         avctx->time_base.num /= i;
529         //return -1;
530     }
531
532     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
533         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
534         // (a + x * 3 / 8) / x
535         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
536         s->inter_quant_bias = 0;
537     } else {
538         s->intra_quant_bias = 0;
539         // (a - x / 4) / x
540         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
541     }
542
543     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
544         s->intra_quant_bias = avctx->intra_quant_bias;
545     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
546         s->inter_quant_bias = avctx->inter_quant_bias;
547
548     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
549         s->avctx->time_base.den > (1 << 16) - 1) {
550         av_log(avctx, AV_LOG_ERROR,
551                "timebase %d/%d not supported by MPEG 4 standard, "
552                "the maximum admitted value for the timebase denominator "
553                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
554                (1 << 16) - 1);
555         return -1;
556     }
557     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
558
559     switch (avctx->codec->id) {
560     case AV_CODEC_ID_MPEG1VIDEO:
561         s->out_format = FMT_MPEG1;
562         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
563         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
564         break;
565     case AV_CODEC_ID_MPEG2VIDEO:
566         s->out_format = FMT_MPEG1;
567         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
568         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
569         s->rtp_mode   = 1;
570         break;
571     case AV_CODEC_ID_MJPEG:
572         s->out_format = FMT_MJPEG;
573         s->intra_only = 1; /* force intra only for jpeg */
574         if (!CONFIG_MJPEG_ENCODER ||
575             ff_mjpeg_encode_init(s) < 0)
576             return -1;
577         avctx->delay = 0;
578         s->low_delay = 1;
579         break;
580     case AV_CODEC_ID_H261:
581         if (!CONFIG_H261_ENCODER)
582             return -1;
583         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
584             av_log(avctx, AV_LOG_ERROR,
585                    "The specified picture size of %dx%d is not valid for the "
586                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
587                     s->width, s->height);
588             return -1;
589         }
590         s->out_format = FMT_H261;
591         avctx->delay  = 0;
592         s->low_delay  = 1;
593         break;
594     case AV_CODEC_ID_H263:
595         if (!CONFIG_H263_ENCODER)
596         return -1;
597         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
598                              s->width, s->height) == 8) {
599             av_log(avctx, AV_LOG_INFO,
600                    "The specified picture size of %dx%d is not valid for "
601                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
602                    "352x288, 704x576, and 1408x1152."
603                    "Try H.263+.\n", s->width, s->height);
604             return -1;
605         }
606         s->out_format = FMT_H263;
607         avctx->delay  = 0;
608         s->low_delay  = 1;
609         break;
610     case AV_CODEC_ID_H263P:
611         s->out_format = FMT_H263;
612         s->h263_plus  = 1;
613         /* Fx */
614         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
615         s->modified_quant  = s->h263_aic;
616         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
617         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
618
619         /* /Fx */
620         /* These are just to be sure */
621         avctx->delay = 0;
622         s->low_delay = 1;
623         break;
624     case AV_CODEC_ID_FLV1:
625         s->out_format      = FMT_H263;
626         s->h263_flv        = 2; /* format = 1; 11-bit codes */
627         s->unrestricted_mv = 1;
628         s->rtp_mode  = 0; /* don't allow GOB */
629         avctx->delay = 0;
630         s->low_delay = 1;
631         break;
632     case AV_CODEC_ID_RV10:
633         s->out_format = FMT_H263;
634         avctx->delay  = 0;
635         s->low_delay  = 1;
636         break;
637     case AV_CODEC_ID_RV20:
638         s->out_format      = FMT_H263;
639         avctx->delay       = 0;
640         s->low_delay       = 1;
641         s->modified_quant  = 1;
642         s->h263_aic        = 1;
643         s->h263_plus       = 1;
644         s->loop_filter     = 1;
645         s->unrestricted_mv = 0;
646         break;
647     case AV_CODEC_ID_MPEG4:
648         s->out_format      = FMT_H263;
649         s->h263_pred       = 1;
650         s->unrestricted_mv = 1;
651         s->low_delay       = s->max_b_frames ? 0 : 1;
652         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
653         break;
654     case AV_CODEC_ID_MSMPEG4V2:
655         s->out_format      = FMT_H263;
656         s->h263_pred       = 1;
657         s->unrestricted_mv = 1;
658         s->msmpeg4_version = 2;
659         avctx->delay       = 0;
660         s->low_delay       = 1;
661         break;
662     case AV_CODEC_ID_MSMPEG4V3:
663         s->out_format        = FMT_H263;
664         s->h263_pred         = 1;
665         s->unrestricted_mv   = 1;
666         s->msmpeg4_version   = 3;
667         s->flipflop_rounding = 1;
668         avctx->delay         = 0;
669         s->low_delay         = 1;
670         break;
671     case AV_CODEC_ID_WMV1:
672         s->out_format        = FMT_H263;
673         s->h263_pred         = 1;
674         s->unrestricted_mv   = 1;
675         s->msmpeg4_version   = 4;
676         s->flipflop_rounding = 1;
677         avctx->delay         = 0;
678         s->low_delay         = 1;
679         break;
680     case AV_CODEC_ID_WMV2:
681         s->out_format        = FMT_H263;
682         s->h263_pred         = 1;
683         s->unrestricted_mv   = 1;
684         s->msmpeg4_version   = 5;
685         s->flipflop_rounding = 1;
686         avctx->delay         = 0;
687         s->low_delay         = 1;
688         break;
689     default:
690         return -1;
691     }
692
693     avctx->has_b_frames = !s->low_delay;
694
695     s->encoding = 1;
696
697     s->progressive_frame    =
698     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
699                                                 CODEC_FLAG_INTERLACED_ME) ||
700                                 s->alternate_scan);
701
702     /* init */
703     ff_mpv_idct_init(s);
704     if (ff_mpv_common_init(s) < 0)
705         return -1;
706
707     if (ARCH_X86)
708         ff_mpv_encode_init_x86(s);
709
710     ff_fdctdsp_init(&s->fdsp, avctx);
711     ff_me_cmp_init(&s->mecc, avctx);
712     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
713     ff_pixblockdsp_init(&s->pdsp, avctx);
714     ff_qpeldsp_init(&s->qdsp);
715
716     s->avctx->coded_frame = s->current_picture.f;
717
718     if (s->msmpeg4_version) {
719         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
720                           2 * 2 * (MAX_LEVEL + 1) *
721                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
722     }
723     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
724
725     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
726     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
727     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
728     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
729     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
730                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
731     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
732                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
733
734     if (s->avctx->noise_reduction) {
735         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
736                           2 * 64 * sizeof(uint16_t), fail);
737     }
738
739     if (CONFIG_H263_ENCODER)
740         ff_h263dsp_init(&s->h263dsp);
741     if (!s->dct_quantize)
742         s->dct_quantize = ff_dct_quantize_c;
743     if (!s->denoise_dct)
744         s->denoise_dct  = denoise_dct_c;
745     s->fast_dct_quantize = s->dct_quantize;
746     if (avctx->trellis)
747         s->dct_quantize  = dct_quantize_trellis_c;
748
749     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
750         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
751
752     s->quant_precision = 5;
753
754     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
755     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
756
757     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
758         ff_h261_encode_init(s);
759     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
760         ff_h263_encode_init(s);
761     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
762         ff_msmpeg4_encode_init(s);
763     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
764         && s->out_format == FMT_MPEG1)
765         ff_mpeg1_encode_init(s);
766
767     /* init q matrix */
768     for (i = 0; i < 64; i++) {
769         int j = s->idsp.idct_permutation[i];
770         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
771             s->mpeg_quant) {
772             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
773             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
774         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
775             s->intra_matrix[j] =
776             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
777         } else {
778             /* mpeg1/2 */
779             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
780             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
781         }
782         if (s->avctx->intra_matrix)
783             s->intra_matrix[j] = s->avctx->intra_matrix[i];
784         if (s->avctx->inter_matrix)
785             s->inter_matrix[j] = s->avctx->inter_matrix[i];
786     }
787
788     /* precompute matrix */
789     /* for mjpeg, we do include qscale in the matrix */
790     if (s->out_format != FMT_MJPEG) {
791         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
792                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
793                           31, 1);
794         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
795                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
796                           31, 0);
797     }
798
799     if (ff_rate_control_init(s) < 0)
800         return -1;
801
802 #if FF_API_ERROR_RATE
803     FF_DISABLE_DEPRECATION_WARNINGS
804     if (avctx->error_rate)
805         s->error_rate = avctx->error_rate;
806     FF_ENABLE_DEPRECATION_WARNINGS;
807 #endif
808
809 #if FF_API_NORMALIZE_AQP
810     FF_DISABLE_DEPRECATION_WARNINGS
811     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
812         s->mpv_flags |= FF_MPV_FLAG_NAQ;
813     FF_ENABLE_DEPRECATION_WARNINGS;
814 #endif
815
816 #if FF_API_MV0
817     FF_DISABLE_DEPRECATION_WARNINGS
818     if (avctx->flags & CODEC_FLAG_MV0)
819         s->mpv_flags |= FF_MPV_FLAG_MV0;
820     FF_ENABLE_DEPRECATION_WARNINGS
821 #endif
822
823 #if FF_API_MPV_OPT
824     FF_DISABLE_DEPRECATION_WARNINGS
825     if (avctx->rc_qsquish != 0.0)
826         s->rc_qsquish = avctx->rc_qsquish;
827     if (avctx->rc_qmod_amp != 0.0)
828         s->rc_qmod_amp = avctx->rc_qmod_amp;
829     if (avctx->rc_qmod_freq)
830         s->rc_qmod_freq = avctx->rc_qmod_freq;
831     if (avctx->rc_buffer_aggressivity != 1.0)
832         s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
833     if (avctx->rc_initial_cplx != 0.0)
834         s->rc_initial_cplx = avctx->rc_initial_cplx;
835     if (avctx->lmin)
836         s->lmin = avctx->lmin;
837     if (avctx->lmax)
838         s->lmax = avctx->lmax;
839
840     if (avctx->rc_eq) {
841         av_freep(&s->rc_eq);
842         s->rc_eq = av_strdup(avctx->rc_eq);
843         if (!s->rc_eq)
844             return AVERROR(ENOMEM);
845     }
846     FF_ENABLE_DEPRECATION_WARNINGS
847 #endif
848
849     if (avctx->b_frame_strategy == 2) {
850         for (i = 0; i < s->max_b_frames + 2; i++) {
851             s->tmp_frames[i] = av_frame_alloc();
852             if (!s->tmp_frames[i])
853                 return AVERROR(ENOMEM);
854
855             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
856             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
857             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
858
859             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
860             if (ret < 0)
861                 return ret;
862         }
863     }
864
865     return 0;
866 fail:
867     ff_mpv_encode_end(avctx);
868     return AVERROR_UNKNOWN;
869 }
870
871 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
872 {
873     MpegEncContext *s = avctx->priv_data;
874     int i;
875
876     ff_rate_control_uninit(s);
877
878     ff_mpv_common_end(s);
879     if (CONFIG_MJPEG_ENCODER &&
880         s->out_format == FMT_MJPEG)
881         ff_mjpeg_encode_close(s);
882
883     av_freep(&avctx->extradata);
884
885     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
886         av_frame_free(&s->tmp_frames[i]);
887
888     ff_free_picture_tables(&s->new_picture);
889     ff_mpeg_unref_picture(s, &s->new_picture);
890
891     av_freep(&s->avctx->stats_out);
892     av_freep(&s->ac_stats);
893
894     av_freep(&s->q_intra_matrix);
895     av_freep(&s->q_inter_matrix);
896     av_freep(&s->q_intra_matrix16);
897     av_freep(&s->q_inter_matrix16);
898     av_freep(&s->input_picture);
899     av_freep(&s->reordered_input_picture);
900     av_freep(&s->dct_offset);
901
902     return 0;
903 }
904
/**
 * Sum of absolute errors of a 16x16 block against a constant value.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared with
 * @param stride distance in bytes between two consecutive rows of src
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
918
919 static int get_intra_count(MpegEncContext *s, uint8_t *src,
920                            uint8_t *ref, int stride)
921 {
922     int x, y, w, h;
923     int acc = 0;
924
925     w = s->width  & ~15;
926     h = s->height & ~15;
927
928     for (y = 0; y < h; y += 16) {
929         for (x = 0; x < w; x += 16) {
930             int offset = x + y * stride;
931             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
932                                       stride, 16);
933             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
934             int sae  = get_sae(src + offset, mean, stride);
935
936             acc += sae + 500 < sad;
937         }
938     }
939     return acc;
940 }
941
942
943 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
944 {
945     Picture *pic = NULL;
946     int64_t pts;
947     int i, display_picture_number = 0, ret;
948     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
949                                                  (s->low_delay ? 0 : 1);
950     int direct = 1;
951
952     if (pic_arg) {
953         pts = pic_arg->pts;
954         display_picture_number = s->input_picture_number++;
955
956         if (pts != AV_NOPTS_VALUE) {
957             if (s->user_specified_pts != AV_NOPTS_VALUE) {
958                 int64_t time = pts;
959                 int64_t last = s->user_specified_pts;
960
961                 if (time <= last) {
962                     av_log(s->avctx, AV_LOG_ERROR,
963                            "Error, Invalid timestamp=%"PRId64", "
964                            "last=%"PRId64"\n", pts, s->user_specified_pts);
965                     return -1;
966                 }
967
968                 if (!s->low_delay && display_picture_number == 1)
969                     s->dts_delta = time - last;
970             }
971             s->user_specified_pts = pts;
972         } else {
973             if (s->user_specified_pts != AV_NOPTS_VALUE) {
974                 s->user_specified_pts =
975                 pts = s->user_specified_pts + 1;
976                 av_log(s->avctx, AV_LOG_INFO,
977                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
978                        pts);
979             } else {
980                 pts = display_picture_number;
981             }
982         }
983     }
984
985     if (pic_arg) {
986         if (!pic_arg->buf[0] ||
987             pic_arg->linesize[0] != s->linesize ||
988             pic_arg->linesize[1] != s->uvlinesize ||
989             pic_arg->linesize[2] != s->uvlinesize)
990             direct = 0;
991
992         av_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
993                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
994
995         i = ff_find_unused_picture(s, direct);
996         if (i < 0)
997             return i;
998
999         pic = &s->picture[i];
1000         pic->reference = 3;
1001
1002         if (direct) {
1003             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1004                 return ret;
1005             if (ff_alloc_picture(s, pic, 1) < 0) {
1006                 return -1;
1007             }
1008         } else {
1009             if (ff_alloc_picture(s, pic, 0) < 0) {
1010                 return -1;
1011             }
1012
1013             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1014                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1015                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1016                 // empty
1017             } else {
1018                 int h_chroma_shift, v_chroma_shift;
1019                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1020                                                  &h_chroma_shift,
1021                                                  &v_chroma_shift);
1022
1023                 for (i = 0; i < 3; i++) {
1024                     int src_stride = pic_arg->linesize[i];
1025                     int dst_stride = i ? s->uvlinesize : s->linesize;
1026                     int h_shift = i ? h_chroma_shift : 0;
1027                     int v_shift = i ? v_chroma_shift : 0;
1028                     int w = s->width  >> h_shift;
1029                     int h = s->height >> v_shift;
1030                     uint8_t *src = pic_arg->data[i];
1031                     uint8_t *dst = pic->f->data[i];
1032
1033                     if (!s->avctx->rc_buffer_size)
1034                         dst += INPLACE_OFFSET;
1035
1036                     if (src_stride == dst_stride)
1037                         memcpy(dst, src, src_stride * h);
1038                     else {
1039                         while (h--) {
1040                             memcpy(dst, src, w);
1041                             dst += dst_stride;
1042                             src += src_stride;
1043                         }
1044                     }
1045                 }
1046             }
1047         }
1048         ret = av_frame_copy_props(pic->f, pic_arg);
1049         if (ret < 0)
1050             return ret;
1051
1052         pic->f->display_picture_number = display_picture_number;
1053         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1054     }
1055
1056     /* shift buffer entries */
1057     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1058         s->input_picture[i - 1] = s->input_picture[i];
1059
1060     s->input_picture[encoding_delay] = (Picture*) pic;
1061
1062     return 0;
1063 }
1064
1065 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1066 {
1067     int x, y, plane;
1068     int score = 0;
1069     int64_t score64 = 0;
1070
1071     for (plane = 0; plane < 3; plane++) {
1072         const int stride = p->f->linesize[plane];
1073         const int bw = plane ? 1 : 2;
1074         for (y = 0; y < s->mb_height * bw; y++) {
1075             for (x = 0; x < s->mb_width * bw; x++) {
1076                 int off = p->shared ? 0 : 16;
1077                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1078                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1079                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1080
1081                 switch (s->avctx->frame_skip_exp) {
1082                 case 0: score    =  FFMAX(score, v);          break;
1083                 case 1: score   += FFABS(v);                  break;
1084                 case 2: score   += v * v;                     break;
1085                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1086                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1087                 }
1088             }
1089         }
1090     }
1091
1092     if (score)
1093         score64 = score;
1094
1095     if (score64 < s->avctx->frame_skip_threshold)
1096         return 1;
1097     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1098         return 1;
1099     return 0;
1100 }
1101
1102 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1103 {
1104     AVPacket pkt = { 0 };
1105     int ret, got_output;
1106
1107     av_init_packet(&pkt);
1108     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1109     if (ret < 0)
1110         return ret;
1111
1112     ret = pkt.size;
1113     av_free_packet(&pkt);
1114     return ret;
1115 }
1116
1117 static int estimate_best_b_count(MpegEncContext *s)
1118 {
1119     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1120     AVCodecContext *c = avcodec_alloc_context3(NULL);
1121     const int scale = s->avctx->brd_scale;
1122     int i, j, out_size, p_lambda, b_lambda, lambda2;
1123     int64_t best_rd  = INT64_MAX;
1124     int best_b_count = -1;
1125
1126     assert(scale >= 0 && scale <= 3);
1127
1128     //emms_c();
1129     //s->next_picture_ptr->quality;
1130     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1131     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1132     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1133     if (!b_lambda) // FIXME we should do this somewhere else
1134         b_lambda = p_lambda;
1135     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1136                FF_LAMBDA_SHIFT;
1137
1138     c->width        = s->width  >> scale;
1139     c->height       = s->height >> scale;
1140     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1141     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1142     c->mb_decision  = s->avctx->mb_decision;
1143     c->me_cmp       = s->avctx->me_cmp;
1144     c->mb_cmp       = s->avctx->mb_cmp;
1145     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1146     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1147     c->time_base    = s->avctx->time_base;
1148     c->max_b_frames = s->max_b_frames;
1149
1150     if (avcodec_open2(c, codec, NULL) < 0)
1151         return -1;
1152
1153     for (i = 0; i < s->max_b_frames + 2; i++) {
1154         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1155                                                 s->next_picture_ptr;
1156
1157         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1158             pre_input = *pre_input_ptr;
1159
1160             if (!pre_input.shared && i) {
1161                 pre_input.f->data[0] += INPLACE_OFFSET;
1162                 pre_input.f->data[1] += INPLACE_OFFSET;
1163                 pre_input.f->data[2] += INPLACE_OFFSET;
1164             }
1165
1166             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1167                                        s->tmp_frames[i]->linesize[0],
1168                                        pre_input.f->data[0],
1169                                        pre_input.f->linesize[0],
1170                                        c->width, c->height);
1171             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1172                                        s->tmp_frames[i]->linesize[1],
1173                                        pre_input.f->data[1],
1174                                        pre_input.f->linesize[1],
1175                                        c->width >> 1, c->height >> 1);
1176             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1177                                        s->tmp_frames[i]->linesize[2],
1178                                        pre_input.f->data[2],
1179                                        pre_input.f->linesize[2],
1180                                        c->width >> 1, c->height >> 1);
1181         }
1182     }
1183
1184     for (j = 0; j < s->max_b_frames + 1; j++) {
1185         int64_t rd = 0;
1186
1187         if (!s->input_picture[j])
1188             break;
1189
1190         c->error[0] = c->error[1] = c->error[2] = 0;
1191
1192         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1193         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1194
1195         out_size = encode_frame(c, s->tmp_frames[0]);
1196
1197         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1198
1199         for (i = 0; i < s->max_b_frames + 1; i++) {
1200             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1201
1202             s->tmp_frames[i + 1]->pict_type = is_p ?
1203                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1204             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1205
1206             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1207
1208             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1209         }
1210
1211         /* get the delayed frames */
1212         while (out_size) {
1213             out_size = encode_frame(c, NULL);
1214             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1215         }
1216
1217         rd += c->error[0] + c->error[1] + c->error[2];
1218
1219         if (rd < best_rd) {
1220             best_rd = rd;
1221             best_b_count = j;
1222         }
1223     }
1224
1225     avcodec_close(c);
1226     av_freep(&c);
1227
1228     return best_b_count;
1229 }
1230
1231 static int select_input_picture(MpegEncContext *s)
1232 {
1233     int i, ret;
1234
1235     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1236         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1237     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1238
1239     /* set next picture type & ordering */
1240     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1241         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1242             !s->next_picture_ptr || s->intra_only) {
1243             s->reordered_input_picture[0] = s->input_picture[0];
1244             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1245             s->reordered_input_picture[0]->f->coded_picture_number =
1246                 s->coded_picture_number++;
1247         } else {
1248             int b_frames;
1249
1250             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1251                 if (s->picture_in_gop_number < s->gop_size &&
1252                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1253                     // FIXME check that te gop check above is +-1 correct
1254                     av_frame_unref(s->input_picture[0]->f);
1255
1256                     emms_c();
1257                     ff_vbv_update(s, 0);
1258
1259                     goto no_output_pic;
1260                 }
1261             }
1262
1263             if (s->flags & CODEC_FLAG_PASS2) {
1264                 for (i = 0; i < s->max_b_frames + 1; i++) {
1265                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1266
1267                     if (pict_num >= s->rc_context.num_entries)
1268                         break;
1269                     if (!s->input_picture[i]) {
1270                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1271                         break;
1272                     }
1273
1274                     s->input_picture[i]->f->pict_type =
1275                         s->rc_context.entry[pict_num].new_pict_type;
1276                 }
1277             }
1278
1279             if (s->avctx->b_frame_strategy == 0) {
1280                 b_frames = s->max_b_frames;
1281                 while (b_frames && !s->input_picture[b_frames])
1282                     b_frames--;
1283             } else if (s->avctx->b_frame_strategy == 1) {
1284                 for (i = 1; i < s->max_b_frames + 1; i++) {
1285                     if (s->input_picture[i] &&
1286                         s->input_picture[i]->b_frame_score == 0) {
1287                         s->input_picture[i]->b_frame_score =
1288                             get_intra_count(s,
1289                                             s->input_picture[i    ]->f->data[0],
1290                                             s->input_picture[i - 1]->f->data[0],
1291                                             s->linesize) + 1;
1292                     }
1293                 }
1294                 for (i = 0; i < s->max_b_frames + 1; i++) {
1295                     if (!s->input_picture[i] ||
1296                         s->input_picture[i]->b_frame_score - 1 >
1297                             s->mb_num / s->avctx->b_sensitivity)
1298                         break;
1299                 }
1300
1301                 b_frames = FFMAX(0, i - 1);
1302
1303                 /* reset scores */
1304                 for (i = 0; i < b_frames + 1; i++) {
1305                     s->input_picture[i]->b_frame_score = 0;
1306                 }
1307             } else if (s->avctx->b_frame_strategy == 2) {
1308                 b_frames = estimate_best_b_count(s);
1309             } else {
1310                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1311                 b_frames = 0;
1312             }
1313
1314             emms_c();
1315
1316             for (i = b_frames - 1; i >= 0; i--) {
1317                 int type = s->input_picture[i]->f->pict_type;
1318                 if (type && type != AV_PICTURE_TYPE_B)
1319                     b_frames = i;
1320             }
1321             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1322                 b_frames == s->max_b_frames) {
1323                 av_log(s->avctx, AV_LOG_ERROR,
1324                        "warning, too many b frames in a row\n");
1325             }
1326
1327             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1328                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1329                     s->gop_size > s->picture_in_gop_number) {
1330                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1331                 } else {
1332                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1333                         b_frames = 0;
1334                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1335                 }
1336             }
1337
1338             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1339                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1340                 b_frames--;
1341
1342             s->reordered_input_picture[0] = s->input_picture[b_frames];
1343             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1344                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1345             s->reordered_input_picture[0]->f->coded_picture_number =
1346                 s->coded_picture_number++;
1347             for (i = 0; i < b_frames; i++) {
1348                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1349                 s->reordered_input_picture[i + 1]->f->pict_type =
1350                     AV_PICTURE_TYPE_B;
1351                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1352                     s->coded_picture_number++;
1353             }
1354         }
1355     }
1356 no_output_pic:
1357     if (s->reordered_input_picture[0]) {
1358         s->reordered_input_picture[0]->reference =
1359            s->reordered_input_picture[0]->f->pict_type !=
1360                AV_PICTURE_TYPE_B ? 3 : 0;
1361
1362         ff_mpeg_unref_picture(s, &s->new_picture);
1363         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1364             return ret;
1365
1366         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1367             // input is a shared pix, so we can't modifiy it -> alloc a new
1368             // one & ensure that the shared one is reuseable
1369
1370             Picture *pic;
1371             int i = ff_find_unused_picture(s, 0);
1372             if (i < 0)
1373                 return i;
1374             pic = &s->picture[i];
1375
1376             pic->reference = s->reordered_input_picture[0]->reference;
1377             if (ff_alloc_picture(s, pic, 0) < 0) {
1378                 return -1;
1379             }
1380
1381             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1382             if (ret < 0)
1383                 return ret;
1384
1385             /* mark us unused / free shared pic */
1386             av_frame_unref(s->reordered_input_picture[0]->f);
1387             s->reordered_input_picture[0]->shared = 0;
1388
1389             s->current_picture_ptr = pic;
1390         } else {
1391             // input is not a shared pix -> reuse buffer for current_pix
1392             s->current_picture_ptr = s->reordered_input_picture[0];
1393             for (i = 0; i < 4; i++) {
1394                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1395             }
1396         }
1397         ff_mpeg_unref_picture(s, &s->current_picture);
1398         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1399                                        s->current_picture_ptr)) < 0)
1400             return ret;
1401
1402         s->picture_number = s->new_picture.f->display_picture_number;
1403     } else {
1404         ff_mpeg_unref_picture(s, &s->new_picture);
1405     }
1406     return 0;
1407 }
1408
1409 static void frame_end(MpegEncContext *s)
1410 {
1411     int i;
1412
1413     if (s->unrestricted_mv &&
1414         s->current_picture.reference &&
1415         !s->intra_only) {
1416         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1417         int hshift = desc->log2_chroma_w;
1418         int vshift = desc->log2_chroma_h;
1419         s->mpvencdsp.draw_edges(s->current_picture.f->data[0], s->linesize,
1420                                 s->h_edge_pos, s->v_edge_pos,
1421                                 EDGE_WIDTH, EDGE_WIDTH,
1422                                 EDGE_TOP | EDGE_BOTTOM);
1423         s->mpvencdsp.draw_edges(s->current_picture.f->data[1], s->uvlinesize,
1424                                 s->h_edge_pos >> hshift,
1425                                 s->v_edge_pos >> vshift,
1426                                 EDGE_WIDTH >> hshift,
1427                                 EDGE_WIDTH >> vshift,
1428                                 EDGE_TOP | EDGE_BOTTOM);
1429         s->mpvencdsp.draw_edges(s->current_picture.f->data[2], s->uvlinesize,
1430                                 s->h_edge_pos >> hshift,
1431                                 s->v_edge_pos >> vshift,
1432                                 EDGE_WIDTH >> hshift,
1433                                 EDGE_WIDTH >> vshift,
1434                                 EDGE_TOP | EDGE_BOTTOM);
1435     }
1436
1437     emms_c();
1438
1439     s->last_pict_type                 = s->pict_type;
1440     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1441     if (s->pict_type!= AV_PICTURE_TYPE_B)
1442         s->last_non_b_pict_type = s->pict_type;
1443
1444     if (s->encoding) {
1445         /* release non-reference frames */
1446         for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1447             if (!s->picture[i].reference)
1448                 ff_mpeg_unref_picture(s, &s->picture[i]);
1449         }
1450     }
1451
1452     s->avctx->coded_frame = s->current_picture_ptr->f;
1453
1454 }
1455
1456 static void update_noise_reduction(MpegEncContext *s)
1457 {
1458     int intra, i;
1459
1460     for (intra = 0; intra < 2; intra++) {
1461         if (s->dct_count[intra] > (1 << 16)) {
1462             for (i = 0; i < 64; i++) {
1463                 s->dct_error_sum[intra][i] >>= 1;
1464             }
1465             s->dct_count[intra] >>= 1;
1466         }
1467
1468         for (i = 0; i < 64; i++) {
1469             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1470                                        s->dct_count[intra] +
1471                                        s->dct_error_sum[intra][i] / 2) /
1472                                       (s->dct_error_sum[intra][i] + 1);
1473         }
1474     }
1475 }
1476
1477 static int frame_start(MpegEncContext *s)
1478 {
1479     int ret;
1480
1481     /* mark & release old frames */
1482     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1483         s->last_picture_ptr != s->next_picture_ptr &&
1484         s->last_picture_ptr->f->buf[0]) {
1485         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1486     }
1487
1488     s->current_picture_ptr->f->pict_type = s->pict_type;
1489     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1490
1491     ff_mpeg_unref_picture(s, &s->current_picture);
1492     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1493                                    s->current_picture_ptr)) < 0)
1494         return ret;
1495
1496     if (s->pict_type != AV_PICTURE_TYPE_B) {
1497         s->last_picture_ptr = s->next_picture_ptr;
1498         if (!s->droppable)
1499             s->next_picture_ptr = s->current_picture_ptr;
1500     }
1501
1502     if (s->last_picture_ptr) {
1503         ff_mpeg_unref_picture(s, &s->last_picture);
1504         if (s->last_picture_ptr->f->buf[0] &&
1505             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1506                                        s->last_picture_ptr)) < 0)
1507             return ret;
1508     }
1509     if (s->next_picture_ptr) {
1510         ff_mpeg_unref_picture(s, &s->next_picture);
1511         if (s->next_picture_ptr->f->buf[0] &&
1512             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1513                                        s->next_picture_ptr)) < 0)
1514             return ret;
1515     }
1516
1517     if (s->picture_structure!= PICT_FRAME) {
1518         int i;
1519         for (i = 0; i < 4; i++) {
1520             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1521                 s->current_picture.f->data[i] +=
1522                     s->current_picture.f->linesize[i];
1523             }
1524             s->current_picture.f->linesize[i] *= 2;
1525             s->last_picture.f->linesize[i]    *= 2;
1526             s->next_picture.f->linesize[i]    *= 2;
1527         }
1528     }
1529
1530     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1531         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1532         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1533     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1534         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1535         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1536     } else {
1537         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1538         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1539     }
1540
1541     if (s->dct_error_sum) {
1542         assert(s->avctx->noise_reduction && s->encoding);
1543         update_noise_reduction(s);
1544     }
1545
1546     return 0;
1547 }
1548
1549 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1550                           const AVFrame *pic_arg, int *got_packet)
1551 {
1552     MpegEncContext *s = avctx->priv_data;
1553     int i, stuffing_count, ret;
1554     int context_count = s->slice_context_count;
1555
1556     s->picture_in_gop_number++;
1557
1558     if (load_input_picture(s, pic_arg) < 0)
1559         return -1;
1560
1561     if (select_input_picture(s) < 0) {
1562         return -1;
1563     }
1564
1565     /* output? */
1566     if (s->new_picture.f->data[0]) {
1567         if (!pkt->data &&
1568             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1569             return ret;
1570         if (s->mb_info) {
1571             s->mb_info_ptr = av_packet_new_side_data(pkt,
1572                                  AV_PKT_DATA_H263_MB_INFO,
1573                                  s->mb_width*s->mb_height*12);
1574             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1575         }
1576
1577         for (i = 0; i < context_count; i++) {
1578             int start_y = s->thread_context[i]->start_mb_y;
1579             int   end_y = s->thread_context[i]->  end_mb_y;
1580             int h       = s->mb_height;
1581             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1582             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1583
1584             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1585         }
1586
1587         s->pict_type = s->new_picture.f->pict_type;
1588         //emms_c();
1589         ret = frame_start(s);
1590         if (ret < 0)
1591             return ret;
1592 vbv_retry:
1593         if (encode_picture(s, s->picture_number) < 0)
1594             return -1;
1595
1596         avctx->header_bits = s->header_bits;
1597         avctx->mv_bits     = s->mv_bits;
1598         avctx->misc_bits   = s->misc_bits;
1599         avctx->i_tex_bits  = s->i_tex_bits;
1600         avctx->p_tex_bits  = s->p_tex_bits;
1601         avctx->i_count     = s->i_count;
1602         // FIXME f/b_count in avctx
1603         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1604         avctx->skip_count  = s->skip_count;
1605
1606         frame_end(s);
1607
1608         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1609             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1610
1611         if (avctx->rc_buffer_size) {
1612             RateControlContext *rcc = &s->rc_context;
1613             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1614
1615             if (put_bits_count(&s->pb) > max_size &&
1616                 s->lambda < s->lmax) {
1617                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1618                                        (s->qscale + 1) / s->qscale);
1619                 if (s->adaptive_quant) {
1620                     int i;
1621                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1622                         s->lambda_table[i] =
1623                             FFMAX(s->lambda_table[i] + 1,
1624                                   s->lambda_table[i] * (s->qscale + 1) /
1625                                   s->qscale);
1626                 }
1627                 s->mb_skipped = 0;        // done in frame_start()
1628                 // done in encode_picture() so we must undo it
1629                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1630                     if (s->flipflop_rounding          ||
1631                         s->codec_id == AV_CODEC_ID_H263P ||
1632                         s->codec_id == AV_CODEC_ID_MPEG4)
1633                         s->no_rounding ^= 1;
1634                 }
1635                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1636                     s->time_base       = s->last_time_base;
1637                     s->last_non_b_time = s->time - s->pp_time;
1638                 }
1639                 for (i = 0; i < context_count; i++) {
1640                     PutBitContext *pb = &s->thread_context[i]->pb;
1641                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1642                 }
1643                 goto vbv_retry;
1644             }
1645
1646             assert(s->avctx->rc_max_rate);
1647         }
1648
1649         if (s->flags & CODEC_FLAG_PASS1)
1650             ff_write_pass1_stats(s);
1651
1652         for (i = 0; i < 4; i++) {
1653             s->current_picture_ptr->f->error[i] = s->current_picture.f->error[i];
1654             avctx->error[i] += s->current_picture_ptr->f->error[i];
1655         }
1656
1657         if (s->flags & CODEC_FLAG_PASS1)
1658             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1659                    avctx->i_tex_bits + avctx->p_tex_bits ==
1660                        put_bits_count(&s->pb));
1661         flush_put_bits(&s->pb);
1662         s->frame_bits  = put_bits_count(&s->pb);
1663
1664         stuffing_count = ff_vbv_update(s, s->frame_bits);
1665         if (stuffing_count) {
1666             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1667                     stuffing_count + 50) {
1668                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1669                 return -1;
1670             }
1671
1672             switch (s->codec_id) {
1673             case AV_CODEC_ID_MPEG1VIDEO:
1674             case AV_CODEC_ID_MPEG2VIDEO:
1675                 while (stuffing_count--) {
1676                     put_bits(&s->pb, 8, 0);
1677                 }
1678             break;
1679             case AV_CODEC_ID_MPEG4:
1680                 put_bits(&s->pb, 16, 0);
1681                 put_bits(&s->pb, 16, 0x1C3);
1682                 stuffing_count -= 4;
1683                 while (stuffing_count--) {
1684                     put_bits(&s->pb, 8, 0xFF);
1685                 }
1686             break;
1687             default:
1688                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1689             }
1690             flush_put_bits(&s->pb);
1691             s->frame_bits  = put_bits_count(&s->pb);
1692         }
1693
1694         /* update mpeg1/2 vbv_delay for CBR */
1695         if (s->avctx->rc_max_rate                          &&
1696             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1697             s->out_format == FMT_MPEG1                     &&
1698             90000LL * (avctx->rc_buffer_size - 1) <=
1699                 s->avctx->rc_max_rate * 0xFFFFLL) {
1700             int vbv_delay, min_delay;
1701             double inbits  = s->avctx->rc_max_rate *
1702                              av_q2d(s->avctx->time_base);
1703             int    minbits = s->frame_bits - 8 *
1704                              (s->vbv_delay_ptr - s->pb.buf - 1);
1705             double bits    = s->rc_context.buffer_index + minbits - inbits;
1706
1707             if (bits < 0)
1708                 av_log(s->avctx, AV_LOG_ERROR,
1709                        "Internal error, negative bits\n");
1710
1711             assert(s->repeat_first_field == 0);
1712
1713             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1714             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1715                         s->avctx->rc_max_rate;
1716
1717             vbv_delay = FFMAX(vbv_delay, min_delay);
1718
1719             assert(vbv_delay < 0xFFFF);
1720
1721             s->vbv_delay_ptr[0] &= 0xF8;
1722             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1723             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1724             s->vbv_delay_ptr[2] &= 0x07;
1725             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1726             avctx->vbv_delay     = vbv_delay * 300;
1727         }
1728         s->total_bits     += s->frame_bits;
1729         avctx->frame_bits  = s->frame_bits;
1730
1731         pkt->pts = s->current_picture.f->pts;
1732         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1733             if (!s->current_picture.f->coded_picture_number)
1734                 pkt->dts = pkt->pts - s->dts_delta;
1735             else
1736                 pkt->dts = s->reordered_pts;
1737             s->reordered_pts = pkt->pts;
1738         } else
1739             pkt->dts = pkt->pts;
1740         if (s->current_picture.f->key_frame)
1741             pkt->flags |= AV_PKT_FLAG_KEY;
1742         if (s->mb_info)
1743             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1744     } else {
1745         s->frame_bits = 0;
1746     }
1747     assert((s->frame_bits & 7) == 0);
1748
1749     pkt->size = s->frame_bits / 8;
1750     *got_packet = !!pkt->size;
1751     return 0;
1752 }
1753
1754 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1755                                                 int n, int threshold)
1756 {
1757     static const char tab[64] = {
1758         3, 2, 2, 1, 1, 1, 1, 1,
1759         1, 1, 1, 1, 1, 1, 1, 1,
1760         1, 1, 1, 1, 1, 1, 1, 1,
1761         0, 0, 0, 0, 0, 0, 0, 0,
1762         0, 0, 0, 0, 0, 0, 0, 0,
1763         0, 0, 0, 0, 0, 0, 0, 0,
1764         0, 0, 0, 0, 0, 0, 0, 0,
1765         0, 0, 0, 0, 0, 0, 0, 0
1766     };
1767     int score = 0;
1768     int run = 0;
1769     int i;
1770     int16_t *block = s->block[n];
1771     const int last_index = s->block_last_index[n];
1772     int skip_dc;
1773
1774     if (threshold < 0) {
1775         skip_dc = 0;
1776         threshold = -threshold;
1777     } else
1778         skip_dc = 1;
1779
1780     /* Are all we could set to zero already zero? */
1781     if (last_index <= skip_dc - 1)
1782         return;
1783
1784     for (i = 0; i <= last_index; i++) {
1785         const int j = s->intra_scantable.permutated[i];
1786         const int level = FFABS(block[j]);
1787         if (level == 1) {
1788             if (skip_dc && i == 0)
1789                 continue;
1790             score += tab[run];
1791             run = 0;
1792         } else if (level > 1) {
1793             return;
1794         } else {
1795             run++;
1796         }
1797     }
1798     if (score >= threshold)
1799         return;
1800     for (i = skip_dc; i <= last_index; i++) {
1801         const int j = s->intra_scantable.permutated[i];
1802         block[j] = 0;
1803     }
1804     if (block[0])
1805         s->block_last_index[n] = 0;
1806     else
1807         s->block_last_index[n] = -1;
1808 }
1809
1810 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1811                                int last_index)
1812 {
1813     int i;
1814     const int maxlevel = s->max_qcoeff;
1815     const int minlevel = s->min_qcoeff;
1816     int overflow = 0;
1817
1818     if (s->mb_intra) {
1819         i = 1; // skip clipping of intra dc
1820     } else
1821         i = 0;
1822
1823     for (; i <= last_index; i++) {
1824         const int j = s->intra_scantable.permutated[i];
1825         int level = block[j];
1826
1827         if (level > maxlevel) {
1828             level = maxlevel;
1829             overflow++;
1830         } else if (level < minlevel) {
1831             level = minlevel;
1832             overflow++;
1833         }
1834
1835         block[j] = level;
1836     }
1837
1838     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1839         av_log(s->avctx, AV_LOG_INFO,
1840                "warning, clipping %d dct coefficients to %d..%d\n",
1841                overflow, minlevel, maxlevel);
1842 }
1843
/**
 * Compute a per-sample weight for an 8x8 block from local sample statistics.
 *
 * For every sample, the sum and sum of squares over its 3x3 neighbourhood
 * (cropped at the block border) are gathered, and the weight is
 * 36 * sqrt(count * sqr - sum * sum) / count.
 *
 * @param weight output, 64 entries stored row by row
 * @param ptr    top-left sample of the 8x8 block
 * @param stride distance in bytes between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            int total    = 0;
            int total_sq = 0;
            int samples  = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                const uint8_t *line = ptr + ny * stride;

                for (nx = left; nx < right; nx++) {
                    const int pix = line[nx];

                    total    += pix;
                    total_sq += pix * pix;
                    samples++;
                }
            }

            weight[col + 8 * row] =
                (36 * ff_sqrt(samples * total_sq - total * total)) / samples;
        }
    }
}
1867
1868 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1869                                                 int motion_x, int motion_y,
1870                                                 int mb_block_height,
1871                                                 int mb_block_count)
1872 {
1873     int16_t weight[8][64];
1874     int16_t orig[8][64];
1875     const int mb_x = s->mb_x;
1876     const int mb_y = s->mb_y;
1877     int i;
1878     int skip_dct[8];
1879     int dct_offset = s->linesize * 8; // default for progressive frames
1880     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1881     ptrdiff_t wrap_y, wrap_c;
1882
1883     for (i = 0; i < mb_block_count; i++)
1884         skip_dct[i] = s->skipdct;
1885
1886     if (s->adaptive_quant) {
1887         const int last_qp = s->qscale;
1888         const int mb_xy = mb_x + mb_y * s->mb_stride;
1889
1890         s->lambda = s->lambda_table[mb_xy];
1891         update_qscale(s);
1892
1893         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1894             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1895             s->dquant = s->qscale - last_qp;
1896
1897             if (s->out_format == FMT_H263) {
1898                 s->dquant = av_clip(s->dquant, -2, 2);
1899
1900                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1901                     if (!s->mb_intra) {
1902                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1903                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1904                                 s->dquant = 0;
1905                         }
1906                         if (s->mv_type == MV_TYPE_8X8)
1907                             s->dquant = 0;
1908                     }
1909                 }
1910             }
1911         }
1912         ff_set_qscale(s, last_qp + s->dquant);
1913     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1914         ff_set_qscale(s, s->qscale + s->dquant);
1915
1916     wrap_y = s->linesize;
1917     wrap_c = s->uvlinesize;
1918     ptr_y  = s->new_picture.f->data[0] +
1919              (mb_y * 16 * wrap_y)              + mb_x * 16;
1920     ptr_cb = s->new_picture.f->data[1] +
1921              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1922     ptr_cr = s->new_picture.f->data[2] +
1923              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1924
1925     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1926         uint8_t *ebuf = s->edge_emu_buffer + 32;
1927         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
1928                                  wrap_y, wrap_y,
1929                                  16, 16, mb_x * 16, mb_y * 16,
1930                                  s->width, s->height);
1931         ptr_y = ebuf;
1932         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
1933                                  wrap_c, wrap_c,
1934                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1935                                  s->width >> 1, s->height >> 1);
1936         ptr_cb = ebuf + 18 * wrap_y;
1937         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
1938                                  wrap_c, wrap_c,
1939                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1940                                  s->width >> 1, s->height >> 1);
1941         ptr_cr = ebuf + 18 * wrap_y + 8;
1942     }
1943
1944     if (s->mb_intra) {
1945         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1946             int progressive_score, interlaced_score;
1947
1948             s->interlaced_dct = 0;
1949             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
1950                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1951                                                      NULL, wrap_y, 8) - 400;
1952
1953             if (progressive_score > 0) {
1954                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
1955                                                         NULL, wrap_y * 2, 8) +
1956                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
1957                                                         NULL, wrap_y * 2, 8);
1958                 if (progressive_score > interlaced_score) {
1959                     s->interlaced_dct = 1;
1960
1961                     dct_offset = wrap_y;
1962                     wrap_y <<= 1;
1963                     if (s->chroma_format == CHROMA_422)
1964                         wrap_c <<= 1;
1965                 }
1966             }
1967         }
1968
1969         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
1970         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
1971         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
1972         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
1973
1974         if (s->flags & CODEC_FLAG_GRAY) {
1975             skip_dct[4] = 1;
1976             skip_dct[5] = 1;
1977         } else {
1978             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1979             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1980             if (!s->chroma_y_shift) { /* 422 */
1981                 s->pdsp.get_pixels(s->block[6],
1982                                    ptr_cb + (dct_offset >> 1), wrap_c);
1983                 s->pdsp.get_pixels(s->block[7],
1984                                    ptr_cr + (dct_offset >> 1), wrap_c);
1985             }
1986         }
1987     } else {
1988         op_pixels_func (*op_pix)[4];
1989         qpel_mc_func (*op_qpix)[16];
1990         uint8_t *dest_y, *dest_cb, *dest_cr;
1991
1992         dest_y  = s->dest[0];
1993         dest_cb = s->dest[1];
1994         dest_cr = s->dest[2];
1995
1996         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1997             op_pix  = s->hdsp.put_pixels_tab;
1998             op_qpix = s->qdsp.put_qpel_pixels_tab;
1999         } else {
2000             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2001             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2002         }
2003
2004         if (s->mv_dir & MV_DIR_FORWARD) {
2005             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2006                           s->last_picture.f->data,
2007                           op_pix, op_qpix);
2008             op_pix  = s->hdsp.avg_pixels_tab;
2009             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2010         }
2011         if (s->mv_dir & MV_DIR_BACKWARD) {
2012             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2013                           s->next_picture.f->data,
2014                           op_pix, op_qpix);
2015         }
2016
2017         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2018             int progressive_score, interlaced_score;
2019
2020             s->interlaced_dct = 0;
2021             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2022                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2023                                                      ptr_y + wrap_y * 8,
2024                                                      wrap_y, 8) - 400;
2025
2026             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2027                 progressive_score -= 400;
2028
2029             if (progressive_score > 0) {
2030                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2031                                                         wrap_y * 2, 8) +
2032                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2033                                                         ptr_y + wrap_y,
2034                                                         wrap_y * 2, 8);
2035
2036                 if (progressive_score > interlaced_score) {
2037                     s->interlaced_dct = 1;
2038
2039                     dct_offset = wrap_y;
2040                     wrap_y <<= 1;
2041                     if (s->chroma_format == CHROMA_422)
2042                         wrap_c <<= 1;
2043                 }
2044             }
2045         }
2046
2047         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2048         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2049         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2050                             dest_y + dct_offset, wrap_y);
2051         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2052                             dest_y + dct_offset + 8, wrap_y);
2053
2054         if (s->flags & CODEC_FLAG_GRAY) {
2055             skip_dct[4] = 1;
2056             skip_dct[5] = 1;
2057         } else {
2058             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2059             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2060             if (!s->chroma_y_shift) { /* 422 */
2061                 s->pdsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
2062                                     dest_cb + (dct_offset >> 1), wrap_c);
2063                 s->pdsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
2064                                     dest_cr + (dct_offset >> 1), wrap_c);
2065             }
2066         }
2067         /* pre quantization */
2068         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2069                 2 * s->qscale * s->qscale) {
2070             // FIXME optimize
2071             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2072                 skip_dct[0] = 1;
2073             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2074                 skip_dct[1] = 1;
2075             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2076                                wrap_y, 8) < 20 * s->qscale)
2077                 skip_dct[2] = 1;
2078             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2079                                wrap_y, 8) < 20 * s->qscale)
2080                 skip_dct[3] = 1;
2081             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2082                 skip_dct[4] = 1;
2083             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2084                 skip_dct[5] = 1;
2085             if (!s->chroma_y_shift) { /* 422 */
2086                 if (s->mecc.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2087                                    dest_cb + (dct_offset >> 1),
2088                                    wrap_c, 8) < 20 * s->qscale)
2089                     skip_dct[6] = 1;
2090                 if (s->mecc.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2091                                    dest_cr + (dct_offset >> 1),
2092                                    wrap_c, 8) < 20 * s->qscale)
2093                     skip_dct[7] = 1;
2094             }
2095         }
2096     }
2097
2098     if (s->quantizer_noise_shaping) {
2099         if (!skip_dct[0])
2100             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2101         if (!skip_dct[1])
2102             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2103         if (!skip_dct[2])
2104             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2105         if (!skip_dct[3])
2106             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2107         if (!skip_dct[4])
2108             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2109         if (!skip_dct[5])
2110             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2111         if (!s->chroma_y_shift) { /* 422 */
2112             if (!skip_dct[6])
2113                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2114                                   wrap_c);
2115             if (!skip_dct[7])
2116                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2117                                   wrap_c);
2118         }
2119         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2120     }
2121
2122     /* DCT & quantize */
2123     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
2124     {
2125         for (i = 0; i < mb_block_count; i++) {
2126             if (!skip_dct[i]) {
2127                 int overflow;
2128                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2129                 // FIXME we could decide to change to quantizer instead of
2130                 // clipping
2131                 // JS: I don't think that would be a good idea it could lower
2132                 //     quality instead of improve it. Just INTRADC clipping
2133                 //     deserves changes in quantizer
2134                 if (overflow)
2135                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2136             } else
2137                 s->block_last_index[i] = -1;
2138         }
2139         if (s->quantizer_noise_shaping) {
2140             for (i = 0; i < mb_block_count; i++) {
2141                 if (!skip_dct[i]) {
2142                     s->block_last_index[i] =
2143                         dct_quantize_refine(s, s->block[i], weight[i],
2144                                             orig[i], i, s->qscale);
2145                 }
2146             }
2147         }
2148
2149         if (s->luma_elim_threshold && !s->mb_intra)
2150             for (i = 0; i < 4; i++)
2151                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2152         if (s->chroma_elim_threshold && !s->mb_intra)
2153             for (i = 4; i < mb_block_count; i++)
2154                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2155
2156         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2157             for (i = 0; i < mb_block_count; i++) {
2158                 if (s->block_last_index[i] == -1)
2159                     s->coded_score[i] = INT_MAX / 256;
2160             }
2161         }
2162     }
2163
2164     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2165         s->block_last_index[4] =
2166         s->block_last_index[5] = 0;
2167         s->block[4][0] =
2168         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2169     }
2170
2171     // non c quantize code returns incorrect block_last_index FIXME
2172     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2173         for (i = 0; i < mb_block_count; i++) {
2174             int j;
2175             if (s->block_last_index[i] > 0) {
2176                 for (j = 63; j > 0; j--) {
2177                     if (s->block[i][s->intra_scantable.permutated[j]])
2178                         break;
2179                 }
2180                 s->block_last_index[i] = j;
2181             }
2182         }
2183     }
2184
2185     /* huffman encode */
2186     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2187     case AV_CODEC_ID_MPEG1VIDEO:
2188     case AV_CODEC_ID_MPEG2VIDEO:
2189         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2190             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2191         break;
2192     case AV_CODEC_ID_MPEG4:
2193         if (CONFIG_MPEG4_ENCODER)
2194             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2195         break;
2196     case AV_CODEC_ID_MSMPEG4V2:
2197     case AV_CODEC_ID_MSMPEG4V3:
2198     case AV_CODEC_ID_WMV1:
2199         if (CONFIG_MSMPEG4_ENCODER)
2200             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2201         break;
2202     case AV_CODEC_ID_WMV2:
2203         if (CONFIG_WMV2_ENCODER)
2204             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2205         break;
2206     case AV_CODEC_ID_H261:
2207         if (CONFIG_H261_ENCODER)
2208             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2209         break;
2210     case AV_CODEC_ID_H263:
2211     case AV_CODEC_ID_H263P:
2212     case AV_CODEC_ID_FLV1:
2213     case AV_CODEC_ID_RV10:
2214     case AV_CODEC_ID_RV20:
2215         if (CONFIG_H263_ENCODER)
2216             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2217         break;
2218     case AV_CODEC_ID_MJPEG:
2219         if (CONFIG_MJPEG_ENCODER)
2220             ff_mjpeg_encode_mb(s, s->block);
2221         break;
2222     default:
2223         assert(0);
2224     }
2225 }
2226
2227 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2228 {
2229     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2230     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2231 }
2232
2233 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2234     int i;
2235
2236     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2237
2238     /* mpeg1 */
2239     d->mb_skip_run= s->mb_skip_run;
2240     for(i=0; i<3; i++)
2241         d->last_dc[i] = s->last_dc[i];
2242
2243     /* statistics */
2244     d->mv_bits= s->mv_bits;
2245     d->i_tex_bits= s->i_tex_bits;
2246     d->p_tex_bits= s->p_tex_bits;
2247     d->i_count= s->i_count;
2248     d->f_count= s->f_count;
2249     d->b_count= s->b_count;
2250     d->skip_count= s->skip_count;
2251     d->misc_bits= s->misc_bits;
2252     d->last_bits= 0;
2253
2254     d->mb_skipped= 0;
2255     d->qscale= s->qscale;
2256     d->dquant= s->dquant;
2257
2258     d->esc3_level_length= s->esc3_level_length;
2259 }
2260
2261 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2262     int i;
2263
2264     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2265     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2266
2267     /* mpeg1 */
2268     d->mb_skip_run= s->mb_skip_run;
2269     for(i=0; i<3; i++)
2270         d->last_dc[i] = s->last_dc[i];
2271
2272     /* statistics */
2273     d->mv_bits= s->mv_bits;
2274     d->i_tex_bits= s->i_tex_bits;
2275     d->p_tex_bits= s->p_tex_bits;
2276     d->i_count= s->i_count;
2277     d->f_count= s->f_count;
2278     d->b_count= s->b_count;
2279     d->skip_count= s->skip_count;
2280     d->misc_bits= s->misc_bits;
2281
2282     d->mb_intra= s->mb_intra;
2283     d->mb_skipped= s->mb_skipped;
2284     d->mv_type= s->mv_type;
2285     d->mv_dir= s->mv_dir;
2286     d->pb= s->pb;
2287     if(s->data_partitioning){
2288         d->pb2= s->pb2;
2289         d->tex_pb= s->tex_pb;
2290     }
2291     d->block= s->block;
2292     for(i=0; i<8; i++)
2293         d->block_last_index[i]= s->block_last_index[i];
2294     d->interlaced_dct= s->interlaced_dct;
2295     d->qscale= s->qscale;
2296
2297     d->esc3_level_length= s->esc3_level_length;
2298 }
2299
2300 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2301                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2302                            int *dmin, int *next_block, int motion_x, int motion_y)
2303 {
2304     int score;
2305     uint8_t *dest_backup[3];
2306
2307     copy_context_before_encode(s, backup, type);
2308
2309     s->block= s->blocks[*next_block];
2310     s->pb= pb[*next_block];
2311     if(s->data_partitioning){
2312         s->pb2   = pb2   [*next_block];
2313         s->tex_pb= tex_pb[*next_block];
2314     }
2315
2316     if(*next_block){
2317         memcpy(dest_backup, s->dest, sizeof(s->dest));
2318         s->dest[0] = s->rd_scratchpad;
2319         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2320         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2321         assert(s->linesize >= 32); //FIXME
2322     }
2323
2324     encode_mb(s, motion_x, motion_y);
2325
2326     score= put_bits_count(&s->pb);
2327     if(s->data_partitioning){
2328         score+= put_bits_count(&s->pb2);
2329         score+= put_bits_count(&s->tex_pb);
2330     }
2331
2332     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2333         ff_mpv_decode_mb(s, s->block);
2334
2335         score *= s->lambda2;
2336         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2337     }
2338
2339     if(*next_block){
2340         memcpy(s->dest, dest_backup, sizeof(s->dest));
2341     }
2342
2343     if(score<*dmin){
2344         *dmin= score;
2345         *next_block^=1;
2346
2347         copy_context_after_encode(best, s, type);
2348     }
2349 }
2350
2351 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2352     uint32_t *sq = ff_square_tab + 256;
2353     int acc=0;
2354     int x,y;
2355
2356     if(w==16 && h==16)
2357         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2358     else if(w==8 && h==8)
2359         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2360
2361     for(y=0; y<h; y++){
2362         for(x=0; x<w; x++){
2363             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2364         }
2365     }
2366
2367     assert(acc>=0);
2368
2369     return acc;
2370 }
2371
2372 static int sse_mb(MpegEncContext *s){
2373     int w= 16;
2374     int h= 16;
2375
2376     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2377     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2378
2379     if(w==16 && h==16)
2380       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2381         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2382                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2383                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2384       }else{
2385         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2386                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2387                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2388       }
2389     else
2390         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2391                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2392                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2393 }
2394
2395 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2396     MpegEncContext *s= *(void**)arg;
2397
2398
2399     s->me.pre_pass=1;
2400     s->me.dia_size= s->avctx->pre_dia_size;
2401     s->first_slice_line=1;
2402     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2403         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2404             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2405         }
2406         s->first_slice_line=0;
2407     }
2408
2409     s->me.pre_pass=0;
2410
2411     return 0;
2412 }
2413
2414 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2415     MpegEncContext *s= *(void**)arg;
2416
2417     s->me.dia_size= s->avctx->dia_size;
2418     s->first_slice_line=1;
2419     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2420         s->mb_x=0; //for block init below
2421         ff_init_block_index(s);
2422         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2423             s->block_index[0]+=2;
2424             s->block_index[1]+=2;
2425             s->block_index[2]+=2;
2426             s->block_index[3]+=2;
2427
2428             /* compute motion vector & mb_type and store in context */
2429             if(s->pict_type==AV_PICTURE_TYPE_B)
2430                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2431             else
2432                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2433         }
2434         s->first_slice_line=0;
2435     }
2436     return 0;
2437 }
2438
2439 static int mb_var_thread(AVCodecContext *c, void *arg){
2440     MpegEncContext *s= *(void**)arg;
2441     int mb_x, mb_y;
2442
2443     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2444         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2445             int xx = mb_x * 16;
2446             int yy = mb_y * 16;
2447             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2448             int varc;
2449             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2450
2451             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2452                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2453
2454             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2455             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2456             s->me.mb_var_sum_temp    += varc;
2457         }
2458     }
2459     return 0;
2460 }
2461
2462 static void write_slice_end(MpegEncContext *s){
2463     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2464         if(s->partitioned_frame){
2465             ff_mpeg4_merge_partitions(s);
2466         }
2467
2468         ff_mpeg4_stuffing(&s->pb);
2469     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2470         ff_mjpeg_encode_stuffing(&s->pb);
2471     }
2472
2473     avpriv_align_put_bits(&s->pb);
2474     flush_put_bits(&s->pb);
2475
2476     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2477         s->misc_bits+= get_bits_diff(s);
2478 }
2479
2480 static void write_mb_info(MpegEncContext *s)
2481 {
2482     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2483     int offset = put_bits_count(&s->pb);
2484     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2485     int gobn = s->mb_y / s->gob_index;
2486     int pred_x, pred_y;
2487     if (CONFIG_H263_ENCODER)
2488         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2489     bytestream_put_le32(&ptr, offset);
2490     bytestream_put_byte(&ptr, s->qscale);
2491     bytestream_put_byte(&ptr, gobn);
2492     bytestream_put_le16(&ptr, mba);
2493     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2494     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2495     /* 4MV not implemented */
2496     bytestream_put_byte(&ptr, 0); /* hmv2 */
2497     bytestream_put_byte(&ptr, 0); /* vmv2 */
2498 }
2499
2500 static void update_mb_info(MpegEncContext *s, int startcode)
2501 {
2502     if (!s->mb_info)
2503         return;
2504     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2505         s->mb_info_size += 12;
2506         s->prev_mb_info = s->last_mb_info;
2507     }
2508     if (startcode) {
2509         s->prev_mb_info = put_bits_count(&s->pb)/8;
2510         /* This might have incremented mb_info_size above, and we return without
2511          * actually writing any info into that slot yet. But in that case,
2512          * this will be called again at the start of the after writing the
2513          * start code, actually writing the mb info. */
2514         return;
2515     }
2516
2517     s->last_mb_info = put_bits_count(&s->pb)/8;
2518     if (!s->mb_info_size)
2519         s->mb_info_size += 12;
2520     write_mb_info(s);
2521 }
2522
2523 static int encode_thread(AVCodecContext *c, void *arg){
2524     MpegEncContext *s= *(void**)arg;
2525     int mb_x, mb_y, pdif = 0;
2526     int chr_h= 16>>s->chroma_y_shift;
2527     int i, j;
2528     MpegEncContext best_s, backup_s;
2529     uint8_t bit_buf[2][MAX_MB_BYTES];
2530     uint8_t bit_buf2[2][MAX_MB_BYTES];
2531     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2532     PutBitContext pb[2], pb2[2], tex_pb[2];
2533
2534     for(i=0; i<2; i++){
2535         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2536         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2537         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2538     }
2539
2540     s->last_bits= put_bits_count(&s->pb);
2541     s->mv_bits=0;
2542     s->misc_bits=0;
2543     s->i_tex_bits=0;
2544     s->p_tex_bits=0;
2545     s->i_count=0;
2546     s->f_count=0;
2547     s->b_count=0;
2548     s->skip_count=0;
2549
2550     for(i=0; i<3; i++){
2551         /* init last dc values */
2552         /* note: quant matrix value (8) is implied here */
2553         s->last_dc[i] = 128 << s->intra_dc_precision;
2554
2555         s->current_picture.f->error[i] = 0;
2556     }
2557     s->mb_skip_run = 0;
2558     memset(s->last_mv, 0, sizeof(s->last_mv));
2559
2560     s->last_mv_dir = 0;
2561
2562     switch(s->codec_id){
2563     case AV_CODEC_ID_H263:
2564     case AV_CODEC_ID_H263P:
2565     case AV_CODEC_ID_FLV1:
2566         if (CONFIG_H263_ENCODER)
2567             s->gob_index = ff_h263_get_gob_height(s);
2568         break;
2569     case AV_CODEC_ID_MPEG4:
2570         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2571             ff_mpeg4_init_partitions(s);
2572         break;
2573     }
2574
2575     s->resync_mb_x=0;
2576     s->resync_mb_y=0;
2577     s->first_slice_line = 1;
2578     s->ptr_lastgob = s->pb.buf;
2579     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2580         s->mb_x=0;
2581         s->mb_y= mb_y;
2582
2583         ff_set_qscale(s, s->qscale);
2584         ff_init_block_index(s);
2585
2586         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2587             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2588             int mb_type= s->mb_type[xy];
2589 //            int d;
2590             int dmin= INT_MAX;
2591             int dir;
2592
2593             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2594                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2595                 return -1;
2596             }
2597             if(s->data_partitioning){
2598                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2599                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2600                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2601                     return -1;
2602                 }
2603             }
2604
2605             s->mb_x = mb_x;
2606             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2607             ff_update_block_index(s);
2608
2609             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2610                 ff_h261_reorder_mb_index(s);
2611                 xy= s->mb_y*s->mb_stride + s->mb_x;
2612                 mb_type= s->mb_type[xy];
2613             }
2614
2615             /* write gob / video packet header  */
2616             if(s->rtp_mode){
2617                 int current_packet_size, is_gob_start;
2618
2619                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2620
2621                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2622
2623                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2624
2625                 switch(s->codec_id){
2626                 case AV_CODEC_ID_H263:
2627                 case AV_CODEC_ID_H263P:
2628                     if(!s->h263_slice_structured)
2629                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2630                     break;
2631                 case AV_CODEC_ID_MPEG2VIDEO:
2632                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2633                 case AV_CODEC_ID_MPEG1VIDEO:
2634                     if(s->mb_skip_run) is_gob_start=0;
2635                     break;
2636                 }
2637
2638                 if(is_gob_start){
2639                     if(s->start_mb_y != mb_y || mb_x!=0){
2640                         write_slice_end(s);
2641
2642                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2643                             ff_mpeg4_init_partitions(s);
2644                         }
2645                     }
2646
2647                     assert((put_bits_count(&s->pb)&7) == 0);
2648                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2649
2650                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2651                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2652                         int d = 100 / s->error_rate;
2653                         if(r % d == 0){
2654                             current_packet_size=0;
2655                             s->pb.buf_ptr= s->ptr_lastgob;
2656                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2657                         }
2658                     }
2659
2660                     if (s->avctx->rtp_callback){
2661                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2662                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2663                     }
2664                     update_mb_info(s, 1);
2665
2666                     switch(s->codec_id){
2667                     case AV_CODEC_ID_MPEG4:
2668                         if (CONFIG_MPEG4_ENCODER) {
2669                             ff_mpeg4_encode_video_packet_header(s);
2670                             ff_mpeg4_clean_buffers(s);
2671                         }
2672                     break;
2673                     case AV_CODEC_ID_MPEG1VIDEO:
2674                     case AV_CODEC_ID_MPEG2VIDEO:
2675                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2676                             ff_mpeg1_encode_slice_header(s);
2677                             ff_mpeg1_clean_buffers(s);
2678                         }
2679                     break;
2680                     case AV_CODEC_ID_H263:
2681                     case AV_CODEC_ID_H263P:
2682                         if (CONFIG_H263_ENCODER)
2683                             ff_h263_encode_gob_header(s, mb_y);
2684                     break;
2685                     }
2686
2687                     if(s->flags&CODEC_FLAG_PASS1){
2688                         int bits= put_bits_count(&s->pb);
2689                         s->misc_bits+= bits - s->last_bits;
2690                         s->last_bits= bits;
2691                     }
2692
2693                     s->ptr_lastgob += current_packet_size;
2694                     s->first_slice_line=1;
2695                     s->resync_mb_x=mb_x;
2696                     s->resync_mb_y=mb_y;
2697                 }
2698             }
2699
2700             if(  (s->resync_mb_x   == s->mb_x)
2701                && s->resync_mb_y+1 == s->mb_y){
2702                 s->first_slice_line=0;
2703             }
2704
2705             s->mb_skipped=0;
2706             s->dquant=0; //only for QP_RD
2707
2708             update_mb_info(s, 0);
2709
2710             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2711                 int next_block=0;
2712                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2713
2714                 copy_context_before_encode(&backup_s, s, -1);
2715                 backup_s.pb= s->pb;
2716                 best_s.data_partitioning= s->data_partitioning;
2717                 best_s.partitioned_frame= s->partitioned_frame;
2718                 if(s->data_partitioning){
2719                     backup_s.pb2= s->pb2;
2720                     backup_s.tex_pb= s->tex_pb;
2721                 }
2722
2723                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2724                     s->mv_dir = MV_DIR_FORWARD;
2725                     s->mv_type = MV_TYPE_16X16;
2726                     s->mb_intra= 0;
2727                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2728                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2729                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2730                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2731                 }
2732                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2733                     s->mv_dir = MV_DIR_FORWARD;
2734                     s->mv_type = MV_TYPE_FIELD;
2735                     s->mb_intra= 0;
2736                     for(i=0; i<2; i++){
2737                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2738                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2739                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2740                     }
2741                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2742                                  &dmin, &next_block, 0, 0);
2743                 }
2744                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2745                     s->mv_dir = MV_DIR_FORWARD;
2746                     s->mv_type = MV_TYPE_16X16;
2747                     s->mb_intra= 0;
2748                     s->mv[0][0][0] = 0;
2749                     s->mv[0][0][1] = 0;
2750                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2751                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2752                 }
2753                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2754                     s->mv_dir = MV_DIR_FORWARD;
2755                     s->mv_type = MV_TYPE_8X8;
2756                     s->mb_intra= 0;
2757                     for(i=0; i<4; i++){
2758                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2759                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2760                     }
2761                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2762                                  &dmin, &next_block, 0, 0);
2763                 }
2764                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2765                     s->mv_dir = MV_DIR_FORWARD;
2766                     s->mv_type = MV_TYPE_16X16;
2767                     s->mb_intra= 0;
2768                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2769                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2770                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2771                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2772                 }
2773                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2774                     s->mv_dir = MV_DIR_BACKWARD;
2775                     s->mv_type = MV_TYPE_16X16;
2776                     s->mb_intra= 0;
2777                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2778                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2779                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2780                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2781                 }
2782                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2783                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2784                     s->mv_type = MV_TYPE_16X16;
2785                     s->mb_intra= 0;
2786                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2787                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2788                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2789                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2790                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2791                                  &dmin, &next_block, 0, 0);
2792                 }
2793                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2794                     s->mv_dir = MV_DIR_FORWARD;
2795                     s->mv_type = MV_TYPE_FIELD;
2796                     s->mb_intra= 0;
2797                     for(i=0; i<2; i++){
2798                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2799                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2800                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2801                     }
2802                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2803                                  &dmin, &next_block, 0, 0);
2804                 }
2805                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2806                     s->mv_dir = MV_DIR_BACKWARD;
2807                     s->mv_type = MV_TYPE_FIELD;
2808                     s->mb_intra= 0;
2809                     for(i=0; i<2; i++){
2810                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2811                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2812                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2813                     }
2814                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2815                                  &dmin, &next_block, 0, 0);
2816                 }
2817                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2818                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2819                     s->mv_type = MV_TYPE_FIELD;
2820                     s->mb_intra= 0;
2821                     for(dir=0; dir<2; dir++){
2822                         for(i=0; i<2; i++){
2823                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2824                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2825                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2826                         }
2827                     }
2828                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2829                                  &dmin, &next_block, 0, 0);
2830                 }
2831                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2832                     s->mv_dir = 0;
2833                     s->mv_type = MV_TYPE_16X16;
2834                     s->mb_intra= 1;
2835                     s->mv[0][0][0] = 0;
2836                     s->mv[0][0][1] = 0;
2837                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2838                                  &dmin, &next_block, 0, 0);
2839                     if(s->h263_pred || s->h263_aic){
2840                         if(best_s.mb_intra)
2841                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2842                         else
2843                             ff_clean_intra_table_entries(s); //old mode?
2844                     }
2845                 }
2846
2847                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2848                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2849                         const int last_qp= backup_s.qscale;
2850                         int qpi, qp, dc[6];
2851                         int16_t ac[6][16];
2852                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2853                         static const int dquant_tab[4]={-1,1,-2,2};
2854
2855                         assert(backup_s.dquant == 0);
2856
2857                         //FIXME intra
2858                         s->mv_dir= best_s.mv_dir;
2859                         s->mv_type = MV_TYPE_16X16;
2860                         s->mb_intra= best_s.mb_intra;
2861                         s->mv[0][0][0] = best_s.mv[0][0][0];
2862                         s->mv[0][0][1] = best_s.mv[0][0][1];
2863                         s->mv[1][0][0] = best_s.mv[1][0][0];
2864                         s->mv[1][0][1] = best_s.mv[1][0][1];
2865
2866                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2867                         for(; qpi<4; qpi++){
2868                             int dquant= dquant_tab[qpi];
2869                             qp= last_qp + dquant;
2870                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2871                                 continue;
2872                             backup_s.dquant= dquant;
2873                             if(s->mb_intra && s->dc_val[0]){
2874                                 for(i=0; i<6; i++){
2875                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2876                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2877                                 }
2878                             }
2879
2880                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2881                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2882                             if(best_s.qscale != qp){
2883                                 if(s->mb_intra && s->dc_val[0]){
2884                                     for(i=0; i<6; i++){
2885                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2886                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2887                                     }
2888                                 }
2889                             }
2890                         }
2891                     }
2892                 }
2893                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2894                     int mx= s->b_direct_mv_table[xy][0];
2895                     int my= s->b_direct_mv_table[xy][1];
2896
2897                     backup_s.dquant = 0;
2898                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2899                     s->mb_intra= 0;
2900                     ff_mpeg4_set_direct_mv(s, mx, my);
2901                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2902                                  &dmin, &next_block, mx, my);
2903                 }
2904                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2905                     backup_s.dquant = 0;
2906                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2907                     s->mb_intra= 0;
2908                     ff_mpeg4_set_direct_mv(s, 0, 0);
2909                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2910                                  &dmin, &next_block, 0, 0);
2911                 }
2912                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2913                     int coded=0;
2914                     for(i=0; i<6; i++)
2915                         coded |= s->block_last_index[i];
2916                     if(coded){
2917                         int mx,my;
2918                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2919                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2920                             mx=my=0; //FIXME find the one we actually used
2921                             ff_mpeg4_set_direct_mv(s, mx, my);
2922                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2923                             mx= s->mv[1][0][0];
2924                             my= s->mv[1][0][1];
2925                         }else{
2926                             mx= s->mv[0][0][0];
2927                             my= s->mv[0][0][1];
2928                         }
2929
2930                         s->mv_dir= best_s.mv_dir;
2931                         s->mv_type = best_s.mv_type;
2932                         s->mb_intra= 0;
2933 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2934                         s->mv[0][0][1] = best_s.mv[0][0][1];
2935                         s->mv[1][0][0] = best_s.mv[1][0][0];
2936                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2937                         backup_s.dquant= 0;
2938                         s->skipdct=1;
2939                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2940                                         &dmin, &next_block, mx, my);
2941                         s->skipdct=0;
2942                     }
2943                 }
2944
2945                 s->current_picture.qscale_table[xy] = best_s.qscale;
2946
2947                 copy_context_after_encode(s, &best_s, -1);
2948
2949                 pb_bits_count= put_bits_count(&s->pb);
2950                 flush_put_bits(&s->pb);
2951                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2952                 s->pb= backup_s.pb;
2953
2954                 if(s->data_partitioning){
2955                     pb2_bits_count= put_bits_count(&s->pb2);
2956                     flush_put_bits(&s->pb2);
2957                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2958                     s->pb2= backup_s.pb2;
2959
2960                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2961                     flush_put_bits(&s->tex_pb);
2962                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2963                     s->tex_pb= backup_s.tex_pb;
2964                 }
2965                 s->last_bits= put_bits_count(&s->pb);
2966
2967                 if (CONFIG_H263_ENCODER &&
2968                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2969                     ff_h263_update_motion_val(s);
2970
2971                 if(next_block==0){ //FIXME 16 vs linesize16
2972                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2973                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2974                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2975                 }
2976
2977                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2978                     ff_mpv_decode_mb(s, s->block);
2979             } else {
2980                 int motion_x = 0, motion_y = 0;
2981                 s->mv_type=MV_TYPE_16X16;
2982                 // only one MB-Type possible
2983
2984                 switch(mb_type){
2985                 case CANDIDATE_MB_TYPE_INTRA:
2986                     s->mv_dir = 0;
2987                     s->mb_intra= 1;
2988                     motion_x= s->mv[0][0][0] = 0;
2989                     motion_y= s->mv[0][0][1] = 0;
2990                     break;
2991                 case CANDIDATE_MB_TYPE_INTER:
2992                     s->mv_dir = MV_DIR_FORWARD;
2993                     s->mb_intra= 0;
2994                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2995                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2996                     break;
2997                 case CANDIDATE_MB_TYPE_INTER_I:
2998                     s->mv_dir = MV_DIR_FORWARD;
2999                     s->mv_type = MV_TYPE_FIELD;
3000                     s->mb_intra= 0;
3001                     for(i=0; i<2; i++){
3002                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3003                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3004                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3005                     }
3006                     break;
3007                 case CANDIDATE_MB_TYPE_INTER4V:
3008                     s->mv_dir = MV_DIR_FORWARD;
3009                     s->mv_type = MV_TYPE_8X8;
3010                     s->mb_intra= 0;
3011                     for(i=0; i<4; i++){
3012                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3013                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3014                     }
3015                     break;
3016                 case CANDIDATE_MB_TYPE_DIRECT:
3017                     if (CONFIG_MPEG4_ENCODER) {
3018                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3019                         s->mb_intra= 0;
3020                         motion_x=s->b_direct_mv_table[xy][0];
3021                         motion_y=s->b_direct_mv_table[xy][1];
3022                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3023                     }
3024                     break;
3025                 case CANDIDATE_MB_TYPE_DIRECT0:
3026                     if (CONFIG_MPEG4_ENCODER) {
3027                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3028                         s->mb_intra= 0;
3029                         ff_mpeg4_set_direct_mv(s, 0, 0);
3030                     }
3031                     break;
3032                 case CANDIDATE_MB_TYPE_BIDIR:
3033                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3034                     s->mb_intra= 0;
3035                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3036                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3037                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3038                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3039                     break;
3040                 case CANDIDATE_MB_TYPE_BACKWARD:
3041                     s->mv_dir = MV_DIR_BACKWARD;
3042                     s->mb_intra= 0;
3043                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3044                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3045                     break;
3046                 case CANDIDATE_MB_TYPE_FORWARD:
3047                     s->mv_dir = MV_DIR_FORWARD;
3048                     s->mb_intra= 0;
3049                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3050                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3051                     break;
3052                 case CANDIDATE_MB_TYPE_FORWARD_I:
3053                     s->mv_dir = MV_DIR_FORWARD;
3054                     s->mv_type = MV_TYPE_FIELD;
3055                     s->mb_intra= 0;
3056                     for(i=0; i<2; i++){
3057                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3058                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3059                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3060                     }
3061                     break;
3062                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3063                     s->mv_dir = MV_DIR_BACKWARD;
3064                     s->mv_type = MV_TYPE_FIELD;
3065                     s->mb_intra= 0;
3066                     for(i=0; i<2; i++){
3067                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3068                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3069                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3070                     }
3071                     break;
3072                 case CANDIDATE_MB_TYPE_BIDIR_I:
3073                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3074                     s->mv_type = MV_TYPE_FIELD;
3075                     s->mb_intra= 0;
3076                     for(dir=0; dir<2; dir++){
3077                         for(i=0; i<2; i++){
3078                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3079                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3080                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3081                         }
3082                     }
3083                     break;
3084                 default:
3085                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3086                 }
3087
3088                 encode_mb(s, motion_x, motion_y);
3089
3090                 // RAL: Update last macroblock type
3091                 s->last_mv_dir = s->mv_dir;
3092
3093                 if (CONFIG_H263_ENCODER &&
3094                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3095                     ff_h263_update_motion_val(s);
3096
3097                 ff_mpv_decode_mb(s, s->block);
3098             }
3099
3100             /* clean the MV table in IPS frames for direct mode in B frames */
3101             if(s->mb_intra /* && I,P,S_TYPE */){
3102                 s->p_mv_table[xy][0]=0;
3103                 s->p_mv_table[xy][1]=0;
3104             }
3105
3106             if(s->flags&CODEC_FLAG_PSNR){
3107                 int w= 16;
3108                 int h= 16;
3109
3110                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3111                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3112
3113                 s->current_picture.f->error[0] += sse(
3114                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3115                     s->dest[0], w, h, s->linesize);
3116                 s->current_picture.f->error[1] += sse(
3117                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3118                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3119                 s->current_picture.f->error[2] += sse(
3120                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3121                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3122             }
3123             if(s->loop_filter){
3124                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3125                     ff_h263_loop_filter(s);
3126             }
3127             av_dlog(s->avctx, "MB %d %d bits\n",
3128                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3129         }
3130     }
3131
3132     //not beautiful here but we must write it before flushing so it has to be here
3133     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3134         ff_msmpeg4_encode_ext_header(s);
3135
3136     write_slice_end(s);
3137
3138     /* Send the last GOB if RTP */
3139     if (s->avctx->rtp_callback) {
3140         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3141         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3142         /* Call the RTP callback to send the last GOB */
3143         emms_c();
3144         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3145     }
3146
3147     return 0;
3148 }
3149
3150 #define MERGE(field) dst->field += src->field; src->field=0
3151 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3152     MERGE(me.scene_change_score);
3153     MERGE(me.mc_mb_var_sum_temp);
3154     MERGE(me.mb_var_sum_temp);
3155 }
3156
3157 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3158     int i;
3159
3160     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3161     MERGE(dct_count[1]);
3162     MERGE(mv_bits);
3163     MERGE(i_tex_bits);
3164     MERGE(p_tex_bits);
3165     MERGE(i_count);
3166     MERGE(f_count);
3167     MERGE(b_count);
3168     MERGE(skip_count);
3169     MERGE(misc_bits);
3170     MERGE(er.error_count);
3171     MERGE(padding_bug_score);
3172     MERGE(current_picture.f->error[0]);
3173     MERGE(current_picture.f->error[1]);
3174     MERGE(current_picture.f->error[2]);
3175
3176     if(dst->avctx->noise_reduction){
3177         for(i=0; i<64; i++){
3178             MERGE(dct_error_sum[0][i]);
3179             MERGE(dct_error_sum[1][i]);
3180         }
3181     }
3182
3183     assert(put_bits_count(&src->pb) % 8 ==0);
3184     assert(put_bits_count(&dst->pb) % 8 ==0);
3185     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3186     flush_put_bits(&dst->pb);
3187 }
3188
3189 static int estimate_qp(MpegEncContext *s, int dry_run){
3190     if (s->next_lambda){
3191         s->current_picture_ptr->f->quality =
3192         s->current_picture.f->quality = s->next_lambda;
3193         if(!dry_run) s->next_lambda= 0;
3194     } else if (!s->fixed_qscale) {
3195         s->current_picture_ptr->f->quality =
3196         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3197         if (s->current_picture.f->quality < 0)
3198             return -1;
3199     }
3200
3201     if(s->adaptive_quant){
3202         switch(s->codec_id){
3203         case AV_CODEC_ID_MPEG4:
3204             if (CONFIG_MPEG4_ENCODER)
3205                 ff_clean_mpeg4_qscales(s);
3206             break;
3207         case AV_CODEC_ID_H263:
3208         case AV_CODEC_ID_H263P:
3209         case AV_CODEC_ID_FLV1:
3210             if (CONFIG_H263_ENCODER)
3211                 ff_clean_h263_qscales(s);
3212             break;
3213         default:
3214             ff_init_qscale_tab(s);
3215         }
3216
3217         s->lambda= s->lambda_table[0];
3218         //FIXME broken
3219     }else
3220         s->lambda = s->current_picture.f->quality;
3221     update_qscale(s);
3222     return 0;
3223 }
3224
3225 /* must be called before writing the header */
3226 static void set_frame_distances(MpegEncContext * s){
3227     assert(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3228     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3229
3230     if(s->pict_type==AV_PICTURE_TYPE_B){
3231         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3232         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3233     }else{
3234         s->pp_time= s->time - s->last_non_b_time;
3235         s->last_non_b_time= s->time;
3236         assert(s->picture_number==0 || s->pp_time > 0);
3237     }
3238 }
3239
/**
 * Encode one picture using all slice contexts.
 *
 * Steps, in order:
 *  - initialize timing and rounding state and pick a provisional lambda;
 *  - run motion estimation on every slice context, or for an I-picture mark
 *    every macroblock as intra and (unless qscale is fixed) run mb_var_thread;
 *  - turn a P-picture into an I-picture when the scene change score exceeds
 *    the threshold;
 *  - unless umvplus is set, choose f_code/b_code and call the ff_fix_long_*
 *    helpers on the motion vector tables;
 *  - decide the final quantizer with estimate_qp();
 *  - write the picture header for the output format;
 *  - run encode_thread() on every slice context and merge the results.
 *
 * @param s              main encoder context
 * @param picture_number stored in s->picture_number and passed on to the
 *                       picture header writers
 * @return 0 on success; -1 if estimate_qp() or ff_init_me() fails, or the
 *         negative value returned by ff_update_duplicate_context()
 *
 * NOTE(review): the values returned by the avctx->execute() calls are not
 * checked in this function.
 */
3240 static int encode_picture(MpegEncContext *s, int picture_number)
3241 {
3242     int i, ret;
3243     int bits;
3244     int context_count = s->slice_context_count;
3245
3246     s->picture_number = picture_number;
3247
3248     /* Reset the average MB variance */
3249     s->me.mb_var_sum_temp    =
3250     s->me.mc_mb_var_sum_temp = 0;
3251
3252     /* we need to initialize some time vars before we can encode b-frames */
3253     // RAL: Condition added for MPEG1VIDEO
3254     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3255         set_frame_distances(s);
3256     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3257         ff_set_mpeg4_time(s);
3258
3259     s->me.scene_change_score=0;
3260
3261 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3262
         /* I-pictures reset no_rounding (1 only for msmpeg4_version >= 3);
          * other non-B pictures toggle it when flipflop_rounding is set or the
          * codec is H263P or MPEG4; B-pictures leave it unchanged */
3263     if(s->pict_type==AV_PICTURE_TYPE_I){
3264         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3265         else                        s->no_rounding=0;
3266     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3267         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3268             s->no_rounding ^= 1;
3269     }
3270
         /* CODEC_FLAG_PASS2: dry run of the quantizer decision (the real call
          * to estimate_qp() follows motion estimation).
          * Otherwise, when CODEC_FLAG_QSCALE is not set, start from the lambda
          * stored in last_lambda_for[]: indexed by the picture type for
          * B-pictures, by the last non-B picture type for everything else */
3271     if(s->flags & CODEC_FLAG_PASS2){
3272         if (estimate_qp(s,1) < 0)
3273             return -1;
3274         ff_get_2pass_fcode(s);
3275     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3276         if(s->pict_type==AV_PICTURE_TYPE_B)
3277             s->lambda= s->last_lambda_for[s->pict_type];
3278         else
3279             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3280         update_qscale(s);
3281     }
3282
3283     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* bring the other slice contexts up to date with s; index 0 is
          * skipped -- presumably thread_context[0] is s itself (TODO confirm) */
3284     for(i=1; i<context_count; i++){
3285         ret = ff_update_duplicate_context(s->thread_context[i], s);
3286         if (ret < 0)
3287             return ret;
3288     }
3289
3290     if(ff_init_me(s)<0)
3291         return -1;
3292
3293     /* Estimate motion for every MB */
3294     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda and lambda2 by me_penalty_compensation / 256, rounded */
3295         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3296         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3297         if (s->pict_type != AV_PICTURE_TYPE_B) {
                 /* pre-pass: whenever pre_me == 2, or when pre_me is nonzero
                  * and the last non-B picture was an I-picture */
3298             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3299                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3300             }
3301         }
3302
3303         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3304     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3305         /* I-Frame */
3306         for(i=0; i<s->mb_stride*s->mb_height; i++)
3307             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3308
3309         if(!s->fixed_qscale){
3310             /* finding spatial complexity for I-frame rate control */
3311             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3312         }
3313     }
         /* add the scene change score and variance sums of the other slice
          * contexts into s */
3314     for(i=1; i<context_count; i++){
3315         merge_context_after_me(s, s->thread_context[i]);
3316     }
3317     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3318     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3319     emms_c();
3320
         /* a P-picture whose scene change score exceeds the threshold is
          * encoded as an I-picture instead, with every MB marked intra */
3321     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3322         s->pict_type= AV_PICTURE_TYPE_I;
3323         for(i=0; i<s->mb_stride*s->mb_height; i++)
3324             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3325         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3326                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3327     }
3328
         /* f_code / b_code selection; skipped entirely when umvplus is set */
3329     if(!s->umvplus){
3330         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3331             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3332
                 /* with interlaced ME, f_code is the maximum of the frame
                  * value and the two field-table values */
3333             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3334                 int a,b;
3335                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3336                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3337                 s->f_code= FFMAX3(s->f_code, a, b);
3338             }
3339
3340             ff_fix_long_p_mvs(s);
3341             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3342             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3343                 int j;
3344                 for(i=0; i<2; i++){
3345                     for(j=0; j<2; j++)
3346                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3347                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3348                 }
3349             }
3350         }
3351
3352         if(s->pict_type==AV_PICTURE_TYPE_B){
3353             int a, b;
3354
                 /* f_code covers the forward tables, b_code the backward
                  * tables; each is the maximum over the single-direction and
                  * the bidirectional table */
3355             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3356             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3357             s->f_code = FFMAX(a, b);
3358
3359             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3360             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3361             s->b_code = FFMAX(a, b);
3362
3363             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3364             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3365             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3366             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3367             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3368                 int dir, j;
3369                 for(dir=0; dir<2; dir++){
3370                     for(i=0; i<2; i++){
3371                         for(j=0; j<2; j++){
3372                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3373                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3374                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3375                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3376                         }
3377                     }
3378                 }
3379             }
3380         }
3381     }
3382
         /* the real (non dry-run) quantizer decision */
3383     if (estimate_qp(s, 0) < 0)
3384         return -1;
3385
3386     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3387         s->qscale= 3; //reduce clipping problems
3388
3389     if (s->out_format == FMT_MJPEG) {
3390         /* for mjpeg, we do include qscale in the matrix */
             /* AC entries: (default intra matrix * qscale) >> 3, clipped to
              * 8 bits, stored at the IDCT-permuted position */
3391         for(i=1;i<64;i++){
3392             int j = s->idsp.idct_permutation[i];
3393
3394             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3395         }
3396         s->y_dc_scale_table=
3397         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3398         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3399         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3400                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
             /* the matrix is already scaled, continue with qscale fixed at 8 */
3401         s->qscale= 8;
3402     }
3403
3404     //FIXME var duplication
3405     s->current_picture_ptr->f->key_frame =
3406     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3407     s->current_picture_ptr->f->pict_type =
3408     s->current_picture.f->pict_type = s->pict_type;
3409
3410     if (s->current_picture.f->key_frame)
3411         s->picture_in_gop_number=0;
3412
         /* remember the bit position before the header so that header_bits
          * can be computed once the header has been written */
3413     s->last_bits= put_bits_count(&s->pb);
3414     switch(s->out_format) {
3415     case FMT_MJPEG:
3416         if (CONFIG_MJPEG_ENCODER)
3417             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3418                                            s->intra_matrix);
3419         break;
3420     case FMT_H261:
3421         if (CONFIG_H261_ENCODER)
3422             ff_h261_encode_picture_header(s, picture_number);
3423         break;
3424     case FMT_H263:
             /* several codecs share FMT_H263; the first matching header
              * writer in this chain is used */
3425         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3426             ff_wmv2_encode_picture_header(s, picture_number);
3427         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3428             ff_msmpeg4_encode_picture_header(s, picture_number);
3429         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3430             ff_mpeg4_encode_picture_header(s, picture_number);
3431         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3432             ff_rv10_encode_picture_header(s, picture_number);
3433         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3434             ff_rv20_encode_picture_header(s, picture_number);
3435         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3436             ff_flv_encode_picture_header(s, picture_number);
3437         else if (CONFIG_H263_ENCODER)
3438             ff_h263_encode_picture_header(s, picture_number);
3439         break;
3440     case FMT_MPEG1:
3441         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3442             ff_mpeg1_encode_picture_header(s, picture_number);
3443         break;
3444     default:
3445         assert(0);
3446     }
3447     bits= put_bits_count(&s->pb);
3448     s->header_bits= bits - s->last_bits;
3449
         /* refresh the other slice contexts, encode all slices, then merge
          * their statistics and bitstreams back into s */
3450     for(i=1; i<context_count; i++){
3451         update_duplicate_context_after_me(s->thread_context[i], s);
3452     }
3453     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3454     for(i=1; i<context_count; i++){
3455         merge_context_after_encode(s, s->thread_context[i]);
3456     }
3457     emms_c();
3458     return 0;
3459 }
3460
3461 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3462     const int intra= s->mb_intra;
3463     int i;
3464
3465     s->dct_count[intra]++;
3466
3467     for(i=0; i<64; i++){
3468         int level= block[i];
3469
3470         if(level){
3471             if(level>0){
3472                 s->dct_error_sum[intra][i] += level;
3473                 level -= s->dct_offset[intra][i];
3474                 if(level<0) level=0;
3475             }else{
3476                 s->dct_error_sum[intra][i] -= level;
3477                 level += s->dct_offset[intra][i];
3478                 if(level>0) level=0;
3479             }
3480             block[i]= level;
3481         }
3482     }
3483 }
3484
/**
 * Forward-transform and quantize one block, choosing the quantized levels by
 * a rate-distortion search instead of plain rounding.
 *
 * Outline:
 *  1. fdct (plus denoising when s->dct_error_sum is set); for intra blocks
 *     the DC coefficient is quantized separately and the search starts at
 *     scan position 1;
 *  2. for every scan position up to the last one whose scaled value exceeds
 *     the threshold, one or two candidate levels are stored in coeff[][];
 *  3. the scan positions are walked in order; for each candidate level and
 *     each surviving predecessor position the cost
 *     distortion + lambda * code length is evaluated and the best path is
 *     kept in score_tab[] / run_tab[] / level_tab[];
 *  4. the best end position is selected and the chosen levels are written
 *     back into block[].
 *
 * @param s        encoder context
 * @param block    64 coefficients; transformed in place by s->fdsp.fdct()
 *                 and overwritten with the quantized levels
 * @param n        block number: n < 4 uses y_dc_scale for the intra DC,
 *                 otherwise c_dc_scale; also the index into s->coded_score[]
 * @param qscale   quantizer; selects the row of q_intra_matrix/q_inter_matrix
 * @param overflow receives nonzero when the bitwise OR of the candidate
 *                 magnitudes exceeds s->max_qcoeff
 * @return scan index of the last nonzero coefficient. A value below the
 *         first searched position (1 for intra, 0 for inter) means that no
 *         searched coefficient is coded; -1 is returned for an inter block
 *         with nothing coded.
 */
3485 static int dct_quantize_trellis_c(MpegEncContext *s,
3486                                   int16_t *block, int n,
3487                                   int qscale, int *overflow){
3488     const int *qmat;
3489     const uint8_t *scantable= s->intra_scantable.scantable;
3490     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3491     int max=0;
3492     unsigned int threshold1, threshold2;
3493     int bias=0;
3494     int run_tab[65];
3495     int level_tab[65];
3496     int score_tab[65];
3497     int survivor[65];
3498     int survivor_count;
3499     int last_run=0;
3500     int last_level=0;
3501     int last_score= 0;
3502     int last_i;
3503     int coeff[2][64];
3504     int coeff_count[64];
3505     int qmul, qadd, start_i, last_non_zero, i, dc;
3506     const int esc_length= s->ac_esc_length;
3507     uint8_t * length;
3508     uint8_t * last_length;
3509     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3510
3511     s->fdsp.fdct(block);
3512
3513     if(s->dct_error_sum)
3514         s->denoise_dct(s, block);
         /* constants for the FMT_H263 reconstruction alevel*qmul + qadd below;
          * they are 8 times the qscale*2 and (qscale-1)|1 used by
          * dct_quantize_refine() */
3515     qmul= qscale*16;
3516     qadd= ((qscale-1)|1)*8;
3517
3518     if (s->mb_intra) {
3519         int q;
3520         if (!s->h263_aic) {
3521             if (n < 4)
3522                 q = s->y_dc_scale;
3523             else
3524                 q = s->c_dc_scale;
3525             q = q << 3;
3526         } else{
3527             /* For AIC we skip quant/dequant of INTRADC */
3528             q = 1 << 3;
3529             qadd=0;
3530         }
3531
3532         /* note: block[0] is assumed to be positive */
3533         block[0] = (block[0] + (q >> 1)) / q;
             /* DC is done; the search below covers scan positions 1..63 */
3534         start_i = 1;
3535         last_non_zero = 0;
3536         qmat = s->q_intra_matrix[qscale];
3537         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3538             bias= 1<<(QMAT_SHIFT-1);
3539         length     = s->intra_ac_vlc_length;
3540         last_length= s->intra_ac_vlc_last_length;
3541     } else {
3542         start_i = 0;
3543         last_non_zero = -1;
3544         qmat = s->q_inter_matrix[qscale];
3545         length     = s->inter_ac_vlc_length;
3546         last_length= s->inter_ac_vlc_last_length;
3547     }
3548     last_i= start_i;
3549
         /* (bias + |v|) >> QMAT_SHIFT is nonzero exactly when |v| > threshold1;
          * (unsigned)(v + threshold1) > threshold2 tests that for both signs
          * with a single comparison */
3550     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3551     threshold2= (threshold1<<1);
3552
         /* find the last scan position whose scaled value exceeds the threshold */
3553     for(i=63; i>=start_i; i--) {
3554         const int j = scantable[i];
3555         int level = block[j] * qmat[j];
3556
3557         if(((unsigned)(level+threshold1))>threshold2){
3558             last_non_zero = i;
3559             break;
3560         }
3561     }
3562
         /* collect the candidate levels for every position up to last_non_zero */
3563     for(i=start_i; i<=last_non_zero; i++) {
3564         const int j = scantable[i];
3565         int level = block[j] * qmat[j];
3566
3567 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3568 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3569         if(((unsigned)(level+threshold1))>threshold2){
                 /* candidates: the quantized level and the level one step
                  * closer to zero; from here on 'level' holds the magnitude */
3570             if(level>0){
3571                 level= (bias + level)>>QMAT_SHIFT;
3572                 coeff[0][i]= level;
3573                 coeff[1][i]= level-1;
3574 //                coeff[2][k]= level-2;
3575             }else{
3576                 level= (bias - level)>>QMAT_SHIFT;
3577                 coeff[0][i]= -level;
3578                 coeff[1][i]= -level+1;
3579 //                coeff[2][k]= -level+2;
3580             }
                 /* magnitude 1 has a single candidate, since the second one
                  * would be 0 */
3581             coeff_count[i]= FFMIN(level, 2);
3582             assert(coeff_count[i]);
3583             max |=level;
3584         }else{
                 /* below the threshold: the only candidate is +1 or -1 with
                  * the sign of the scaled value (uses >> on a possibly
                  * negative int as a sign extraction) */
3585             coeff[0][i]= (level>>31)|1;
3586             coeff_count[i]= 1;
3587         }
3588     }
3589
3590     *overflow= s->max_qcoeff < max; //overflow might have happened
3591
         /* nothing above the threshold: clear the searched coefficients */
3592     if(last_non_zero < start_i){
3593         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3594         return last_non_zero;
3595     }
3596
         /* score_tab[k] is the best cost of a path whose most recent nonzero
          * coefficient is at scan position k-1 (score_tab[start_i] = 0 is the
          * empty path); run_tab[k] / level_tab[k] hold the run and level that
          * reached it. survivor[] lists the positions k still considered as
          * the start of the next run. */
3597     score_tab[start_i]= 0;
3598     survivor[0]= start_i;
3599     survivor_count= 1;
3600
3601     for(i=start_i; i<=last_non_zero; i++){
3602         int level_index, j, zero_distortion;
3603         int dct_coeff= FFABS(block[ scantable[i] ]);
3604         int best_score=256*256*256*120;
3605
             /* rescale by ff_inv_aanscales when the ifast DCT is in use */
3606         if (s->fdsp.fdct == ff_fdct_ifast)
3607             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3608         zero_distortion= dct_coeff*dct_coeff;
3609
3610         for(level_index=0; level_index < coeff_count[i]; level_index++){
3611             int distortion;
3612             int level= coeff[level_index][i];
3613             const int alevel= FFABS(level);
3614             int unquant_coeff;
3615
3616             assert(level);
3617
                 /* reconstruct the value a decoder would get for this level */
3618             if(s->out_format == FMT_H263){
3619                 unquant_coeff= alevel*qmul + qadd;
3620             }else{ //MPEG1
3621                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3622                 if(s->mb_intra){
3623                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3624                         unquant_coeff =   (unquant_coeff - 1) | 1;
3625                 }else{
3626                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3627                         unquant_coeff =   (unquant_coeff - 1) | 1;
3628                 }
3629                 unquant_coeff<<= 3;
3630             }
3631
                 /* distortion relative to coding this coefficient as zero */
3632             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* levels in [-64, 63] are looked up in the length tables with
                  * the index level+64; all other levels are costed as escapes */
3633             level+=64;
3634             if((level&(~127)) == 0){
3635                 for(j=survivor_count-1; j>=0; j--){
3636                     int run= i - survivor[j];
3637                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3638                     score += score_tab[i-run];
3639
3640                     if(score < best_score){
3641                         best_score= score;
3642                         run_tab[i+1]= run;
3643                         level_tab[i+1]= level-64;
3644                     }
3645                 }
3646
                     /* for FMT_H263 the last coefficient uses the separate
                      * last_length table, so the best ending is tracked here */
3647                 if(s->out_format == FMT_H263){
3648                     for(j=survivor_count-1; j>=0; j--){
3649                         int run= i - survivor[j];
3650                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3651                         score += score_tab[i-run];
3652                         if(score < last_score){
3653                             last_score= score;
3654                             last_run= run;
3655                             last_level= level-64;
3656                             last_i= i+1;
3657                         }
3658                     }
3659                 }
3660             }else{
                     /* escape: the bit cost does not depend on the run */
3661                 distortion += esc_length*lambda;
3662                 for(j=survivor_count-1; j>=0; j--){
3663                     int run= i - survivor[j];
3664                     int score= distortion + score_tab[i-run];
3665
3666                     if(score < best_score){
3667                         best_score= score;
3668                         run_tab[i+1]= run;
3669                         level_tab[i+1]= level-64;
3670                     }
3671                 }
3672
3673                 if(s->out_format == FMT_H263){
3674                   for(j=survivor_count-1; j>=0; j--){
3675                         int run= i - survivor[j];
3676                         int score= distortion + score_tab[i-run];
3677                         if(score < last_score){
3678                             last_score= score;
3679                             last_run= run;
3680                             last_level= level-64;
3681                             last_i= i+1;
3682                         }
3683                     }
3684                 }
3685             }
3686         }
3687
3688         score_tab[i+1]= best_score;
3689
             /* drop trailing survivors that cost more than the new best score;
              * when last_non_zero > 27 a margin of lambda is allowed */
3690         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3691         if(last_non_zero <= 27){
3692             for(; survivor_count; survivor_count--){
3693                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3694                     break;
3695             }
3696         }else{
3697             for(; survivor_count; survivor_count--){
3698                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3699                     break;
3700             }
3701         }
3702
3703         survivor[ survivor_count++ ]= i+1;
3704     }
3705
         /* formats other than FMT_H263: choose the end position afterwards;
          * every end position except 0 is charged an extra 2*lambda */
3706     if(s->out_format != FMT_H263){
3707         last_score= 256*256*256*120;
3708         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3709             int score= score_tab[i];
3710             if(i) score += lambda*2; //FIXME exacter?
3711
3712             if(score < last_score){
3713                 last_score= score;
3714                 last_i= i;
3715                 last_level= level_tab[i];
3716                 last_run= run_tab[i];
3717             }
3718         }
3719     }
3720
3721     s->coded_score[n] = last_score;
3722
         /* keep |block[0]| for the special case below, then clear the searched
          * coefficients before writing the chosen levels */
3723     dc= FFABS(block[0]);
3724     last_non_zero= last_i - 1;
3725     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3726
3727     if(last_non_zero < start_i)
3728         return last_non_zero;
3729
         /* inter block whose only coded coefficient is at scan position 0:
          * decide again between its candidates and not coding it at all,
          * starting from dc*dc as the cost of dropping it */
3730     if(last_non_zero == 0 && start_i == 0){
3731         int best_level= 0;
3732         int best_score= dc * dc;
3733
3734         for(i=0; i<coeff_count[0]; i++){
3735             int level= coeff[i][0];
3736             int alevel= FFABS(level);
3737             int unquant_coeff, score, distortion;
3738
3739             if(s->out_format == FMT_H263){
3740                     unquant_coeff= (alevel*qmul + qadd)>>3;
3741             }else{ //MPEG1
3742                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3743                     unquant_coeff =   (unquant_coeff - 1) | 1;
3744             }
3745             unquant_coeff = (unquant_coeff + 4) >> 3;
3746             unquant_coeff<<= 3 + 3;
3747
3748             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3749             level+=64;
3750             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3751             else                    score= distortion + esc_length*lambda;
3752
3753             if(score < best_score){
3754                 best_score= score;
3755                 best_level= level - 64;
3756             }
3757         }
3758         block[0]= best_level;
3759         s->coded_score[n] = best_score - dc*dc;
3760         if(best_level == 0) return -1;
3761         else                return last_non_zero;
3762     }
3763
         /* write the last coefficient, then follow run_tab[] backwards to
          * place the remaining chosen levels */
3764     i= last_i;
3765     assert(last_level);
3766
3767     block[ perm_scantable[last_non_zero] ]= last_level;
3768     i -= last_run + 1;
3769
3770     for(; i>start_i; i -= run_tab[i] + 1){
3771         block[ perm_scantable[i-1] ]= level_tab[i];
3772     }
3773
3774     return last_non_zero;
3775 }
3776
3777 //#define REFINE_STATS 1
3778 static int16_t basis[64][64];
3779
3780 static void build_basis(uint8_t *perm){
3781     int i, j, x, y;
3782     emms_c();
3783     for(i=0; i<8; i++){
3784         for(j=0; j<8; j++){
3785             for(y=0; y<8; y++){
3786                 for(x=0; x<8; x++){
3787                     double s= 0.25*(1<<BASIS_SHIFT);
3788                     int index= 8*i + j;
3789                     int perm_index= perm[index];
3790                     if(i==0) s*= sqrt(0.5);
3791                     if(j==0) s*= sqrt(0.5);
3792                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3793                 }
3794             }
3795         }
3796     }
3797 }
3798
3799 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3800                         int16_t *block, int16_t *weight, int16_t *orig,
3801                         int n, int qscale){
3802     int16_t rem[64];
3803     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3804     const uint8_t *scantable= s->intra_scantable.scantable;
3805     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3806 //    unsigned int threshold1, threshold2;
3807 //    int bias=0;
3808     int run_tab[65];
3809     int prev_run=0;
3810     int prev_level=0;
3811     int qmul, qadd, start_i, last_non_zero, i, dc;
3812     uint8_t * length;
3813     uint8_t * last_length;
3814     int lambda;
3815     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3816 #ifdef REFINE_STATS
3817 static int count=0;
3818 static int after_last=0;
3819 static int to_zero=0;
3820 static int from_zero=0;
3821 static int raise=0;
3822 static int lower=0;
3823 static int messed_sign=0;
3824 #endif
3825
3826     if(basis[0][0] == 0)
3827         build_basis(s->idsp.idct_permutation);
3828
3829     qmul= qscale*2;
3830     qadd= (qscale-1)|1;
3831     if (s->mb_intra) {
3832         if (!s->h263_aic) {
3833             if (n < 4)
3834                 q = s->y_dc_scale;
3835             else
3836                 q = s->c_dc_scale;
3837         } else{
3838             /* For AIC we skip quant/dequant of INTRADC */
3839             q = 1;
3840             qadd=0;
3841         }
3842         q <<= RECON_SHIFT-3;
3843         /* note: block[0] is assumed to be positive */
3844         dc= block[0]*q;
3845 //        block[0] = (block[0] + (q >> 1)) / q;
3846         start_i = 1;
3847 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3848 //            bias= 1<<(QMAT_SHIFT-1);
3849         length     = s->intra_ac_vlc_length;
3850         last_length= s->intra_ac_vlc_last_length;
3851     } else {
3852         dc= 0;
3853         start_i = 0;
3854         length     = s->inter_ac_vlc_length;
3855         last_length= s->inter_ac_vlc_last_length;
3856     }
3857     last_non_zero = s->block_last_index[n];
3858
3859 #ifdef REFINE_STATS
3860 {START_TIMER
3861 #endif
3862     dc += (1<<(RECON_SHIFT-1));
3863     for(i=0; i<64; i++){
3864         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3865     }
3866 #ifdef REFINE_STATS
3867 STOP_TIMER("memset rem[]")}
3868 #endif
3869     sum=0;
3870     for(i=0; i<64; i++){
3871         int one= 36;
3872         int qns=4;
3873         int w;
3874
3875         w= FFABS(weight[i]) + qns*one;
3876         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3877
3878         weight[i] = w;
3879 //        w=weight[i] = (63*qns + (w/2)) / w;
3880
3881         assert(w>0);
3882         assert(w<(1<<6));
3883         sum += w*w;
3884     }
3885     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3886 #ifdef REFINE_STATS
3887 {START_TIMER
3888 #endif
3889     run=0;
3890     rle_index=0;
3891     for(i=start_i; i<=last_non_zero; i++){
3892         int j= perm_scantable[i];
3893         const int level= block[j];
3894         int coeff;
3895
3896         if(level){
3897             if(level<0) coeff= qmul*level - qadd;
3898             else        coeff= qmul*level + qadd;
3899             run_tab[rle_index++]=run;
3900             run=0;
3901
3902             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
3903         }else{
3904             run++;
3905         }
3906     }
3907 #ifdef REFINE_STATS
3908 if(last_non_zero>0){
3909 STOP_TIMER("init rem[]")
3910 }
3911 }
3912
3913 {START_TIMER
3914 #endif
3915     for(;;){
3916         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
3917         int best_coeff=0;
3918         int best_change=0;
3919         int run2, best_unquant_change=0, analyze_gradient;
3920 #ifdef REFINE_STATS
3921 {START_TIMER
3922 #endif
3923         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3924
3925         if(analyze_gradient){
3926 #ifdef REFINE_STATS
3927 {START_TIMER
3928 #endif
3929             for(i=0; i<64; i++){
3930                 int w= weight[i];
3931
3932                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3933             }
3934 #ifdef REFINE_STATS
3935 STOP_TIMER("rem*w*w")}
3936 {START_TIMER
3937 #endif
3938             s->fdsp.fdct(d1);
3939 #ifdef REFINE_STATS
3940 STOP_TIMER("dct")}
3941 #endif
3942         }
3943
3944         if(start_i){
3945             const int level= block[0];
3946             int change, old_coeff;
3947
3948             assert(s->mb_intra);
3949
3950             old_coeff= q*level;
3951
3952             for(change=-1; change<=1; change+=2){
3953                 int new_level= level + change;
3954                 int score, new_coeff;
3955
3956                 new_coeff= q*new_level;
3957                 if(new_coeff >= 2048 || new_coeff < 0)
3958                     continue;
3959
3960                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
3961                                                   new_coeff - old_coeff);
3962                 if(score<best_score){
3963                     best_score= score;
3964                     best_coeff= 0;
3965                     best_change= change;
3966                     best_unquant_change= new_coeff - old_coeff;
3967                 }
3968             }
3969         }
3970
3971         run=0;
3972         rle_index=0;
3973         run2= run_tab[rle_index++];
3974         prev_level=0;
3975         prev_run=0;
3976
3977         for(i=start_i; i<64; i++){
3978             int j= perm_scantable[i];
3979             const int level= block[j];
3980             int change, old_coeff;
3981
3982             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3983                 break;
3984
3985             if(level){
3986                 if(level<0) old_coeff= qmul*level - qadd;
3987                 else        old_coeff= qmul*level + qadd;
3988                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3989             }else{
3990                 old_coeff=0;
3991                 run2--;
3992                 assert(run2>=0 || i >= last_non_zero );
3993             }
3994
3995             for(change=-1; change<=1; change+=2){
3996                 int new_level= level + change;
3997                 int score, new_coeff, unquant_change;
3998
3999                 score=0;
4000                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4001                    continue;
4002
4003                 if(new_level){
4004                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4005                     else            new_coeff= qmul*new_level + qadd;
4006                     if(new_coeff >= 2048 || new_coeff <= -2048)
4007                         continue;
4008                     //FIXME check for overflow
4009
4010                     if(level){
4011                         if(level < 63 && level > -63){
4012                             if(i < last_non_zero)
4013                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4014                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4015                             else
4016                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4017                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4018                         }
4019                     }else{
4020                         assert(FFABS(new_level)==1);
4021
4022                         if(analyze_gradient){
4023                             int g= d1[ scantable[i] ];
4024                             if(g && (g^new_level) >= 0)
4025                                 continue;
4026                         }
4027
4028                         if(i < last_non_zero){
4029                             int next_i= i + run2 + 1;
4030                             int next_level= block[ perm_scantable[next_i] ] + 64;
4031
4032                             if(next_level&(~127))
4033                                 next_level= 0;
4034
4035                             if(next_i < last_non_zero)
4036                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4037                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4038                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4039                             else
4040                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4041                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4042                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4043                         }else{
4044                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4045                             if(prev_level){
4046                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4047                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4048                             }
4049                         }
4050                     }
4051                 }else{
4052                     new_coeff=0;
4053                     assert(FFABS(level)==1);
4054
4055                     if(i < last_non_zero){
4056                         int next_i= i + run2 + 1;
4057                         int next_level= block[ perm_scantable[next_i] ] + 64;
4058
4059                         if(next_level&(~127))
4060                             next_level= 0;
4061
4062                         if(next_i < last_non_zero)
4063                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4064                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4065                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4066                         else
4067                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4068                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4069                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4070                     }else{
4071                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4072                         if(prev_level){
4073                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4074                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4075                         }
4076                     }
4077                 }
4078
4079                 score *= lambda;
4080
4081                 unquant_change= new_coeff - old_coeff;
4082                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
4083
4084                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4085                                                    unquant_change);
4086                 if(score<best_score){
4087                     best_score= score;
4088                     best_coeff= i;
4089                     best_change= change;
4090                     best_unquant_change= unquant_change;
4091                 }
4092             }
4093             if(level){
4094                 prev_level= level + 64;
4095                 if(prev_level&(~127))
4096                     prev_level= 0;
4097                 prev_run= run;
4098                 run=0;
4099             }else{
4100                 run++;
4101             }
4102         }
4103 #ifdef REFINE_STATS
4104 STOP_TIMER("iterative step")}
4105 #endif
4106
4107         if(best_change){
4108             int j= perm_scantable[ best_coeff ];
4109
4110             block[j] += best_change;
4111
4112             if(best_coeff > last_non_zero){
4113                 last_non_zero= best_coeff;
4114                 assert(block[j]);
4115 #ifdef REFINE_STATS
4116 after_last++;
4117 #endif
4118             }else{
4119 #ifdef REFINE_STATS
4120 if(block[j]){
4121     if(block[j] - best_change){
4122         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4123             raise++;
4124         }else{
4125             lower++;
4126         }
4127     }else{
4128         from_zero++;
4129     }
4130 }else{
4131     to_zero++;
4132 }
4133 #endif
4134                 for(; last_non_zero>=start_i; last_non_zero--){
4135                     if(block[perm_scantable[last_non_zero]])
4136                         break;
4137                 }
4138             }
4139 #ifdef REFINE_STATS
4140 count++;
4141 if(256*256*256*64 % count == 0){
4142     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4143 }
4144 #endif
4145             run=0;
4146             rle_index=0;
4147             for(i=start_i; i<=last_non_zero; i++){
4148                 int j= perm_scantable[i];
4149                 const int level= block[j];
4150
4151                  if(level){
4152                      run_tab[rle_index++]=run;
4153                      run=0;
4154                  }else{
4155                      run++;
4156                  }
4157             }
4158
4159             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4160         }else{
4161             break;
4162         }
4163     }
4164 #ifdef REFINE_STATS
4165 if(last_non_zero>0){
4166 STOP_TIMER("iterative search")
4167 }
4168 }
4169 #endif
4170
4171     return last_non_zero;
4172 }
4173
4174 int ff_dct_quantize_c(MpegEncContext *s,
4175                         int16_t *block, int n,
4176                         int qscale, int *overflow)
4177 {
4178     int i, j, level, last_non_zero, q, start_i;
4179     const int *qmat;
4180     const uint8_t *scantable= s->intra_scantable.scantable;
4181     int bias;
4182     int max=0;
4183     unsigned int threshold1, threshold2;
4184
4185     s->fdsp.fdct(block);
4186
4187     if(s->dct_error_sum)
4188         s->denoise_dct(s, block);
4189
4190     if (s->mb_intra) {
4191         if (!s->h263_aic) {
4192             if (n < 4)
4193                 q = s->y_dc_scale;
4194             else
4195                 q = s->c_dc_scale;
4196             q = q << 3;
4197         } else
4198             /* For AIC we skip quant/dequant of INTRADC */
4199             q = 1 << 3;
4200
4201         /* note: block[0] is assumed to be positive */
4202         block[0] = (block[0] + (q >> 1)) / q;
4203         start_i = 1;
4204         last_non_zero = 0;
4205         qmat = s->q_intra_matrix[qscale];
4206         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4207     } else {
4208         start_i = 0;
4209         last_non_zero = -1;
4210         qmat = s->q_inter_matrix[qscale];
4211         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4212     }
4213     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4214     threshold2= (threshold1<<1);
4215     for(i=63;i>=start_i;i--) {
4216         j = scantable[i];
4217         level = block[j] * qmat[j];
4218
4219         if(((unsigned)(level+threshold1))>threshold2){
4220             last_non_zero = i;
4221             break;
4222         }else{
4223             block[j]=0;
4224         }
4225     }
4226     for(i=start_i; i<=last_non_zero; i++) {
4227         j = scantable[i];
4228         level = block[j] * qmat[j];
4229
4230 //        if(   bias+level >= (1<<QMAT_SHIFT)
4231 //           || bias-level >= (1<<QMAT_SHIFT)){
4232         if(((unsigned)(level+threshold1))>threshold2){
4233             if(level>0){
4234                 level= (bias + level)>>QMAT_SHIFT;
4235                 block[j]= level;
4236             }else{
4237                 level= (bias - level)>>QMAT_SHIFT;
4238                 block[j]= -level;
4239             }
4240             max |=level;
4241         }else{
4242             block[j]=0;
4243         }
4244     }
4245     *overflow= s->max_qcoeff < max; //overflow might have happened
4246
4247     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4248     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4249         ff_block_permute(block, s->idsp.idct_permutation,
4250                          scantable, last_non_zero);
4251
4252     return last_non_zero;
4253 }
4254
/* Byte offset of field x inside MpegEncContext, for the AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Flag combination used by the AVOption tables below. The expansion is
 * parenthesized so it stays a single operand in any surrounding expression. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4257 static const AVOption h263_options[] = {
4258     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4259     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4260     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4261     FF_MPV_COMMON_OPTS
4262     { NULL },
4263 };
4264
4265 static const AVClass h263_class = {
4266     .class_name = "H.263 encoder",
4267     .item_name  = av_default_item_name,
4268     .option     = h263_options,
4269     .version    = LIBAVUTIL_VERSION_INT,
4270 };
4271
4272 AVCodec ff_h263_encoder = {
4273     .name           = "h263",
4274     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4275     .type           = AVMEDIA_TYPE_VIDEO,
4276     .id             = AV_CODEC_ID_H263,
4277     .priv_data_size = sizeof(MpegEncContext),
4278     .init           = ff_mpv_encode_init,
4279     .encode2        = ff_mpv_encode_picture,
4280     .close          = ff_mpv_encode_end,
4281     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4282     .priv_class     = &h263_class,
4283 };
4284
4285 static const AVOption h263p_options[] = {
4286     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4287     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4288     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4289     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4290     FF_MPV_COMMON_OPTS
4291     { NULL },
4292 };
4293 static const AVClass h263p_class = {
4294     .class_name = "H.263p encoder",
4295     .item_name  = av_default_item_name,
4296     .option     = h263p_options,
4297     .version    = LIBAVUTIL_VERSION_INT,
4298 };
4299
4300 AVCodec ff_h263p_encoder = {
4301     .name           = "h263p",
4302     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4303     .type           = AVMEDIA_TYPE_VIDEO,
4304     .id             = AV_CODEC_ID_H263P,
4305     .priv_data_size = sizeof(MpegEncContext),
4306     .init           = ff_mpv_encode_init,
4307     .encode2        = ff_mpv_encode_picture,
4308     .close          = ff_mpv_encode_end,
4309     .capabilities   = CODEC_CAP_SLICE_THREADS,
4310     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4311     .priv_class     = &h263p_class,
4312 };
4313
4314 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4315
4316 AVCodec ff_msmpeg4v2_encoder = {
4317     .name           = "msmpeg4v2",
4318     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4319     .type           = AVMEDIA_TYPE_VIDEO,
4320     .id             = AV_CODEC_ID_MSMPEG4V2,
4321     .priv_data_size = sizeof(MpegEncContext),
4322     .init           = ff_mpv_encode_init,
4323     .encode2        = ff_mpv_encode_picture,
4324     .close          = ff_mpv_encode_end,
4325     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4326     .priv_class     = &msmpeg4v2_class,
4327 };
4328
4329 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4330
4331 AVCodec ff_msmpeg4v3_encoder = {
4332     .name           = "msmpeg4",
4333     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4334     .type           = AVMEDIA_TYPE_VIDEO,
4335     .id             = AV_CODEC_ID_MSMPEG4V3,
4336     .priv_data_size = sizeof(MpegEncContext),
4337     .init           = ff_mpv_encode_init,
4338     .encode2        = ff_mpv_encode_picture,
4339     .close          = ff_mpv_encode_end,
4340     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4341     .priv_class     = &msmpeg4v3_class,
4342 };
4343
4344 FF_MPV_GENERIC_CLASS(wmv1)
4345
4346 AVCodec ff_wmv1_encoder = {
4347     .name           = "wmv1",
4348     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4349     .type           = AVMEDIA_TYPE_VIDEO,
4350     .id             = AV_CODEC_ID_WMV1,
4351     .priv_data_size = sizeof(MpegEncContext),
4352     .init           = ff_mpv_encode_init,
4353     .encode2        = ff_mpv_encode_picture,
4354     .close          = ff_mpv_encode_end,
4355     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4356     .priv_class     = &wmv1_class,
4357 };