]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
lavc: make rc_qsquish a private option of mpegvideo encoders
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "idctdsp.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "h261.h"
44 #include "h263.h"
45 #include "mjpegenc_common.h"
46 #include "mathops.h"
47 #include "mpegutils.h"
48 #include "mjpegenc.h"
49 #include "msmpeg4.h"
50 #include "pixblockdsp.h"
51 #include "qpeldsp.h"
52 #include "faandct.h"
53 #include "thread.h"
54 #include "aandcttab.h"
55 #include "flv.h"
56 #include "mpeg4video.h"
57 #include "internal.h"
58 #include "bytestream.h"
59 #include <limits.h>
60
/* Quantizer bias values are fixed-point numbers with this many fractional
 * bits; e.g. ff_mpv_encode_init() encodes a bias of 3/8 as
 * 3 << (QUANT_BIAS_SHIFT - 3). */
61 #define QUANT_BIAS_SHIFT 8
62
/* Fixed-point precision of the reciprocal tables built by
 * ff_convert_matrix(): QMAT_SHIFT_MMX for the 16-bit qmat16 tables,
 * QMAT_SHIFT for the int qmat tables. */
63 #define QMAT_SHIFT_MMX 16
64 #define QMAT_SHIFT 22
65
/* Forward declarations of file-local helpers that are referenced before
 * their definitions. */
66 static int encode_picture(MpegEncContext *s, int picture_number);
67 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
68 static int sse_mb(MpegEncContext *s);
69 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
70 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
71
/* Shared default tables installed into every encoder context by
 * mpv_encode_defaults() (s->me.mv_penalty and s->fcode_tab). */
72 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
73 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
74
/* Option table consisting only of the common mpegvideo encoder options,
 * terminated by a NULL entry. */
75 const AVOption ff_mpv_generic_options[] = {
76     FF_MPV_COMMON_OPTS
77     { NULL },
78 };
79
80 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
81                        uint16_t (*qmat16)[2][64],
82                        const uint16_t *quant_matrix,
83                        int bias, int qmin, int qmax, int intra)
84 {
85     FDCTDSPContext *fdsp = &s->fdsp;
86     int qscale;
87     int shift = 0;
88
89     for (qscale = qmin; qscale <= qmax; qscale++) {
90         int i;
91         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
92 #if CONFIG_FAANDCT
93             fdsp->fdct == ff_faandct            ||
94 #endif /* CONFIG_FAANDCT */
95             fdsp->fdct == ff_jpeg_fdct_islow_10) {
96             for (i = 0; i < 64; i++) {
97                 const int j = s->idsp.idct_permutation[i];
98                 /* 16 <= qscale * quant_matrix[i] <= 7905
99                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
100                  *             19952 <=              x  <= 249205026
101                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
102                  *           3444240 >= (1 << 36) / (x) >= 275 */
103
104                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
105                                         (qscale * quant_matrix[j]));
106             }
107         } else if (fdsp->fdct == ff_fdct_ifast) {
108             for (i = 0; i < 64; i++) {
109                 const int j = s->idsp.idct_permutation[i];
110                 /* 16 <= qscale * quant_matrix[i] <= 7905
111                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
112                  *             19952 <=              x  <= 249205026
113                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
114                  *           3444240 >= (1 << 36) / (x) >= 275 */
115
116                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
117                                         (ff_aanscales[i] * qscale *
118                                          quant_matrix[j]));
119             }
120         } else {
121             for (i = 0; i < 64; i++) {
122                 const int j = s->idsp.idct_permutation[i];
123                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
124                  * Assume x = qscale * quant_matrix[i]
125                  * So             16 <=              x  <= 7905
126                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
127                  * so          32768 >= (1 << 19) / (x) >= 67 */
128                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
129                                         (qscale * quant_matrix[j]));
130                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
131                 //                    (qscale * quant_matrix[i]);
132                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
133                                        (qscale * quant_matrix[j]);
134
135                 if (qmat16[qscale][0][i] == 0 ||
136                     qmat16[qscale][0][i] == 128 * 256)
137                     qmat16[qscale][0][i] = 128 * 256 - 1;
138                 qmat16[qscale][1][i] =
139                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
140                                 qmat16[qscale][0][i]);
141             }
142         }
143
144         for (i = intra; i < 64; i++) {
145             int64_t max = 8191;
146             if (fdsp->fdct == ff_fdct_ifast) {
147                 max = (8191LL * ff_aanscales[i]) >> 14;
148             }
149             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
150                 shift++;
151             }
152         }
153     }
154     if (shift) {
155         av_log(NULL, AV_LOG_INFO,
156                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
157                QMAT_SHIFT - shift);
158     }
159 }
160
161 static inline void update_qscale(MpegEncContext *s)
162 {
163     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
164                 (FF_LAMBDA_SHIFT + 7);
165     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
166
167     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
168                  FF_LAMBDA_SHIFT;
169 }
170
171 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
172 {
173     int i;
174
175     if (matrix) {
176         put_bits(pb, 1, 1);
177         for (i = 0; i < 64; i++) {
178             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
179         }
180     } else
181         put_bits(pb, 1, 0);
182 }
183
184 /**
185  * init s->current_picture.qscale_table from s->lambda_table
186  */
187 void ff_init_qscale_tab(MpegEncContext *s)
188 {
189     int8_t * const qscale_table = s->current_picture.qscale_table;
190     int i;
191
192     for (i = 0; i < s->mb_num; i++) {
193         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
194         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
195         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
196                                                   s->avctx->qmax);
197     }
198 }
199
200 static void update_duplicate_context_after_me(MpegEncContext *dst,
201                                               MpegEncContext *src)
202 {
203 #define COPY(a) dst->a= src->a
204     COPY(pict_type);
205     COPY(current_picture);
206     COPY(f_code);
207     COPY(b_code);
208     COPY(qscale);
209     COPY(lambda);
210     COPY(lambda2);
211     COPY(picture_in_gop_number);
212     COPY(gop_picture_number);
213     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
214     COPY(progressive_frame);    // FIXME don't set in encode_header
215     COPY(partitioned_frame);    // FIXME don't set in encode_header
216 #undef COPY
217 }
218
219 /**
220  * Set the given MpegEncContext to defaults for encoding.
221  * the changed fields will not depend upon the prior state of the MpegEncContext.
222  */
223 static void mpv_encode_defaults(MpegEncContext *s)
224 {
225     int i;
226     ff_mpv_common_defaults(s);
227
228     for (i = -16; i < 16; i++) {
229         default_fcode_tab[i + MAX_MV] = 1;
230     }
231     s->me.mv_penalty = default_mv_penalty;
232     s->fcode_tab     = default_fcode_tab;
233
234     s->input_picture_number  = 0;
235     s->picture_in_gop_number = 0;
236 }
237
238 /* init video encoder */
239 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
240 {
241     MpegEncContext *s = avctx->priv_data;
242     int i, ret, format_supported;
243
244     mpv_encode_defaults(s);
245
246     switch (avctx->codec_id) {
247     case AV_CODEC_ID_MPEG2VIDEO:
248         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
249             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
250             av_log(avctx, AV_LOG_ERROR,
251                    "only YUV420 and YUV422 are supported\n");
252             return -1;
253         }
254         break;
255     case AV_CODEC_ID_MJPEG:
256         format_supported = 0;
257         /* JPEG color space */
258         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
259             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
260             (avctx->color_range == AVCOL_RANGE_JPEG &&
261              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
262               avctx->pix_fmt == AV_PIX_FMT_YUV422P)))
263             format_supported = 1;
264         /* MPEG color space */
265         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
266                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
267                   avctx->pix_fmt == AV_PIX_FMT_YUV422P))
268             format_supported = 1;
269
270         if (!format_supported) {
271             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
272             return -1;
273         }
274         break;
275     default:
276         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
277             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
278             return -1;
279         }
280     }
281
282     switch (avctx->pix_fmt) {
283     case AV_PIX_FMT_YUVJ422P:
284     case AV_PIX_FMT_YUV422P:
285         s->chroma_format = CHROMA_422;
286         break;
287     case AV_PIX_FMT_YUVJ420P:
288     case AV_PIX_FMT_YUV420P:
289     default:
290         s->chroma_format = CHROMA_420;
291         break;
292     }
293
294     s->bit_rate = avctx->bit_rate;
295     s->width    = avctx->width;
296     s->height   = avctx->height;
297     if (avctx->gop_size > 600 &&
298         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
299         av_log(avctx, AV_LOG_ERROR,
300                "Warning keyframe interval too large! reducing it ...\n");
301         avctx->gop_size = 600;
302     }
303     s->gop_size     = avctx->gop_size;
304     s->avctx        = avctx;
305     s->flags        = avctx->flags;
306     s->flags2       = avctx->flags2;
307     if (avctx->max_b_frames > MAX_B_FRAMES) {
308         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
309                "is %d.\n", MAX_B_FRAMES);
310     }
311     s->max_b_frames = avctx->max_b_frames;
312     s->codec_id     = avctx->codec->id;
313     s->strict_std_compliance = avctx->strict_std_compliance;
314     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
315     s->mpeg_quant         = avctx->mpeg_quant;
316     s->rtp_mode           = !!avctx->rtp_payload_size;
317     s->intra_dc_precision = avctx->intra_dc_precision;
318     s->user_specified_pts = AV_NOPTS_VALUE;
319
320     if (s->gop_size <= 1) {
321         s->intra_only = 1;
322         s->gop_size   = 12;
323     } else {
324         s->intra_only = 0;
325     }
326
327     s->me_method = avctx->me_method;
328
329     /* Fixed QSCALE */
330     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
331
332     s->adaptive_quant = (s->avctx->lumi_masking ||
333                          s->avctx->dark_masking ||
334                          s->avctx->temporal_cplx_masking ||
335                          s->avctx->spatial_cplx_masking  ||
336                          s->avctx->p_masking      ||
337                          s->avctx->border_masking ||
338                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
339                         !s->fixed_qscale;
340
341     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
342
343     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
344         av_log(avctx, AV_LOG_ERROR,
345                "a vbv buffer size is needed, "
346                "for encoding with a maximum bitrate\n");
347         return -1;
348     }
349
350     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
351         av_log(avctx, AV_LOG_INFO,
352                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
353     }
354
355     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
356         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
357         return -1;
358     }
359
360     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
361         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
362         return -1;
363     }
364
365     if (avctx->rc_max_rate &&
366         avctx->rc_max_rate == avctx->bit_rate &&
367         avctx->rc_max_rate != avctx->rc_min_rate) {
368         av_log(avctx, AV_LOG_INFO,
369                "impossible bitrate constraints, this will fail\n");
370     }
371
372     if (avctx->rc_buffer_size &&
373         avctx->bit_rate * (int64_t)avctx->time_base.num >
374             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
375         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
376         return -1;
377     }
378
379     if (!s->fixed_qscale &&
380         avctx->bit_rate * av_q2d(avctx->time_base) >
381             avctx->bit_rate_tolerance) {
382         av_log(avctx, AV_LOG_ERROR,
383                "bitrate tolerance too small for bitrate\n");
384         return -1;
385     }
386
387     if (s->avctx->rc_max_rate &&
388         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
389         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
390          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
391         90000LL * (avctx->rc_buffer_size - 1) >
392             s->avctx->rc_max_rate * 0xFFFFLL) {
393         av_log(avctx, AV_LOG_INFO,
394                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
395                "specified vbv buffer is too large for the given bitrate!\n");
396     }
397
398     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
399         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
400         s->codec_id != AV_CODEC_ID_FLV1) {
401         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
402         return -1;
403     }
404
405     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
406         av_log(avctx, AV_LOG_ERROR,
407                "OBMC is only supported with simple mb decision\n");
408         return -1;
409     }
410
411     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
412         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
413         return -1;
414     }
415
416     if (s->max_b_frames                    &&
417         s->codec_id != AV_CODEC_ID_MPEG4      &&
418         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
419         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
420         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
421         return -1;
422     }
423
424     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
425          s->codec_id == AV_CODEC_ID_H263  ||
426          s->codec_id == AV_CODEC_ID_H263P) &&
427         (avctx->sample_aspect_ratio.num > 255 ||
428          avctx->sample_aspect_ratio.den > 255)) {
429         av_log(avctx, AV_LOG_ERROR,
430                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
431                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
432         return -1;
433     }
434
435     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
436         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
437         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
438         return -1;
439     }
440
441     // FIXME mpeg2 uses that too
442     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
443         av_log(avctx, AV_LOG_ERROR,
444                "mpeg2 style quantization not supported by codec\n");
445         return -1;
446     }
447
448     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
449         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
450         return -1;
451     }
452
453     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
454         s->avctx->mb_decision != FF_MB_DECISION_RD) {
455         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
456         return -1;
457     }
458
459     if (s->avctx->scenechange_threshold < 1000000000 &&
460         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
461         av_log(avctx, AV_LOG_ERROR,
462                "closed gop with scene change detection are not supported yet, "
463                "set threshold to 1000000000\n");
464         return -1;
465     }
466
467     if (s->flags & CODEC_FLAG_LOW_DELAY) {
468         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
469             av_log(avctx, AV_LOG_ERROR,
470                   "low delay forcing is only available for mpeg2\n");
471             return -1;
472         }
473         if (s->max_b_frames != 0) {
474             av_log(avctx, AV_LOG_ERROR,
475                    "b frames cannot be used with low delay\n");
476             return -1;
477         }
478     }
479
480     if (s->q_scale_type == 1) {
481         if (avctx->qmax > 12) {
482             av_log(avctx, AV_LOG_ERROR,
483                    "non linear quant only supports qmax <= 12 currently\n");
484             return -1;
485         }
486     }
487
488     if (s->avctx->thread_count > 1         &&
489         s->codec_id != AV_CODEC_ID_MPEG4      &&
490         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
491         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
492         (s->codec_id != AV_CODEC_ID_H263P)) {
493         av_log(avctx, AV_LOG_ERROR,
494                "multi threaded encoding not supported by codec\n");
495         return -1;
496     }
497
498     if (s->avctx->thread_count < 1) {
499         av_log(avctx, AV_LOG_ERROR,
500                "automatic thread number detection not supported by codec,"
501                "patch welcome\n");
502         return -1;
503     }
504
505     if (s->avctx->thread_count > 1)
506         s->rtp_mode = 1;
507
508     if (!avctx->time_base.den || !avctx->time_base.num) {
509         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
510         return -1;
511     }
512
513     i = (INT_MAX / 2 + 128) >> 8;
514     if (avctx->mb_threshold >= i) {
515         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
516                i - 1);
517         return -1;
518     }
519
520     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
521         av_log(avctx, AV_LOG_INFO,
522                "notice: b_frame_strategy only affects the first pass\n");
523         avctx->b_frame_strategy = 0;
524     }
525
526     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
527     if (i > 1) {
528         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
529         avctx->time_base.den /= i;
530         avctx->time_base.num /= i;
531         //return -1;
532     }
533
534     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
535         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
536         // (a + x * 3 / 8) / x
537         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
538         s->inter_quant_bias = 0;
539     } else {
540         s->intra_quant_bias = 0;
541         // (a - x / 4) / x
542         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
543     }
544
545     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
546         s->intra_quant_bias = avctx->intra_quant_bias;
547     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
548         s->inter_quant_bias = avctx->inter_quant_bias;
549
550     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
551         s->avctx->time_base.den > (1 << 16) - 1) {
552         av_log(avctx, AV_LOG_ERROR,
553                "timebase %d/%d not supported by MPEG 4 standard, "
554                "the maximum admitted value for the timebase denominator "
555                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
556                (1 << 16) - 1);
557         return -1;
558     }
559     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
560
561     switch (avctx->codec->id) {
562     case AV_CODEC_ID_MPEG1VIDEO:
563         s->out_format = FMT_MPEG1;
564         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
565         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
566         break;
567     case AV_CODEC_ID_MPEG2VIDEO:
568         s->out_format = FMT_MPEG1;
569         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
570         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
571         s->rtp_mode   = 1;
572         break;
573     case AV_CODEC_ID_MJPEG:
574         s->out_format = FMT_MJPEG;
575         s->intra_only = 1; /* force intra only for jpeg */
576         if (!CONFIG_MJPEG_ENCODER ||
577             ff_mjpeg_encode_init(s) < 0)
578             return -1;
579         avctx->delay = 0;
580         s->low_delay = 1;
581         break;
582     case AV_CODEC_ID_H261:
583         if (!CONFIG_H261_ENCODER)
584             return -1;
585         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
586             av_log(avctx, AV_LOG_ERROR,
587                    "The specified picture size of %dx%d is not valid for the "
588                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
589                     s->width, s->height);
590             return -1;
591         }
592         s->out_format = FMT_H261;
593         avctx->delay  = 0;
594         s->low_delay  = 1;
595         break;
596     case AV_CODEC_ID_H263:
597         if (!CONFIG_H263_ENCODER)
598         return -1;
599         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
600                              s->width, s->height) == 8) {
601             av_log(avctx, AV_LOG_INFO,
602                    "The specified picture size of %dx%d is not valid for "
603                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
604                    "352x288, 704x576, and 1408x1152."
605                    "Try H.263+.\n", s->width, s->height);
606             return -1;
607         }
608         s->out_format = FMT_H263;
609         avctx->delay  = 0;
610         s->low_delay  = 1;
611         break;
612     case AV_CODEC_ID_H263P:
613         s->out_format = FMT_H263;
614         s->h263_plus  = 1;
615         /* Fx */
616         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
617         s->modified_quant  = s->h263_aic;
618         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
619         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
620
621         /* /Fx */
622         /* These are just to be sure */
623         avctx->delay = 0;
624         s->low_delay = 1;
625         break;
626     case AV_CODEC_ID_FLV1:
627         s->out_format      = FMT_H263;
628         s->h263_flv        = 2; /* format = 1; 11-bit codes */
629         s->unrestricted_mv = 1;
630         s->rtp_mode  = 0; /* don't allow GOB */
631         avctx->delay = 0;
632         s->low_delay = 1;
633         break;
634     case AV_CODEC_ID_RV10:
635         s->out_format = FMT_H263;
636         avctx->delay  = 0;
637         s->low_delay  = 1;
638         break;
639     case AV_CODEC_ID_RV20:
640         s->out_format      = FMT_H263;
641         avctx->delay       = 0;
642         s->low_delay       = 1;
643         s->modified_quant  = 1;
644         s->h263_aic        = 1;
645         s->h263_plus       = 1;
646         s->loop_filter     = 1;
647         s->unrestricted_mv = 0;
648         break;
649     case AV_CODEC_ID_MPEG4:
650         s->out_format      = FMT_H263;
651         s->h263_pred       = 1;
652         s->unrestricted_mv = 1;
653         s->low_delay       = s->max_b_frames ? 0 : 1;
654         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
655         break;
656     case AV_CODEC_ID_MSMPEG4V2:
657         s->out_format      = FMT_H263;
658         s->h263_pred       = 1;
659         s->unrestricted_mv = 1;
660         s->msmpeg4_version = 2;
661         avctx->delay       = 0;
662         s->low_delay       = 1;
663         break;
664     case AV_CODEC_ID_MSMPEG4V3:
665         s->out_format        = FMT_H263;
666         s->h263_pred         = 1;
667         s->unrestricted_mv   = 1;
668         s->msmpeg4_version   = 3;
669         s->flipflop_rounding = 1;
670         avctx->delay         = 0;
671         s->low_delay         = 1;
672         break;
673     case AV_CODEC_ID_WMV1:
674         s->out_format        = FMT_H263;
675         s->h263_pred         = 1;
676         s->unrestricted_mv   = 1;
677         s->msmpeg4_version   = 4;
678         s->flipflop_rounding = 1;
679         avctx->delay         = 0;
680         s->low_delay         = 1;
681         break;
682     case AV_CODEC_ID_WMV2:
683         s->out_format        = FMT_H263;
684         s->h263_pred         = 1;
685         s->unrestricted_mv   = 1;
686         s->msmpeg4_version   = 5;
687         s->flipflop_rounding = 1;
688         avctx->delay         = 0;
689         s->low_delay         = 1;
690         break;
691     default:
692         return -1;
693     }
694
695     avctx->has_b_frames = !s->low_delay;
696
697     s->encoding = 1;
698
699     s->progressive_frame    =
700     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
701                                                 CODEC_FLAG_INTERLACED_ME) ||
702                                 s->alternate_scan);
703
704     /* init */
705     ff_mpv_idct_init(s);
706     if (ff_mpv_common_init(s) < 0)
707         return -1;
708
709     if (ARCH_X86)
710         ff_mpv_encode_init_x86(s);
711
712     ff_fdctdsp_init(&s->fdsp, avctx);
713     ff_me_cmp_init(&s->mecc, avctx);
714     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
715     ff_pixblockdsp_init(&s->pdsp, avctx);
716     ff_qpeldsp_init(&s->qdsp);
717
718     s->avctx->coded_frame = s->current_picture.f;
719
720     if (s->msmpeg4_version) {
721         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
722                           2 * 2 * (MAX_LEVEL + 1) *
723                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
724     }
725     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
726
727     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
728     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
729     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
730     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
731     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
732                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
733     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
734                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
735
736     if (s->avctx->noise_reduction) {
737         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
738                           2 * 64 * sizeof(uint16_t), fail);
739     }
740
741     if (CONFIG_H263_ENCODER)
742         ff_h263dsp_init(&s->h263dsp);
743     if (!s->dct_quantize)
744         s->dct_quantize = ff_dct_quantize_c;
745     if (!s->denoise_dct)
746         s->denoise_dct  = denoise_dct_c;
747     s->fast_dct_quantize = s->dct_quantize;
748     if (avctx->trellis)
749         s->dct_quantize  = dct_quantize_trellis_c;
750
751     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
752         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
753
754     s->quant_precision = 5;
755
756     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
757     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
758
759     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
760         ff_h261_encode_init(s);
761     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
762         ff_h263_encode_init(s);
763     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
764         ff_msmpeg4_encode_init(s);
765     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
766         && s->out_format == FMT_MPEG1)
767         ff_mpeg1_encode_init(s);
768
769     /* init q matrix */
770     for (i = 0; i < 64; i++) {
771         int j = s->idsp.idct_permutation[i];
772         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
773             s->mpeg_quant) {
774             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
775             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
776         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
777             s->intra_matrix[j] =
778             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
779         } else {
780             /* mpeg1/2 */
781             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
782             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
783         }
784         if (s->avctx->intra_matrix)
785             s->intra_matrix[j] = s->avctx->intra_matrix[i];
786         if (s->avctx->inter_matrix)
787             s->inter_matrix[j] = s->avctx->inter_matrix[i];
788     }
789
790     /* precompute matrix */
791     /* for mjpeg, we do include qscale in the matrix */
792     if (s->out_format != FMT_MJPEG) {
793         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
794                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
795                           31, 1);
796         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
797                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
798                           31, 0);
799     }
800
801     if (ff_rate_control_init(s) < 0)
802         return -1;
803
804 #if FF_API_ERROR_RATE
805     FF_DISABLE_DEPRECATION_WARNINGS
806     if (avctx->error_rate)
807         s->error_rate = avctx->error_rate;
808     FF_ENABLE_DEPRECATION_WARNINGS;
809 #endif
810
811 #if FF_API_NORMALIZE_AQP
812     FF_DISABLE_DEPRECATION_WARNINGS
813     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
814         s->mpv_flags |= FF_MPV_FLAG_NAQ;
815     FF_ENABLE_DEPRECATION_WARNINGS;
816 #endif
817
818 #if FF_API_MV0
819     FF_DISABLE_DEPRECATION_WARNINGS
820     if (avctx->flags & CODEC_FLAG_MV0)
821         s->mpv_flags |= FF_MPV_FLAG_MV0;
822     FF_ENABLE_DEPRECATION_WARNINGS
823 #endif
824
825 #if FF_API_MPV_OPT
826     FF_DISABLE_DEPRECATION_WARNINGS
827     if (avctx->rc_qsquish != 0.0)
828         s->rc_qsquish = avctx->rc_qsquish;
829     FF_ENABLE_DEPRECATION_WARNINGS
830 #endif
831
832     if (avctx->b_frame_strategy == 2) {
833         for (i = 0; i < s->max_b_frames + 2; i++) {
834             s->tmp_frames[i] = av_frame_alloc();
835             if (!s->tmp_frames[i])
836                 return AVERROR(ENOMEM);
837
838             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
839             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
840             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
841
842             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
843             if (ret < 0)
844                 return ret;
845         }
846     }
847
848     return 0;
849 fail:
850     ff_mpv_encode_end(avctx);
851     return AVERROR_UNKNOWN;
852 }
853
854 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
855 {
856     MpegEncContext *s = avctx->priv_data;
857     int i;
858
859     ff_rate_control_uninit(s);
860
861     ff_mpv_common_end(s);
862     if (CONFIG_MJPEG_ENCODER &&
863         s->out_format == FMT_MJPEG)
864         ff_mjpeg_encode_close(s);
865
866     av_freep(&avctx->extradata);
867
868     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
869         av_frame_free(&s->tmp_frames[i]);
870
871     ff_free_picture_tables(&s->new_picture);
872     ff_mpeg_unref_picture(s, &s->new_picture);
873
874     av_freep(&s->avctx->stats_out);
875     av_freep(&s->ac_stats);
876
877     av_freep(&s->q_intra_matrix);
878     av_freep(&s->q_inter_matrix);
879     av_freep(&s->q_intra_matrix16);
880     av_freep(&s->q_inter_matrix16);
881     av_freep(&s->input_picture);
882     av_freep(&s->reordered_input_picture);
883     av_freep(&s->dct_offset);
884
885     return 0;
886 }
887
/**
 * Sum of absolute differences between a 16x16 block of samples and a single
 * reference value.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared against
 * @param stride distance, in samples, between the starts of consecutive rows
 * @return the accumulated absolute error over all 256 samples
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
901
902 static int get_intra_count(MpegEncContext *s, uint8_t *src,
903                            uint8_t *ref, int stride)
904 {
905     int x, y, w, h;
906     int acc = 0;
907
908     w = s->width  & ~15;
909     h = s->height & ~15;
910
911     for (y = 0; y < h; y += 16) {
912         for (x = 0; x < w; x += 16) {
913             int offset = x + y * stride;
914             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
915                                       stride, 16);
916             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
917             int sae  = get_sae(src + offset, mean, stride);
918
919             acc += sae + 500 < sad;
920         }
921     }
922     return acc;
923 }
924
925
926 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
927 {
928     Picture *pic = NULL;
929     int64_t pts;
930     int i, display_picture_number = 0, ret;
931     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
932                                                  (s->low_delay ? 0 : 1);
933     int direct = 1;
934
935     if (pic_arg) {
936         pts = pic_arg->pts;
937         display_picture_number = s->input_picture_number++;
938
939         if (pts != AV_NOPTS_VALUE) {
940             if (s->user_specified_pts != AV_NOPTS_VALUE) {
941                 int64_t time = pts;
942                 int64_t last = s->user_specified_pts;
943
944                 if (time <= last) {
945                     av_log(s->avctx, AV_LOG_ERROR,
946                            "Error, Invalid timestamp=%"PRId64", "
947                            "last=%"PRId64"\n", pts, s->user_specified_pts);
948                     return -1;
949                 }
950
951                 if (!s->low_delay && display_picture_number == 1)
952                     s->dts_delta = time - last;
953             }
954             s->user_specified_pts = pts;
955         } else {
956             if (s->user_specified_pts != AV_NOPTS_VALUE) {
957                 s->user_specified_pts =
958                 pts = s->user_specified_pts + 1;
959                 av_log(s->avctx, AV_LOG_INFO,
960                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
961                        pts);
962             } else {
963                 pts = display_picture_number;
964             }
965         }
966     }
967
968     if (pic_arg) {
969         if (!pic_arg->buf[0]);
970             direct = 0;
971         if (pic_arg->linesize[0] != s->linesize)
972             direct = 0;
973         if (pic_arg->linesize[1] != s->uvlinesize)
974             direct = 0;
975         if (pic_arg->linesize[2] != s->uvlinesize)
976             direct = 0;
977
978         av_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
979                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
980
981         if (direct) {
982             i = ff_find_unused_picture(s, 1);
983             if (i < 0)
984                 return i;
985
986             pic = &s->picture[i];
987             pic->reference = 3;
988
989             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
990                 return ret;
991             if (ff_alloc_picture(s, pic, 1) < 0) {
992                 return -1;
993             }
994         } else {
995             i = ff_find_unused_picture(s, 0);
996             if (i < 0)
997                 return i;
998
999             pic = &s->picture[i];
1000             pic->reference = 3;
1001
1002             if (ff_alloc_picture(s, pic, 0) < 0) {
1003                 return -1;
1004             }
1005
1006             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1007                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1008                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1009                 // empty
1010             } else {
1011                 int h_chroma_shift, v_chroma_shift;
1012                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1013                                                  &h_chroma_shift,
1014                                                  &v_chroma_shift);
1015
1016                 for (i = 0; i < 3; i++) {
1017                     int src_stride = pic_arg->linesize[i];
1018                     int dst_stride = i ? s->uvlinesize : s->linesize;
1019                     int h_shift = i ? h_chroma_shift : 0;
1020                     int v_shift = i ? v_chroma_shift : 0;
1021                     int w = s->width  >> h_shift;
1022                     int h = s->height >> v_shift;
1023                     uint8_t *src = pic_arg->data[i];
1024                     uint8_t *dst = pic->f->data[i];
1025
1026                     if (!s->avctx->rc_buffer_size)
1027                         dst += INPLACE_OFFSET;
1028
1029                     if (src_stride == dst_stride)
1030                         memcpy(dst, src, src_stride * h);
1031                     else {
1032                         while (h--) {
1033                             memcpy(dst, src, w);
1034                             dst += dst_stride;
1035                             src += src_stride;
1036                         }
1037                     }
1038                 }
1039             }
1040         }
1041         ret = av_frame_copy_props(pic->f, pic_arg);
1042         if (ret < 0)
1043             return ret;
1044
1045         pic->f->display_picture_number = display_picture_number;
1046         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1047     }
1048
1049     /* shift buffer entries */
1050     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1051         s->input_picture[i - 1] = s->input_picture[i];
1052
1053     s->input_picture[encoding_delay] = (Picture*) pic;
1054
1055     return 0;
1056 }
1057
1058 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1059 {
1060     int x, y, plane;
1061     int score = 0;
1062     int64_t score64 = 0;
1063
1064     for (plane = 0; plane < 3; plane++) {
1065         const int stride = p->f->linesize[plane];
1066         const int bw = plane ? 1 : 2;
1067         for (y = 0; y < s->mb_height * bw; y++) {
1068             for (x = 0; x < s->mb_width * bw; x++) {
1069                 int off = p->shared ? 0 : 16;
1070                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1071                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1072                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1073
1074                 switch (s->avctx->frame_skip_exp) {
1075                 case 0: score    =  FFMAX(score, v);          break;
1076                 case 1: score   += FFABS(v);                  break;
1077                 case 2: score   += v * v;                     break;
1078                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1079                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1080                 }
1081             }
1082         }
1083     }
1084
1085     if (score)
1086         score64 = score;
1087
1088     if (score64 < s->avctx->frame_skip_threshold)
1089         return 1;
1090     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1091         return 1;
1092     return 0;
1093 }
1094
1095 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1096 {
1097     AVPacket pkt = { 0 };
1098     int ret, got_output;
1099
1100     av_init_packet(&pkt);
1101     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1102     if (ret < 0)
1103         return ret;
1104
1105     ret = pkt.size;
1106     av_free_packet(&pkt);
1107     return ret;
1108 }
1109
1110 static int estimate_best_b_count(MpegEncContext *s)
1111 {
1112     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1113     AVCodecContext *c = avcodec_alloc_context3(NULL);
1114     const int scale = s->avctx->brd_scale;
1115     int i, j, out_size, p_lambda, b_lambda, lambda2;
1116     int64_t best_rd  = INT64_MAX;
1117     int best_b_count = -1;
1118
1119     assert(scale >= 0 && scale <= 3);
1120
1121     //emms_c();
1122     //s->next_picture_ptr->quality;
1123     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1124     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1125     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1126     if (!b_lambda) // FIXME we should do this somewhere else
1127         b_lambda = p_lambda;
1128     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1129                FF_LAMBDA_SHIFT;
1130
1131     c->width        = s->width  >> scale;
1132     c->height       = s->height >> scale;
1133     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1134     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1135     c->mb_decision  = s->avctx->mb_decision;
1136     c->me_cmp       = s->avctx->me_cmp;
1137     c->mb_cmp       = s->avctx->mb_cmp;
1138     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1139     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1140     c->time_base    = s->avctx->time_base;
1141     c->max_b_frames = s->max_b_frames;
1142
1143     if (avcodec_open2(c, codec, NULL) < 0)
1144         return -1;
1145
1146     for (i = 0; i < s->max_b_frames + 2; i++) {
1147         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1148                                                 s->next_picture_ptr;
1149
1150         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1151             pre_input = *pre_input_ptr;
1152
1153             if (!pre_input.shared && i) {
1154                 pre_input.f->data[0] += INPLACE_OFFSET;
1155                 pre_input.f->data[1] += INPLACE_OFFSET;
1156                 pre_input.f->data[2] += INPLACE_OFFSET;
1157             }
1158
1159             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1160                                        s->tmp_frames[i]->linesize[0],
1161                                        pre_input.f->data[0],
1162                                        pre_input.f->linesize[0],
1163                                        c->width, c->height);
1164             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1165                                        s->tmp_frames[i]->linesize[1],
1166                                        pre_input.f->data[1],
1167                                        pre_input.f->linesize[1],
1168                                        c->width >> 1, c->height >> 1);
1169             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1170                                        s->tmp_frames[i]->linesize[2],
1171                                        pre_input.f->data[2],
1172                                        pre_input.f->linesize[2],
1173                                        c->width >> 1, c->height >> 1);
1174         }
1175     }
1176
1177     for (j = 0; j < s->max_b_frames + 1; j++) {
1178         int64_t rd = 0;
1179
1180         if (!s->input_picture[j])
1181             break;
1182
1183         c->error[0] = c->error[1] = c->error[2] = 0;
1184
1185         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1186         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1187
1188         out_size = encode_frame(c, s->tmp_frames[0]);
1189
1190         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1191
1192         for (i = 0; i < s->max_b_frames + 1; i++) {
1193             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1194
1195             s->tmp_frames[i + 1]->pict_type = is_p ?
1196                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1197             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1198
1199             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1200
1201             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1202         }
1203
1204         /* get the delayed frames */
1205         while (out_size) {
1206             out_size = encode_frame(c, NULL);
1207             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1208         }
1209
1210         rd += c->error[0] + c->error[1] + c->error[2];
1211
1212         if (rd < best_rd) {
1213             best_rd = rd;
1214             best_b_count = j;
1215         }
1216     }
1217
1218     avcodec_close(c);
1219     av_freep(&c);
1220
1221     return best_b_count;
1222 }
1223
1224 static int select_input_picture(MpegEncContext *s)
1225 {
1226     int i, ret;
1227
1228     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1229         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1230     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1231
1232     /* set next picture type & ordering */
1233     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1234         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1235             !s->next_picture_ptr || s->intra_only) {
1236             s->reordered_input_picture[0] = s->input_picture[0];
1237             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1238             s->reordered_input_picture[0]->f->coded_picture_number =
1239                 s->coded_picture_number++;
1240         } else {
1241             int b_frames;
1242
1243             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1244                 if (s->picture_in_gop_number < s->gop_size &&
1245                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1246                     // FIXME check that te gop check above is +-1 correct
1247                     av_frame_unref(s->input_picture[0]->f);
1248
1249                     emms_c();
1250                     ff_vbv_update(s, 0);
1251
1252                     goto no_output_pic;
1253                 }
1254             }
1255
1256             if (s->flags & CODEC_FLAG_PASS2) {
1257                 for (i = 0; i < s->max_b_frames + 1; i++) {
1258                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1259
1260                     if (pict_num >= s->rc_context.num_entries)
1261                         break;
1262                     if (!s->input_picture[i]) {
1263                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1264                         break;
1265                     }
1266
1267                     s->input_picture[i]->f->pict_type =
1268                         s->rc_context.entry[pict_num].new_pict_type;
1269                 }
1270             }
1271
1272             if (s->avctx->b_frame_strategy == 0) {
1273                 b_frames = s->max_b_frames;
1274                 while (b_frames && !s->input_picture[b_frames])
1275                     b_frames--;
1276             } else if (s->avctx->b_frame_strategy == 1) {
1277                 for (i = 1; i < s->max_b_frames + 1; i++) {
1278                     if (s->input_picture[i] &&
1279                         s->input_picture[i]->b_frame_score == 0) {
1280                         s->input_picture[i]->b_frame_score =
1281                             get_intra_count(s,
1282                                             s->input_picture[i    ]->f->data[0],
1283                                             s->input_picture[i - 1]->f->data[0],
1284                                             s->linesize) + 1;
1285                     }
1286                 }
1287                 for (i = 0; i < s->max_b_frames + 1; i++) {
1288                     if (!s->input_picture[i] ||
1289                         s->input_picture[i]->b_frame_score - 1 >
1290                             s->mb_num / s->avctx->b_sensitivity)
1291                         break;
1292                 }
1293
1294                 b_frames = FFMAX(0, i - 1);
1295
1296                 /* reset scores */
1297                 for (i = 0; i < b_frames + 1; i++) {
1298                     s->input_picture[i]->b_frame_score = 0;
1299                 }
1300             } else if (s->avctx->b_frame_strategy == 2) {
1301                 b_frames = estimate_best_b_count(s);
1302             } else {
1303                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1304                 b_frames = 0;
1305             }
1306
1307             emms_c();
1308
1309             for (i = b_frames - 1; i >= 0; i--) {
1310                 int type = s->input_picture[i]->f->pict_type;
1311                 if (type && type != AV_PICTURE_TYPE_B)
1312                     b_frames = i;
1313             }
1314             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1315                 b_frames == s->max_b_frames) {
1316                 av_log(s->avctx, AV_LOG_ERROR,
1317                        "warning, too many b frames in a row\n");
1318             }
1319
1320             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1321                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1322                     s->gop_size > s->picture_in_gop_number) {
1323                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1324                 } else {
1325                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1326                         b_frames = 0;
1327                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1328                 }
1329             }
1330
1331             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1332                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1333                 b_frames--;
1334
1335             s->reordered_input_picture[0] = s->input_picture[b_frames];
1336             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1337                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1338             s->reordered_input_picture[0]->f->coded_picture_number =
1339                 s->coded_picture_number++;
1340             for (i = 0; i < b_frames; i++) {
1341                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1342                 s->reordered_input_picture[i + 1]->f->pict_type =
1343                     AV_PICTURE_TYPE_B;
1344                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1345                     s->coded_picture_number++;
1346             }
1347         }
1348     }
1349 no_output_pic:
1350     if (s->reordered_input_picture[0]) {
1351         s->reordered_input_picture[0]->reference =
1352            s->reordered_input_picture[0]->f->pict_type !=
1353                AV_PICTURE_TYPE_B ? 3 : 0;
1354
1355         ff_mpeg_unref_picture(s, &s->new_picture);
1356         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1357             return ret;
1358
1359         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1360             // input is a shared pix, so we can't modifiy it -> alloc a new
1361             // one & ensure that the shared one is reuseable
1362
1363             Picture *pic;
1364             int i = ff_find_unused_picture(s, 0);
1365             if (i < 0)
1366                 return i;
1367             pic = &s->picture[i];
1368
1369             pic->reference = s->reordered_input_picture[0]->reference;
1370             if (ff_alloc_picture(s, pic, 0) < 0) {
1371                 return -1;
1372             }
1373
1374             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1375             if (ret < 0)
1376                 return ret;
1377
1378             /* mark us unused / free shared pic */
1379             av_frame_unref(s->reordered_input_picture[0]->f);
1380             s->reordered_input_picture[0]->shared = 0;
1381
1382             s->current_picture_ptr = pic;
1383         } else {
1384             // input is not a shared pix -> reuse buffer for current_pix
1385             s->current_picture_ptr = s->reordered_input_picture[0];
1386             for (i = 0; i < 4; i++) {
1387                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1388             }
1389         }
1390         ff_mpeg_unref_picture(s, &s->current_picture);
1391         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1392                                        s->current_picture_ptr)) < 0)
1393             return ret;
1394
1395         s->picture_number = s->new_picture.f->display_picture_number;
1396     } else {
1397         ff_mpeg_unref_picture(s, &s->new_picture);
1398     }
1399     return 0;
1400 }
1401
1402 static void frame_end(MpegEncContext *s)
1403 {
1404     int i;
1405
1406     if (s->unrestricted_mv &&
1407         s->current_picture.reference &&
1408         !s->intra_only) {
1409         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1410         int hshift = desc->log2_chroma_w;
1411         int vshift = desc->log2_chroma_h;
1412         s->mpvencdsp.draw_edges(s->current_picture.f->data[0], s->linesize,
1413                                 s->h_edge_pos, s->v_edge_pos,
1414                                 EDGE_WIDTH, EDGE_WIDTH,
1415                                 EDGE_TOP | EDGE_BOTTOM);
1416         s->mpvencdsp.draw_edges(s->current_picture.f->data[1], s->uvlinesize,
1417                                 s->h_edge_pos >> hshift,
1418                                 s->v_edge_pos >> vshift,
1419                                 EDGE_WIDTH >> hshift,
1420                                 EDGE_WIDTH >> vshift,
1421                                 EDGE_TOP | EDGE_BOTTOM);
1422         s->mpvencdsp.draw_edges(s->current_picture.f->data[2], s->uvlinesize,
1423                                 s->h_edge_pos >> hshift,
1424                                 s->v_edge_pos >> vshift,
1425                                 EDGE_WIDTH >> hshift,
1426                                 EDGE_WIDTH >> vshift,
1427                                 EDGE_TOP | EDGE_BOTTOM);
1428     }
1429
1430     emms_c();
1431
1432     s->last_pict_type                 = s->pict_type;
1433     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1434     if (s->pict_type!= AV_PICTURE_TYPE_B)
1435         s->last_non_b_pict_type = s->pict_type;
1436
1437     if (s->encoding) {
1438         /* release non-reference frames */
1439         for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1440             if (!s->picture[i].reference)
1441                 ff_mpeg_unref_picture(s, &s->picture[i]);
1442         }
1443     }
1444
1445     s->avctx->coded_frame = s->current_picture_ptr->f;
1446
1447 }
1448
1449 static void update_noise_reduction(MpegEncContext *s)
1450 {
1451     int intra, i;
1452
1453     for (intra = 0; intra < 2; intra++) {
1454         if (s->dct_count[intra] > (1 << 16)) {
1455             for (i = 0; i < 64; i++) {
1456                 s->dct_error_sum[intra][i] >>= 1;
1457             }
1458             s->dct_count[intra] >>= 1;
1459         }
1460
1461         for (i = 0; i < 64; i++) {
1462             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1463                                        s->dct_count[intra] +
1464                                        s->dct_error_sum[intra][i] / 2) /
1465                                       (s->dct_error_sum[intra][i] + 1);
1466         }
1467     }
1468 }
1469
1470 static int frame_start(MpegEncContext *s)
1471 {
1472     int ret;
1473
1474     /* mark & release old frames */
1475     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1476         s->last_picture_ptr != s->next_picture_ptr &&
1477         s->last_picture_ptr->f->buf[0]) {
1478         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1479     }
1480
1481     s->current_picture_ptr->f->pict_type = s->pict_type;
1482     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1483
1484     ff_mpeg_unref_picture(s, &s->current_picture);
1485     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1486                                    s->current_picture_ptr)) < 0)
1487         return ret;
1488
1489     if (s->pict_type != AV_PICTURE_TYPE_B) {
1490         s->last_picture_ptr = s->next_picture_ptr;
1491         if (!s->droppable)
1492             s->next_picture_ptr = s->current_picture_ptr;
1493     }
1494
1495     if (s->last_picture_ptr) {
1496         ff_mpeg_unref_picture(s, &s->last_picture);
1497         if (s->last_picture_ptr->f->buf[0] &&
1498             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1499                                        s->last_picture_ptr)) < 0)
1500             return ret;
1501     }
1502     if (s->next_picture_ptr) {
1503         ff_mpeg_unref_picture(s, &s->next_picture);
1504         if (s->next_picture_ptr->f->buf[0] &&
1505             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1506                                        s->next_picture_ptr)) < 0)
1507             return ret;
1508     }
1509
1510     if (s->picture_structure!= PICT_FRAME) {
1511         int i;
1512         for (i = 0; i < 4; i++) {
1513             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1514                 s->current_picture.f->data[i] +=
1515                     s->current_picture.f->linesize[i];
1516             }
1517             s->current_picture.f->linesize[i] *= 2;
1518             s->last_picture.f->linesize[i]    *= 2;
1519             s->next_picture.f->linesize[i]    *= 2;
1520         }
1521     }
1522
1523     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1524         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1525         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1526     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1527         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1528         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1529     } else {
1530         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1531         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1532     }
1533
1534     if (s->dct_error_sum) {
1535         assert(s->avctx->noise_reduction && s->encoding);
1536         update_noise_reduction(s);
1537     }
1538
1539     return 0;
1540 }
1541
1542 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1543                           const AVFrame *pic_arg, int *got_packet)
1544 {
1545     MpegEncContext *s = avctx->priv_data;
1546     int i, stuffing_count, ret;
1547     int context_count = s->slice_context_count;
1548
1549     s->picture_in_gop_number++;
1550
1551     if (load_input_picture(s, pic_arg) < 0)
1552         return -1;
1553
1554     if (select_input_picture(s) < 0) {
1555         return -1;
1556     }
1557
1558     /* output? */
1559     if (s->new_picture.f->data[0]) {
1560         if (!pkt->data &&
1561             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1562             return ret;
1563         if (s->mb_info) {
1564             s->mb_info_ptr = av_packet_new_side_data(pkt,
1565                                  AV_PKT_DATA_H263_MB_INFO,
1566                                  s->mb_width*s->mb_height*12);
1567             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1568         }
1569
1570         for (i = 0; i < context_count; i++) {
1571             int start_y = s->thread_context[i]->start_mb_y;
1572             int   end_y = s->thread_context[i]->  end_mb_y;
1573             int h       = s->mb_height;
1574             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1575             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1576
1577             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1578         }
1579
1580         s->pict_type = s->new_picture.f->pict_type;
1581         //emms_c();
1582         ret = frame_start(s);
1583         if (ret < 0)
1584             return ret;
1585 vbv_retry:
1586         if (encode_picture(s, s->picture_number) < 0)
1587             return -1;
1588
1589         avctx->header_bits = s->header_bits;
1590         avctx->mv_bits     = s->mv_bits;
1591         avctx->misc_bits   = s->misc_bits;
1592         avctx->i_tex_bits  = s->i_tex_bits;
1593         avctx->p_tex_bits  = s->p_tex_bits;
1594         avctx->i_count     = s->i_count;
1595         // FIXME f/b_count in avctx
1596         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1597         avctx->skip_count  = s->skip_count;
1598
1599         frame_end(s);
1600
1601         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1602             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1603
1604         if (avctx->rc_buffer_size) {
1605             RateControlContext *rcc = &s->rc_context;
1606             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1607
1608             if (put_bits_count(&s->pb) > max_size &&
1609                 s->lambda < s->avctx->lmax) {
1610                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1611                                        (s->qscale + 1) / s->qscale);
1612                 if (s->adaptive_quant) {
1613                     int i;
1614                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1615                         s->lambda_table[i] =
1616                             FFMAX(s->lambda_table[i] + 1,
1617                                   s->lambda_table[i] * (s->qscale + 1) /
1618                                   s->qscale);
1619                 }
1620                 s->mb_skipped = 0;        // done in frame_start()
1621                 // done in encode_picture() so we must undo it
1622                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1623                     if (s->flipflop_rounding          ||
1624                         s->codec_id == AV_CODEC_ID_H263P ||
1625                         s->codec_id == AV_CODEC_ID_MPEG4)
1626                         s->no_rounding ^= 1;
1627                 }
1628                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1629                     s->time_base       = s->last_time_base;
1630                     s->last_non_b_time = s->time - s->pp_time;
1631                 }
1632                 for (i = 0; i < context_count; i++) {
1633                     PutBitContext *pb = &s->thread_context[i]->pb;
1634                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1635                 }
1636                 goto vbv_retry;
1637             }
1638
1639             assert(s->avctx->rc_max_rate);
1640         }
1641
1642         if (s->flags & CODEC_FLAG_PASS1)
1643             ff_write_pass1_stats(s);
1644
1645         for (i = 0; i < 4; i++) {
1646             s->current_picture_ptr->f->error[i] = s->current_picture.f->error[i];
1647             avctx->error[i] += s->current_picture_ptr->f->error[i];
1648         }
1649
1650         if (s->flags & CODEC_FLAG_PASS1)
1651             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1652                    avctx->i_tex_bits + avctx->p_tex_bits ==
1653                        put_bits_count(&s->pb));
1654         flush_put_bits(&s->pb);
1655         s->frame_bits  = put_bits_count(&s->pb);
1656
1657         stuffing_count = ff_vbv_update(s, s->frame_bits);
1658         if (stuffing_count) {
1659             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1660                     stuffing_count + 50) {
1661                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1662                 return -1;
1663             }
1664
1665             switch (s->codec_id) {
1666             case AV_CODEC_ID_MPEG1VIDEO:
1667             case AV_CODEC_ID_MPEG2VIDEO:
1668                 while (stuffing_count--) {
1669                     put_bits(&s->pb, 8, 0);
1670                 }
1671             break;
1672             case AV_CODEC_ID_MPEG4:
1673                 put_bits(&s->pb, 16, 0);
1674                 put_bits(&s->pb, 16, 0x1C3);
1675                 stuffing_count -= 4;
1676                 while (stuffing_count--) {
1677                     put_bits(&s->pb, 8, 0xFF);
1678                 }
1679             break;
1680             default:
1681                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1682             }
1683             flush_put_bits(&s->pb);
1684             s->frame_bits  = put_bits_count(&s->pb);
1685         }
1686
1687         /* update mpeg1/2 vbv_delay for CBR */
1688         if (s->avctx->rc_max_rate                          &&
1689             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1690             s->out_format == FMT_MPEG1                     &&
1691             90000LL * (avctx->rc_buffer_size - 1) <=
1692                 s->avctx->rc_max_rate * 0xFFFFLL) {
1693             int vbv_delay, min_delay;
1694             double inbits  = s->avctx->rc_max_rate *
1695                              av_q2d(s->avctx->time_base);
1696             int    minbits = s->frame_bits - 8 *
1697                              (s->vbv_delay_ptr - s->pb.buf - 1);
1698             double bits    = s->rc_context.buffer_index + minbits - inbits;
1699
1700             if (bits < 0)
1701                 av_log(s->avctx, AV_LOG_ERROR,
1702                        "Internal error, negative bits\n");
1703
1704             assert(s->repeat_first_field == 0);
1705
1706             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1707             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1708                         s->avctx->rc_max_rate;
1709
1710             vbv_delay = FFMAX(vbv_delay, min_delay);
1711
1712             assert(vbv_delay < 0xFFFF);
1713
1714             s->vbv_delay_ptr[0] &= 0xF8;
1715             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1716             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1717             s->vbv_delay_ptr[2] &= 0x07;
1718             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1719             avctx->vbv_delay     = vbv_delay * 300;
1720         }
1721         s->total_bits     += s->frame_bits;
1722         avctx->frame_bits  = s->frame_bits;
1723
1724         pkt->pts = s->current_picture.f->pts;
1725         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1726             if (!s->current_picture.f->coded_picture_number)
1727                 pkt->dts = pkt->pts - s->dts_delta;
1728             else
1729                 pkt->dts = s->reordered_pts;
1730             s->reordered_pts = pkt->pts;
1731         } else
1732             pkt->dts = pkt->pts;
1733         if (s->current_picture.f->key_frame)
1734             pkt->flags |= AV_PKT_FLAG_KEY;
1735         if (s->mb_info)
1736             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1737     } else {
1738         s->frame_bits = 0;
1739     }
1740     assert((s->frame_bits & 7) == 0);
1741
1742     pkt->size = s->frame_bits / 8;
1743     *got_packet = !!pkt->size;
1744     return 0;
1745 }
1746
1747 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1748                                                 int n, int threshold)
1749 {
1750     static const char tab[64] = {
1751         3, 2, 2, 1, 1, 1, 1, 1,
1752         1, 1, 1, 1, 1, 1, 1, 1,
1753         1, 1, 1, 1, 1, 1, 1, 1,
1754         0, 0, 0, 0, 0, 0, 0, 0,
1755         0, 0, 0, 0, 0, 0, 0, 0,
1756         0, 0, 0, 0, 0, 0, 0, 0,
1757         0, 0, 0, 0, 0, 0, 0, 0,
1758         0, 0, 0, 0, 0, 0, 0, 0
1759     };
1760     int score = 0;
1761     int run = 0;
1762     int i;
1763     int16_t *block = s->block[n];
1764     const int last_index = s->block_last_index[n];
1765     int skip_dc;
1766
1767     if (threshold < 0) {
1768         skip_dc = 0;
1769         threshold = -threshold;
1770     } else
1771         skip_dc = 1;
1772
1773     /* Are all we could set to zero already zero? */
1774     if (last_index <= skip_dc - 1)
1775         return;
1776
1777     for (i = 0; i <= last_index; i++) {
1778         const int j = s->intra_scantable.permutated[i];
1779         const int level = FFABS(block[j]);
1780         if (level == 1) {
1781             if (skip_dc && i == 0)
1782                 continue;
1783             score += tab[run];
1784             run = 0;
1785         } else if (level > 1) {
1786             return;
1787         } else {
1788             run++;
1789         }
1790     }
1791     if (score >= threshold)
1792         return;
1793     for (i = skip_dc; i <= last_index; i++) {
1794         const int j = s->intra_scantable.permutated[i];
1795         block[j] = 0;
1796     }
1797     if (block[0])
1798         s->block_last_index[n] = 0;
1799     else
1800         s->block_last_index[n] = -1;
1801 }
1802
1803 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1804                                int last_index)
1805 {
1806     int i;
1807     const int maxlevel = s->max_qcoeff;
1808     const int minlevel = s->min_qcoeff;
1809     int overflow = 0;
1810
1811     if (s->mb_intra) {
1812         i = 1; // skip clipping of intra dc
1813     } else
1814         i = 0;
1815
1816     for (; i <= last_index; i++) {
1817         const int j = s->intra_scantable.permutated[i];
1818         int level = block[j];
1819
1820         if (level > maxlevel) {
1821             level = maxlevel;
1822             overflow++;
1823         } else if (level < minlevel) {
1824             level = minlevel;
1825             overflow++;
1826         }
1827
1828         block[j] = level;
1829     }
1830
1831     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1832         av_log(s->avctx, AV_LOG_INFO,
1833                "warning, clipping %d dct coefficients to %d..%d\n",
1834                overflow, minlevel, maxlevel);
1835 }
1836
/**
 * Compute a per-pixel weight for an 8x8 block from local activity.
 *
 * For every pixel the neighbourhood of up to 3x3 samples (cropped at the
 * block border) is gathered; the weight is
 * 36 * sqrt(count * sum_of_squares - sum * sum) / count.
 *
 * @param weight output array of 64 weights, stored row by row
 * @param ptr    top-left sample of the 8x8 block
 * @param stride distance in bytes between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int idx;

    // FIXME optimize
    for (idx = 0; idx < 64; idx++) {
        const int row    = idx >> 3;
        const int col    = idx & 7;
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);   /* exclusive */
        const int left   = FFMAX(col - 1, 0);
        const int right  = FFMIN(8, col + 2);   /* exclusive */
        int sum   = 0;
        int sqr   = 0;
        int count = 0;
        int nx, ny;

        for (ny = top; ny < bottom; ny++) {
            for (nx = left; nx < right; nx++) {
                const int v = ptr[nx + ny * stride];

                sum += v;
                sqr += v * v;
                count++;
            }
        }

        weight[idx] = (36 * ff_sqrt(count * sqr - sum * sum)) / count;
    }
}
1860
1861 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1862                                                 int motion_x, int motion_y,
1863                                                 int mb_block_height,
1864                                                 int mb_block_count)
1865 {
1866     int16_t weight[8][64];
1867     int16_t orig[8][64];
1868     const int mb_x = s->mb_x;
1869     const int mb_y = s->mb_y;
1870     int i;
1871     int skip_dct[8];
1872     int dct_offset = s->linesize * 8; // default for progressive frames
1873     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1874     ptrdiff_t wrap_y, wrap_c;
1875
1876     for (i = 0; i < mb_block_count; i++)
1877         skip_dct[i] = s->skipdct;
1878
1879     if (s->adaptive_quant) {
1880         const int last_qp = s->qscale;
1881         const int mb_xy = mb_x + mb_y * s->mb_stride;
1882
1883         s->lambda = s->lambda_table[mb_xy];
1884         update_qscale(s);
1885
1886         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1887             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1888             s->dquant = s->qscale - last_qp;
1889
1890             if (s->out_format == FMT_H263) {
1891                 s->dquant = av_clip(s->dquant, -2, 2);
1892
1893                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1894                     if (!s->mb_intra) {
1895                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1896                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1897                                 s->dquant = 0;
1898                         }
1899                         if (s->mv_type == MV_TYPE_8X8)
1900                             s->dquant = 0;
1901                     }
1902                 }
1903             }
1904         }
1905         ff_set_qscale(s, last_qp + s->dquant);
1906     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1907         ff_set_qscale(s, s->qscale + s->dquant);
1908
1909     wrap_y = s->linesize;
1910     wrap_c = s->uvlinesize;
1911     ptr_y  = s->new_picture.f->data[0] +
1912              (mb_y * 16 * wrap_y)              + mb_x * 16;
1913     ptr_cb = s->new_picture.f->data[1] +
1914              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1915     ptr_cr = s->new_picture.f->data[2] +
1916              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1917
1918     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1919         uint8_t *ebuf = s->edge_emu_buffer + 32;
1920         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
1921                                  wrap_y, wrap_y,
1922                                  16, 16, mb_x * 16, mb_y * 16,
1923                                  s->width, s->height);
1924         ptr_y = ebuf;
1925         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
1926                                  wrap_c, wrap_c,
1927                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1928                                  s->width >> 1, s->height >> 1);
1929         ptr_cb = ebuf + 18 * wrap_y;
1930         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
1931                                  wrap_c, wrap_c,
1932                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1933                                  s->width >> 1, s->height >> 1);
1934         ptr_cr = ebuf + 18 * wrap_y + 8;
1935     }
1936
1937     if (s->mb_intra) {
1938         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1939             int progressive_score, interlaced_score;
1940
1941             s->interlaced_dct = 0;
1942             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
1943                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1944                                                      NULL, wrap_y, 8) - 400;
1945
1946             if (progressive_score > 0) {
1947                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
1948                                                         NULL, wrap_y * 2, 8) +
1949                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
1950                                                         NULL, wrap_y * 2, 8);
1951                 if (progressive_score > interlaced_score) {
1952                     s->interlaced_dct = 1;
1953
1954                     dct_offset = wrap_y;
1955                     wrap_y <<= 1;
1956                     if (s->chroma_format == CHROMA_422)
1957                         wrap_c <<= 1;
1958                 }
1959             }
1960         }
1961
1962         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
1963         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
1964         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
1965         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
1966
1967         if (s->flags & CODEC_FLAG_GRAY) {
1968             skip_dct[4] = 1;
1969             skip_dct[5] = 1;
1970         } else {
1971             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1972             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1973             if (!s->chroma_y_shift) { /* 422 */
1974                 s->pdsp.get_pixels(s->block[6],
1975                                    ptr_cb + (dct_offset >> 1), wrap_c);
1976                 s->pdsp.get_pixels(s->block[7],
1977                                    ptr_cr + (dct_offset >> 1), wrap_c);
1978             }
1979         }
1980     } else {
1981         op_pixels_func (*op_pix)[4];
1982         qpel_mc_func (*op_qpix)[16];
1983         uint8_t *dest_y, *dest_cb, *dest_cr;
1984
1985         dest_y  = s->dest[0];
1986         dest_cb = s->dest[1];
1987         dest_cr = s->dest[2];
1988
1989         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1990             op_pix  = s->hdsp.put_pixels_tab;
1991             op_qpix = s->qdsp.put_qpel_pixels_tab;
1992         } else {
1993             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
1994             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
1995         }
1996
1997         if (s->mv_dir & MV_DIR_FORWARD) {
1998             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
1999                           s->last_picture.f->data,
2000                           op_pix, op_qpix);
2001             op_pix  = s->hdsp.avg_pixels_tab;
2002             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2003         }
2004         if (s->mv_dir & MV_DIR_BACKWARD) {
2005             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2006                           s->next_picture.f->data,
2007                           op_pix, op_qpix);
2008         }
2009
2010         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2011             int progressive_score, interlaced_score;
2012
2013             s->interlaced_dct = 0;
2014             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2015                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2016                                                      ptr_y + wrap_y * 8,
2017                                                      wrap_y, 8) - 400;
2018
2019             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2020                 progressive_score -= 400;
2021
2022             if (progressive_score > 0) {
2023                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2024                                                         wrap_y * 2, 8) +
2025                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2026                                                         ptr_y + wrap_y,
2027                                                         wrap_y * 2, 8);
2028
2029                 if (progressive_score > interlaced_score) {
2030                     s->interlaced_dct = 1;
2031
2032                     dct_offset = wrap_y;
2033                     wrap_y <<= 1;
2034                     if (s->chroma_format == CHROMA_422)
2035                         wrap_c <<= 1;
2036                 }
2037             }
2038         }
2039
2040         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2041         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2042         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2043                             dest_y + dct_offset, wrap_y);
2044         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2045                             dest_y + dct_offset + 8, wrap_y);
2046
2047         if (s->flags & CODEC_FLAG_GRAY) {
2048             skip_dct[4] = 1;
2049             skip_dct[5] = 1;
2050         } else {
2051             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2052             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2053             if (!s->chroma_y_shift) { /* 422 */
2054                 s->pdsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
2055                                     dest_cb + (dct_offset >> 1), wrap_c);
2056                 s->pdsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
2057                                     dest_cr + (dct_offset >> 1), wrap_c);
2058             }
2059         }
2060         /* pre quantization */
2061         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2062                 2 * s->qscale * s->qscale) {
2063             // FIXME optimize
2064             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2065                 skip_dct[0] = 1;
2066             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2067                 skip_dct[1] = 1;
2068             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2069                                wrap_y, 8) < 20 * s->qscale)
2070                 skip_dct[2] = 1;
2071             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2072                                wrap_y, 8) < 20 * s->qscale)
2073                 skip_dct[3] = 1;
2074             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2075                 skip_dct[4] = 1;
2076             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2077                 skip_dct[5] = 1;
2078             if (!s->chroma_y_shift) { /* 422 */
2079                 if (s->mecc.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2080                                    dest_cb + (dct_offset >> 1),
2081                                    wrap_c, 8) < 20 * s->qscale)
2082                     skip_dct[6] = 1;
2083                 if (s->mecc.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2084                                    dest_cr + (dct_offset >> 1),
2085                                    wrap_c, 8) < 20 * s->qscale)
2086                     skip_dct[7] = 1;
2087             }
2088         }
2089     }
2090
2091     if (s->quantizer_noise_shaping) {
2092         if (!skip_dct[0])
2093             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2094         if (!skip_dct[1])
2095             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2096         if (!skip_dct[2])
2097             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2098         if (!skip_dct[3])
2099             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2100         if (!skip_dct[4])
2101             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2102         if (!skip_dct[5])
2103             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2104         if (!s->chroma_y_shift) { /* 422 */
2105             if (!skip_dct[6])
2106                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2107                                   wrap_c);
2108             if (!skip_dct[7])
2109                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2110                                   wrap_c);
2111         }
2112         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2113     }
2114
2115     /* DCT & quantize */
2116     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
2117     {
2118         for (i = 0; i < mb_block_count; i++) {
2119             if (!skip_dct[i]) {
2120                 int overflow;
2121                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2122                 // FIXME we could decide to change to quantizer instead of
2123                 // clipping
2124                 // JS: I don't think that would be a good idea it could lower
2125                 //     quality instead of improve it. Just INTRADC clipping
2126                 //     deserves changes in quantizer
2127                 if (overflow)
2128                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2129             } else
2130                 s->block_last_index[i] = -1;
2131         }
2132         if (s->quantizer_noise_shaping) {
2133             for (i = 0; i < mb_block_count; i++) {
2134                 if (!skip_dct[i]) {
2135                     s->block_last_index[i] =
2136                         dct_quantize_refine(s, s->block[i], weight[i],
2137                                             orig[i], i, s->qscale);
2138                 }
2139             }
2140         }
2141
2142         if (s->luma_elim_threshold && !s->mb_intra)
2143             for (i = 0; i < 4; i++)
2144                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2145         if (s->chroma_elim_threshold && !s->mb_intra)
2146             for (i = 4; i < mb_block_count; i++)
2147                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2148
2149         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2150             for (i = 0; i < mb_block_count; i++) {
2151                 if (s->block_last_index[i] == -1)
2152                     s->coded_score[i] = INT_MAX / 256;
2153             }
2154         }
2155     }
2156
2157     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2158         s->block_last_index[4] =
2159         s->block_last_index[5] = 0;
2160         s->block[4][0] =
2161         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2162     }
2163
2164     // non c quantize code returns incorrect block_last_index FIXME
2165     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2166         for (i = 0; i < mb_block_count; i++) {
2167             int j;
2168             if (s->block_last_index[i] > 0) {
2169                 for (j = 63; j > 0; j--) {
2170                     if (s->block[i][s->intra_scantable.permutated[j]])
2171                         break;
2172                 }
2173                 s->block_last_index[i] = j;
2174             }
2175         }
2176     }
2177
2178     /* huffman encode */
2179     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2180     case AV_CODEC_ID_MPEG1VIDEO:
2181     case AV_CODEC_ID_MPEG2VIDEO:
2182         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2183             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2184         break;
2185     case AV_CODEC_ID_MPEG4:
2186         if (CONFIG_MPEG4_ENCODER)
2187             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2188         break;
2189     case AV_CODEC_ID_MSMPEG4V2:
2190     case AV_CODEC_ID_MSMPEG4V3:
2191     case AV_CODEC_ID_WMV1:
2192         if (CONFIG_MSMPEG4_ENCODER)
2193             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2194         break;
2195     case AV_CODEC_ID_WMV2:
2196         if (CONFIG_WMV2_ENCODER)
2197             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2198         break;
2199     case AV_CODEC_ID_H261:
2200         if (CONFIG_H261_ENCODER)
2201             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2202         break;
2203     case AV_CODEC_ID_H263:
2204     case AV_CODEC_ID_H263P:
2205     case AV_CODEC_ID_FLV1:
2206     case AV_CODEC_ID_RV10:
2207     case AV_CODEC_ID_RV20:
2208         if (CONFIG_H263_ENCODER)
2209             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2210         break;
2211     case AV_CODEC_ID_MJPEG:
2212         if (CONFIG_MJPEG_ENCODER)
2213             ff_mjpeg_encode_mb(s, s->block);
2214         break;
2215     default:
2216         assert(0);
2217     }
2218 }
2219
2220 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2221 {
2222     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2223     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2224 }
2225
2226 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2227     int i;
2228
2229     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2230
2231     /* mpeg1 */
2232     d->mb_skip_run= s->mb_skip_run;
2233     for(i=0; i<3; i++)
2234         d->last_dc[i] = s->last_dc[i];
2235
2236     /* statistics */
2237     d->mv_bits= s->mv_bits;
2238     d->i_tex_bits= s->i_tex_bits;
2239     d->p_tex_bits= s->p_tex_bits;
2240     d->i_count= s->i_count;
2241     d->f_count= s->f_count;
2242     d->b_count= s->b_count;
2243     d->skip_count= s->skip_count;
2244     d->misc_bits= s->misc_bits;
2245     d->last_bits= 0;
2246
2247     d->mb_skipped= 0;
2248     d->qscale= s->qscale;
2249     d->dquant= s->dquant;
2250
2251     d->esc3_level_length= s->esc3_level_length;
2252 }
2253
2254 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2255     int i;
2256
2257     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2258     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2259
2260     /* mpeg1 */
2261     d->mb_skip_run= s->mb_skip_run;
2262     for(i=0; i<3; i++)
2263         d->last_dc[i] = s->last_dc[i];
2264
2265     /* statistics */
2266     d->mv_bits= s->mv_bits;
2267     d->i_tex_bits= s->i_tex_bits;
2268     d->p_tex_bits= s->p_tex_bits;
2269     d->i_count= s->i_count;
2270     d->f_count= s->f_count;
2271     d->b_count= s->b_count;
2272     d->skip_count= s->skip_count;
2273     d->misc_bits= s->misc_bits;
2274
2275     d->mb_intra= s->mb_intra;
2276     d->mb_skipped= s->mb_skipped;
2277     d->mv_type= s->mv_type;
2278     d->mv_dir= s->mv_dir;
2279     d->pb= s->pb;
2280     if(s->data_partitioning){
2281         d->pb2= s->pb2;
2282         d->tex_pb= s->tex_pb;
2283     }
2284     d->block= s->block;
2285     for(i=0; i<8; i++)
2286         d->block_last_index[i]= s->block_last_index[i];
2287     d->interlaced_dct= s->interlaced_dct;
2288     d->qscale= s->qscale;
2289
2290     d->esc3_level_length= s->esc3_level_length;
2291 }
2292
2293 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2294                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2295                            int *dmin, int *next_block, int motion_x, int motion_y)
2296 {
2297     int score;
2298     uint8_t *dest_backup[3];
2299
2300     copy_context_before_encode(s, backup, type);
2301
2302     s->block= s->blocks[*next_block];
2303     s->pb= pb[*next_block];
2304     if(s->data_partitioning){
2305         s->pb2   = pb2   [*next_block];
2306         s->tex_pb= tex_pb[*next_block];
2307     }
2308
2309     if(*next_block){
2310         memcpy(dest_backup, s->dest, sizeof(s->dest));
2311         s->dest[0] = s->rd_scratchpad;
2312         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2313         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2314         assert(s->linesize >= 32); //FIXME
2315     }
2316
2317     encode_mb(s, motion_x, motion_y);
2318
2319     score= put_bits_count(&s->pb);
2320     if(s->data_partitioning){
2321         score+= put_bits_count(&s->pb2);
2322         score+= put_bits_count(&s->tex_pb);
2323     }
2324
2325     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2326         ff_mpv_decode_mb(s, s->block);
2327
2328         score *= s->lambda2;
2329         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2330     }
2331
2332     if(*next_block){
2333         memcpy(s->dest, dest_backup, sizeof(s->dest));
2334     }
2335
2336     if(score<*dmin){
2337         *dmin= score;
2338         *next_block^=1;
2339
2340         copy_context_after_encode(best, s, type);
2341     }
2342 }
2343
2344 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2345     uint32_t *sq = ff_square_tab + 256;
2346     int acc=0;
2347     int x,y;
2348
2349     if(w==16 && h==16)
2350         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2351     else if(w==8 && h==8)
2352         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2353
2354     for(y=0; y<h; y++){
2355         for(x=0; x<w; x++){
2356             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2357         }
2358     }
2359
2360     assert(acc>=0);
2361
2362     return acc;
2363 }
2364
2365 static int sse_mb(MpegEncContext *s){
2366     int w= 16;
2367     int h= 16;
2368
2369     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2370     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2371
2372     if(w==16 && h==16)
2373       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2374         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2375                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2376                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2377       }else{
2378         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2379                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2380                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2381       }
2382     else
2383         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2384                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2385                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2386 }
2387
2388 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2389     MpegEncContext *s= *(void**)arg;
2390
2391
2392     s->me.pre_pass=1;
2393     s->me.dia_size= s->avctx->pre_dia_size;
2394     s->first_slice_line=1;
2395     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2396         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2397             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2398         }
2399         s->first_slice_line=0;
2400     }
2401
2402     s->me.pre_pass=0;
2403
2404     return 0;
2405 }
2406
2407 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2408     MpegEncContext *s= *(void**)arg;
2409
2410     s->me.dia_size= s->avctx->dia_size;
2411     s->first_slice_line=1;
2412     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2413         s->mb_x=0; //for block init below
2414         ff_init_block_index(s);
2415         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2416             s->block_index[0]+=2;
2417             s->block_index[1]+=2;
2418             s->block_index[2]+=2;
2419             s->block_index[3]+=2;
2420
2421             /* compute motion vector & mb_type and store in context */
2422             if(s->pict_type==AV_PICTURE_TYPE_B)
2423                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2424             else
2425                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2426         }
2427         s->first_slice_line=0;
2428     }
2429     return 0;
2430 }
2431
2432 static int mb_var_thread(AVCodecContext *c, void *arg){
2433     MpegEncContext *s= *(void**)arg;
2434     int mb_x, mb_y;
2435
2436     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2437         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2438             int xx = mb_x * 16;
2439             int yy = mb_y * 16;
2440             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2441             int varc;
2442             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2443
2444             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2445                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2446
2447             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2448             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2449             s->me.mb_var_sum_temp    += varc;
2450         }
2451     }
2452     return 0;
2453 }
2454
2455 static void write_slice_end(MpegEncContext *s){
2456     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2457         if(s->partitioned_frame){
2458             ff_mpeg4_merge_partitions(s);
2459         }
2460
2461         ff_mpeg4_stuffing(&s->pb);
2462     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2463         ff_mjpeg_encode_stuffing(&s->pb);
2464     }
2465
2466     avpriv_align_put_bits(&s->pb);
2467     flush_put_bits(&s->pb);
2468
2469     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2470         s->misc_bits+= get_bits_diff(s);
2471 }
2472
2473 static void write_mb_info(MpegEncContext *s)
2474 {
2475     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2476     int offset = put_bits_count(&s->pb);
2477     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2478     int gobn = s->mb_y / s->gob_index;
2479     int pred_x, pred_y;
2480     if (CONFIG_H263_ENCODER)
2481         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2482     bytestream_put_le32(&ptr, offset);
2483     bytestream_put_byte(&ptr, s->qscale);
2484     bytestream_put_byte(&ptr, gobn);
2485     bytestream_put_le16(&ptr, mba);
2486     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2487     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2488     /* 4MV not implemented */
2489     bytestream_put_byte(&ptr, 0); /* hmv2 */
2490     bytestream_put_byte(&ptr, 0); /* vmv2 */
2491 }
2492
2493 static void update_mb_info(MpegEncContext *s, int startcode)
2494 {
2495     if (!s->mb_info)
2496         return;
2497     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2498         s->mb_info_size += 12;
2499         s->prev_mb_info = s->last_mb_info;
2500     }
2501     if (startcode) {
2502         s->prev_mb_info = put_bits_count(&s->pb)/8;
2503         /* This might have incremented mb_info_size above, and we return without
2504          * actually writing any info into that slot yet. But in that case,
2505          * this will be called again at the start of the after writing the
2506          * start code, actually writing the mb info. */
2507         return;
2508     }
2509
2510     s->last_mb_info = put_bits_count(&s->pb)/8;
2511     if (!s->mb_info_size)
2512         s->mb_info_size += 12;
2513     write_mb_info(s);
2514 }
2515
2516 static int encode_thread(AVCodecContext *c, void *arg){
2517     MpegEncContext *s= *(void**)arg;
2518     int mb_x, mb_y, pdif = 0;
2519     int chr_h= 16>>s->chroma_y_shift;
2520     int i, j;
2521     MpegEncContext best_s, backup_s;
2522     uint8_t bit_buf[2][MAX_MB_BYTES];
2523     uint8_t bit_buf2[2][MAX_MB_BYTES];
2524     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2525     PutBitContext pb[2], pb2[2], tex_pb[2];
2526
2527     for(i=0; i<2; i++){
2528         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2529         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2530         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2531     }
2532
2533     s->last_bits= put_bits_count(&s->pb);
2534     s->mv_bits=0;
2535     s->misc_bits=0;
2536     s->i_tex_bits=0;
2537     s->p_tex_bits=0;
2538     s->i_count=0;
2539     s->f_count=0;
2540     s->b_count=0;
2541     s->skip_count=0;
2542
2543     for(i=0; i<3; i++){
2544         /* init last dc values */
2545         /* note: quant matrix value (8) is implied here */
2546         s->last_dc[i] = 128 << s->intra_dc_precision;
2547
2548         s->current_picture.f->error[i] = 0;
2549     }
2550     s->mb_skip_run = 0;
2551     memset(s->last_mv, 0, sizeof(s->last_mv));
2552
2553     s->last_mv_dir = 0;
2554
2555     switch(s->codec_id){
2556     case AV_CODEC_ID_H263:
2557     case AV_CODEC_ID_H263P:
2558     case AV_CODEC_ID_FLV1:
2559         if (CONFIG_H263_ENCODER)
2560             s->gob_index = ff_h263_get_gob_height(s);
2561         break;
2562     case AV_CODEC_ID_MPEG4:
2563         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2564             ff_mpeg4_init_partitions(s);
2565         break;
2566     }
2567
2568     s->resync_mb_x=0;
2569     s->resync_mb_y=0;
2570     s->first_slice_line = 1;
2571     s->ptr_lastgob = s->pb.buf;
2572     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2573         s->mb_x=0;
2574         s->mb_y= mb_y;
2575
2576         ff_set_qscale(s, s->qscale);
2577         ff_init_block_index(s);
2578
2579         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2580             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2581             int mb_type= s->mb_type[xy];
2582 //            int d;
2583             int dmin= INT_MAX;
2584             int dir;
2585
2586             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2587                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2588                 return -1;
2589             }
2590             if(s->data_partitioning){
2591                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2592                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2593                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2594                     return -1;
2595                 }
2596             }
2597
2598             s->mb_x = mb_x;
2599             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2600             ff_update_block_index(s);
2601
2602             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2603                 ff_h261_reorder_mb_index(s);
2604                 xy= s->mb_y*s->mb_stride + s->mb_x;
2605                 mb_type= s->mb_type[xy];
2606             }
2607
2608             /* write gob / video packet header  */
2609             if(s->rtp_mode){
2610                 int current_packet_size, is_gob_start;
2611
2612                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2613
2614                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2615
2616                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2617
2618                 switch(s->codec_id){
2619                 case AV_CODEC_ID_H263:
2620                 case AV_CODEC_ID_H263P:
2621                     if(!s->h263_slice_structured)
2622                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2623                     break;
2624                 case AV_CODEC_ID_MPEG2VIDEO:
2625                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2626                 case AV_CODEC_ID_MPEG1VIDEO:
2627                     if(s->mb_skip_run) is_gob_start=0;
2628                     break;
2629                 }
2630
2631                 if(is_gob_start){
2632                     if(s->start_mb_y != mb_y || mb_x!=0){
2633                         write_slice_end(s);
2634
2635                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2636                             ff_mpeg4_init_partitions(s);
2637                         }
2638                     }
2639
2640                     assert((put_bits_count(&s->pb)&7) == 0);
2641                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2642
2643                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2644                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2645                         int d = 100 / s->error_rate;
2646                         if(r % d == 0){
2647                             current_packet_size=0;
2648                             s->pb.buf_ptr= s->ptr_lastgob;
2649                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2650                         }
2651                     }
2652
2653                     if (s->avctx->rtp_callback){
2654                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2655                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2656                     }
2657                     update_mb_info(s, 1);
2658
2659                     switch(s->codec_id){
2660                     case AV_CODEC_ID_MPEG4:
2661                         if (CONFIG_MPEG4_ENCODER) {
2662                             ff_mpeg4_encode_video_packet_header(s);
2663                             ff_mpeg4_clean_buffers(s);
2664                         }
2665                     break;
2666                     case AV_CODEC_ID_MPEG1VIDEO:
2667                     case AV_CODEC_ID_MPEG2VIDEO:
2668                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2669                             ff_mpeg1_encode_slice_header(s);
2670                             ff_mpeg1_clean_buffers(s);
2671                         }
2672                     break;
2673                     case AV_CODEC_ID_H263:
2674                     case AV_CODEC_ID_H263P:
2675                         if (CONFIG_H263_ENCODER)
2676                             ff_h263_encode_gob_header(s, mb_y);
2677                     break;
2678                     }
2679
2680                     if(s->flags&CODEC_FLAG_PASS1){
2681                         int bits= put_bits_count(&s->pb);
2682                         s->misc_bits+= bits - s->last_bits;
2683                         s->last_bits= bits;
2684                     }
2685
2686                     s->ptr_lastgob += current_packet_size;
2687                     s->first_slice_line=1;
2688                     s->resync_mb_x=mb_x;
2689                     s->resync_mb_y=mb_y;
2690                 }
2691             }
2692
2693             if(  (s->resync_mb_x   == s->mb_x)
2694                && s->resync_mb_y+1 == s->mb_y){
2695                 s->first_slice_line=0;
2696             }
2697
2698             s->mb_skipped=0;
2699             s->dquant=0; //only for QP_RD
2700
2701             update_mb_info(s, 0);
2702
2703             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2704                 int next_block=0;
2705                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2706
2707                 copy_context_before_encode(&backup_s, s, -1);
2708                 backup_s.pb= s->pb;
2709                 best_s.data_partitioning= s->data_partitioning;
2710                 best_s.partitioned_frame= s->partitioned_frame;
2711                 if(s->data_partitioning){
2712                     backup_s.pb2= s->pb2;
2713                     backup_s.tex_pb= s->tex_pb;
2714                 }
2715
2716                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2717                     s->mv_dir = MV_DIR_FORWARD;
2718                     s->mv_type = MV_TYPE_16X16;
2719                     s->mb_intra= 0;
2720                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2721                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2722                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2723                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2724                 }
2725                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2726                     s->mv_dir = MV_DIR_FORWARD;
2727                     s->mv_type = MV_TYPE_FIELD;
2728                     s->mb_intra= 0;
2729                     for(i=0; i<2; i++){
2730                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2731                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2732                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2733                     }
2734                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2735                                  &dmin, &next_block, 0, 0);
2736                 }
2737                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2738                     s->mv_dir = MV_DIR_FORWARD;
2739                     s->mv_type = MV_TYPE_16X16;
2740                     s->mb_intra= 0;
2741                     s->mv[0][0][0] = 0;
2742                     s->mv[0][0][1] = 0;
2743                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2744                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2745                 }
2746                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2747                     s->mv_dir = MV_DIR_FORWARD;
2748                     s->mv_type = MV_TYPE_8X8;
2749                     s->mb_intra= 0;
2750                     for(i=0; i<4; i++){
2751                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2752                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2753                     }
2754                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2755                                  &dmin, &next_block, 0, 0);
2756                 }
2757                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2758                     s->mv_dir = MV_DIR_FORWARD;
2759                     s->mv_type = MV_TYPE_16X16;
2760                     s->mb_intra= 0;
2761                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2762                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2763                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2764                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2765                 }
2766                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2767                     s->mv_dir = MV_DIR_BACKWARD;
2768                     s->mv_type = MV_TYPE_16X16;
2769                     s->mb_intra= 0;
2770                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2771                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2772                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2773                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2774                 }
2775                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2776                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2777                     s->mv_type = MV_TYPE_16X16;
2778                     s->mb_intra= 0;
2779                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2780                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2781                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2782                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2783                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2784                                  &dmin, &next_block, 0, 0);
2785                 }
2786                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2787                     s->mv_dir = MV_DIR_FORWARD;
2788                     s->mv_type = MV_TYPE_FIELD;
2789                     s->mb_intra= 0;
2790                     for(i=0; i<2; i++){
2791                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2792                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2793                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2794                     }
2795                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2796                                  &dmin, &next_block, 0, 0);
2797                 }
2798                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2799                     s->mv_dir = MV_DIR_BACKWARD;
2800                     s->mv_type = MV_TYPE_FIELD;
2801                     s->mb_intra= 0;
2802                     for(i=0; i<2; i++){
2803                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2804                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2805                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2806                     }
2807                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2808                                  &dmin, &next_block, 0, 0);
2809                 }
2810                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2811                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2812                     s->mv_type = MV_TYPE_FIELD;
2813                     s->mb_intra= 0;
2814                     for(dir=0; dir<2; dir++){
2815                         for(i=0; i<2; i++){
2816                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2817                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2818                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2819                         }
2820                     }
2821                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2822                                  &dmin, &next_block, 0, 0);
2823                 }
2824                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2825                     s->mv_dir = 0;
2826                     s->mv_type = MV_TYPE_16X16;
2827                     s->mb_intra= 1;
2828                     s->mv[0][0][0] = 0;
2829                     s->mv[0][0][1] = 0;
2830                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2831                                  &dmin, &next_block, 0, 0);
2832                     if(s->h263_pred || s->h263_aic){
2833                         if(best_s.mb_intra)
2834                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2835                         else
2836                             ff_clean_intra_table_entries(s); //old mode?
2837                     }
2838                 }
2839
2840                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2841                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2842                         const int last_qp= backup_s.qscale;
2843                         int qpi, qp, dc[6];
2844                         int16_t ac[6][16];
2845                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2846                         static const int dquant_tab[4]={-1,1,-2,2};
2847
2848                         assert(backup_s.dquant == 0);
2849
2850                         //FIXME intra
2851                         s->mv_dir= best_s.mv_dir;
2852                         s->mv_type = MV_TYPE_16X16;
2853                         s->mb_intra= best_s.mb_intra;
2854                         s->mv[0][0][0] = best_s.mv[0][0][0];
2855                         s->mv[0][0][1] = best_s.mv[0][0][1];
2856                         s->mv[1][0][0] = best_s.mv[1][0][0];
2857                         s->mv[1][0][1] = best_s.mv[1][0][1];
2858
2859                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2860                         for(; qpi<4; qpi++){
2861                             int dquant= dquant_tab[qpi];
2862                             qp= last_qp + dquant;
2863                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2864                                 continue;
2865                             backup_s.dquant= dquant;
2866                             if(s->mb_intra && s->dc_val[0]){
2867                                 for(i=0; i<6; i++){
2868                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2869                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2870                                 }
2871                             }
2872
2873                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2874                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2875                             if(best_s.qscale != qp){
2876                                 if(s->mb_intra && s->dc_val[0]){
2877                                     for(i=0; i<6; i++){
2878                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2879                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2880                                     }
2881                                 }
2882                             }
2883                         }
2884                     }
2885                 }
2886                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2887                     int mx= s->b_direct_mv_table[xy][0];
2888                     int my= s->b_direct_mv_table[xy][1];
2889
2890                     backup_s.dquant = 0;
2891                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2892                     s->mb_intra= 0;
2893                     ff_mpeg4_set_direct_mv(s, mx, my);
2894                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2895                                  &dmin, &next_block, mx, my);
2896                 }
2897                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2898                     backup_s.dquant = 0;
2899                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2900                     s->mb_intra= 0;
2901                     ff_mpeg4_set_direct_mv(s, 0, 0);
2902                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2903                                  &dmin, &next_block, 0, 0);
2904                 }
2905                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2906                     int coded=0;
2907                     for(i=0; i<6; i++)
2908                         coded |= s->block_last_index[i];
2909                     if(coded){
2910                         int mx,my;
2911                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2912                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2913                             mx=my=0; //FIXME find the one we actually used
2914                             ff_mpeg4_set_direct_mv(s, mx, my);
2915                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2916                             mx= s->mv[1][0][0];
2917                             my= s->mv[1][0][1];
2918                         }else{
2919                             mx= s->mv[0][0][0];
2920                             my= s->mv[0][0][1];
2921                         }
2922
2923                         s->mv_dir= best_s.mv_dir;
2924                         s->mv_type = best_s.mv_type;
2925                         s->mb_intra= 0;
2926 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2927                         s->mv[0][0][1] = best_s.mv[0][0][1];
2928                         s->mv[1][0][0] = best_s.mv[1][0][0];
2929                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2930                         backup_s.dquant= 0;
2931                         s->skipdct=1;
2932                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2933                                         &dmin, &next_block, mx, my);
2934                         s->skipdct=0;
2935                     }
2936                 }
2937
2938                 s->current_picture.qscale_table[xy] = best_s.qscale;
2939
2940                 copy_context_after_encode(s, &best_s, -1);
2941
2942                 pb_bits_count= put_bits_count(&s->pb);
2943                 flush_put_bits(&s->pb);
2944                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2945                 s->pb= backup_s.pb;
2946
2947                 if(s->data_partitioning){
2948                     pb2_bits_count= put_bits_count(&s->pb2);
2949                     flush_put_bits(&s->pb2);
2950                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2951                     s->pb2= backup_s.pb2;
2952
2953                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2954                     flush_put_bits(&s->tex_pb);
2955                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2956                     s->tex_pb= backup_s.tex_pb;
2957                 }
2958                 s->last_bits= put_bits_count(&s->pb);
2959
2960                 if (CONFIG_H263_ENCODER &&
2961                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2962                     ff_h263_update_motion_val(s);
2963
2964                 if(next_block==0){ //FIXME 16 vs linesize16
2965                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2966                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2967                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2968                 }
2969
2970                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2971                     ff_mpv_decode_mb(s, s->block);
2972             } else {
2973                 int motion_x = 0, motion_y = 0;
2974                 s->mv_type=MV_TYPE_16X16;
2975                 // only one MB-Type possible
2976
2977                 switch(mb_type){
2978                 case CANDIDATE_MB_TYPE_INTRA:
2979                     s->mv_dir = 0;
2980                     s->mb_intra= 1;
2981                     motion_x= s->mv[0][0][0] = 0;
2982                     motion_y= s->mv[0][0][1] = 0;
2983                     break;
2984                 case CANDIDATE_MB_TYPE_INTER:
2985                     s->mv_dir = MV_DIR_FORWARD;
2986                     s->mb_intra= 0;
2987                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2988                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2989                     break;
2990                 case CANDIDATE_MB_TYPE_INTER_I:
2991                     s->mv_dir = MV_DIR_FORWARD;
2992                     s->mv_type = MV_TYPE_FIELD;
2993                     s->mb_intra= 0;
2994                     for(i=0; i<2; i++){
2995                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2996                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2997                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2998                     }
2999                     break;
3000                 case CANDIDATE_MB_TYPE_INTER4V:
3001                     s->mv_dir = MV_DIR_FORWARD;
3002                     s->mv_type = MV_TYPE_8X8;
3003                     s->mb_intra= 0;
3004                     for(i=0; i<4; i++){
3005                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3006                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3007                     }
3008                     break;
3009                 case CANDIDATE_MB_TYPE_DIRECT:
3010                     if (CONFIG_MPEG4_ENCODER) {
3011                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3012                         s->mb_intra= 0;
3013                         motion_x=s->b_direct_mv_table[xy][0];
3014                         motion_y=s->b_direct_mv_table[xy][1];
3015                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3016                     }
3017                     break;
3018                 case CANDIDATE_MB_TYPE_DIRECT0:
3019                     if (CONFIG_MPEG4_ENCODER) {
3020                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3021                         s->mb_intra= 0;
3022                         ff_mpeg4_set_direct_mv(s, 0, 0);
3023                     }
3024                     break;
3025                 case CANDIDATE_MB_TYPE_BIDIR:
3026                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3027                     s->mb_intra= 0;
3028                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3029                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3030                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3031                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3032                     break;
3033                 case CANDIDATE_MB_TYPE_BACKWARD:
3034                     s->mv_dir = MV_DIR_BACKWARD;
3035                     s->mb_intra= 0;
3036                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3037                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3038                     break;
3039                 case CANDIDATE_MB_TYPE_FORWARD:
3040                     s->mv_dir = MV_DIR_FORWARD;
3041                     s->mb_intra= 0;
3042                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3043                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3044                     break;
3045                 case CANDIDATE_MB_TYPE_FORWARD_I:
3046                     s->mv_dir = MV_DIR_FORWARD;
3047                     s->mv_type = MV_TYPE_FIELD;
3048                     s->mb_intra= 0;
3049                     for(i=0; i<2; i++){
3050                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3051                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3052                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3053                     }
3054                     break;
3055                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3056                     s->mv_dir = MV_DIR_BACKWARD;
3057                     s->mv_type = MV_TYPE_FIELD;
3058                     s->mb_intra= 0;
3059                     for(i=0; i<2; i++){
3060                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3061                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3062                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3063                     }
3064                     break;
3065                 case CANDIDATE_MB_TYPE_BIDIR_I:
3066                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3067                     s->mv_type = MV_TYPE_FIELD;
3068                     s->mb_intra= 0;
3069                     for(dir=0; dir<2; dir++){
3070                         for(i=0; i<2; i++){
3071                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3072                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3073                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3074                         }
3075                     }
3076                     break;
3077                 default:
3078                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3079                 }
3080
3081                 encode_mb(s, motion_x, motion_y);
3082
3083                 // RAL: Update last macroblock type
3084                 s->last_mv_dir = s->mv_dir;
3085
3086                 if (CONFIG_H263_ENCODER &&
3087                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3088                     ff_h263_update_motion_val(s);
3089
3090                 ff_mpv_decode_mb(s, s->block);
3091             }
3092
3093             /* clean the MV table in IPS frames for direct mode in B frames */
3094             if(s->mb_intra /* && I,P,S_TYPE */){
3095                 s->p_mv_table[xy][0]=0;
3096                 s->p_mv_table[xy][1]=0;
3097             }
3098
3099             if(s->flags&CODEC_FLAG_PSNR){
3100                 int w= 16;
3101                 int h= 16;
3102
3103                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3104                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3105
3106                 s->current_picture.f->error[0] += sse(
3107                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3108                     s->dest[0], w, h, s->linesize);
3109                 s->current_picture.f->error[1] += sse(
3110                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3111                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3112                 s->current_picture.f->error[2] += sse(
3113                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3114                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3115             }
3116             if(s->loop_filter){
3117                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3118                     ff_h263_loop_filter(s);
3119             }
3120             av_dlog(s->avctx, "MB %d %d bits\n",
3121                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3122         }
3123     }
3124
3125     //not beautiful here but we must write it before flushing so it has to be here
3126     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3127         ff_msmpeg4_encode_ext_header(s);
3128
3129     write_slice_end(s);
3130
3131     /* Send the last GOB if RTP */
3132     if (s->avctx->rtp_callback) {
3133         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3134         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3135         /* Call the RTP callback to send the last GOB */
3136         emms_c();
3137         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3138     }
3139
3140     return 0;
3141 }
3142
/*
 * Add one field of *src to the same field of *dst and reset it in *src.
 * Relies on pointers named dst and src being in scope at the point of use.
 *
 * The two statements are wrapped in do { } while (0) so that they stay
 * together when the macro is used as the body of an unbraced if/for;
 * every use must still be terminated with a semicolon, as before.
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
3144 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3145     MERGE(me.scene_change_score);
3146     MERGE(me.mc_mb_var_sum_temp);
3147     MERGE(me.mb_var_sum_temp);
3148 }
3149
3150 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3151     int i;
3152
3153     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3154     MERGE(dct_count[1]);
3155     MERGE(mv_bits);
3156     MERGE(i_tex_bits);
3157     MERGE(p_tex_bits);
3158     MERGE(i_count);
3159     MERGE(f_count);
3160     MERGE(b_count);
3161     MERGE(skip_count);
3162     MERGE(misc_bits);
3163     MERGE(er.error_count);
3164     MERGE(padding_bug_score);
3165     MERGE(current_picture.f->error[0]);
3166     MERGE(current_picture.f->error[1]);
3167     MERGE(current_picture.f->error[2]);
3168
3169     if(dst->avctx->noise_reduction){
3170         for(i=0; i<64; i++){
3171             MERGE(dct_error_sum[0][i]);
3172             MERGE(dct_error_sum[1][i]);
3173         }
3174     }
3175
3176     assert(put_bits_count(&src->pb) % 8 ==0);
3177     assert(put_bits_count(&dst->pb) % 8 ==0);
3178     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3179     flush_put_bits(&dst->pb);
3180 }
3181
3182 static int estimate_qp(MpegEncContext *s, int dry_run){
3183     if (s->next_lambda){
3184         s->current_picture_ptr->f->quality =
3185         s->current_picture.f->quality = s->next_lambda;
3186         if(!dry_run) s->next_lambda= 0;
3187     } else if (!s->fixed_qscale) {
3188         s->current_picture_ptr->f->quality =
3189         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3190         if (s->current_picture.f->quality < 0)
3191             return -1;
3192     }
3193
3194     if(s->adaptive_quant){
3195         switch(s->codec_id){
3196         case AV_CODEC_ID_MPEG4:
3197             if (CONFIG_MPEG4_ENCODER)
3198                 ff_clean_mpeg4_qscales(s);
3199             break;
3200         case AV_CODEC_ID_H263:
3201         case AV_CODEC_ID_H263P:
3202         case AV_CODEC_ID_FLV1:
3203             if (CONFIG_H263_ENCODER)
3204                 ff_clean_h263_qscales(s);
3205             break;
3206         default:
3207             ff_init_qscale_tab(s);
3208         }
3209
3210         s->lambda= s->lambda_table[0];
3211         //FIXME broken
3212     }else
3213         s->lambda = s->current_picture.f->quality;
3214     update_qscale(s);
3215     return 0;
3216 }
3217
3218 /* must be called before writing the header */
3219 static void set_frame_distances(MpegEncContext * s){
3220     assert(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3221     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3222
3223     if(s->pict_type==AV_PICTURE_TYPE_B){
3224         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3225         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3226     }else{
3227         s->pp_time= s->time - s->last_non_b_time;
3228         s->last_non_b_time= s->time;
3229         assert(s->picture_number==0 || s->pp_time > 0);
3230     }
3231 }
3232
/**
 * Encode one picture whose type has already been chosen in s->pict_type.
 *
 * Steps, in the order they appear below:
 *  - reset the variance sums and initialise the timing fields;
 *  - update the rounding mode and choose a provisional lambda;
 *  - run motion estimation (or, for I-frames without fixed qscale, the
 *    mb_var_thread pass) on all slice contexts through avctx->execute and
 *    merge the statistics;
 *  - turn a P-frame into an I-frame when the scene change score exceeds
 *    avctx->scenechange_threshold;
 *  - unless umvplus is set, select f_code/b_code and fix over-long vectors;
 *  - call estimate_qp(), write the format-specific picture header, run
 *    encode_thread on all slice contexts and merge their output into s.
 *
 * @param s              main encoder context
 * @param picture_number stored in s->picture_number and handed to the
 *                       picture header writers
 * @return 0 on success; a negative value if estimate_qp(), ff_init_me() or
 *         ff_update_duplicate_context() reports failure
 */
3233 static int encode_picture(MpegEncContext *s, int picture_number)
3234 {
3235     int i, ret;
3236     int bits;
3237     int context_count = s->slice_context_count;
3238
3239     s->picture_number = picture_number;
3240
3241     /* Reset the average MB variance */
3242     s->me.mb_var_sum_temp    =
3243     s->me.mc_mb_var_sum_temp = 0;
3244
3245     /* we need to initialize some time vars before we can encode b-frames */
3246     // RAL: Condition added for MPEG1VIDEO
3247     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3248         set_frame_distances(s);
3249     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3250         ff_set_mpeg4_time(s);
3251
3252     s->me.scene_change_score=0;
3253
3254 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3255
         /* I-frames set no_rounding from the msmpeg4 version; other non-B
          * frames toggle it for the codecs / option listed below */
3256     if(s->pict_type==AV_PICTURE_TYPE_I){
3257         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3258         else                        s->no_rounding=0;
3259     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3260         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3261             s->no_rounding ^= 1;
3262     }
3263
         /* Provisional lambda (presumably for the motion estimation below):
          * a dry-run estimate in two-pass mode, otherwise - unless
          * CODEC_FLAG_QSCALE is set - the value stored in last_lambda_for[] */
3264     if(s->flags & CODEC_FLAG_PASS2){
3265         if (estimate_qp(s,1) < 0)
3266             return -1;
3267         ff_get_2pass_fcode(s);
3268     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3269         if(s->pict_type==AV_PICTURE_TYPE_B)
3270             s->lambda= s->last_lambda_for[s->pict_type];
3271         else
3272             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3273         update_qscale(s);
3274     }
3275
3276     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* update the additional slice contexts (index 1 and up) from s;
          * a failure aborts the picture */
3277     for(i=1; i<context_count; i++){
3278         ret = ff_update_duplicate_context(s->thread_context[i], s);
3279         if (ret < 0)
3280             return ret;
3281     }
3282
3283     if(ff_init_me(s)<0)
3284         return -1;
3285
3286     /* Estimate motion for every MB */
3287     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda/lambda2 by me_penalty_compensation/256, rounded */
3288         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3289         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3290         if (s->pict_type != AV_PICTURE_TYPE_B) {
                 /* optional pre-pass: with pre_me==2 always, with any other
                  * non-zero pre_me only when the last non-B picture was an I-frame */
3291             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3292                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3293             }
3294         }
3295
3296         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3297     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3298         /* I-Frame */
3299         for(i=0; i<s->mb_stride*s->mb_height; i++)
3300             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3301
3302         if(!s->fixed_qscale){
3303             /* finding spatial complexity for I-frame rate control */
3304             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3305         }
3306     }
         /* collect the statistics of contexts 1.. into s and publish the
          * variance sums in both picture structures */
3307     for(i=1; i<context_count; i++){
3308         merge_context_after_me(s, s->thread_context[i]);
3309     }
3310     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3311     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3312     emms_c();
3313
         /* scene change: re-type a P-frame as I and mark every MB intra */
3314     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3315         s->pict_type= AV_PICTURE_TYPE_I;
3316         for(i=0; i<s->mb_stride*s->mb_height; i++)
3317             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3318         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3319                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3320     }
3321
         /* f_code/b_code selection and long-vector fix-up; skipped entirely
          * when umvplus is set */
3322     if(!s->umvplus){
3323         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3324             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3325
3326             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3327                 int a,b;
3328                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3329                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3330                 s->f_code= FFMAX3(s->f_code, a, b);
3331             }
3332
3333             ff_fix_long_p_mvs(s);
3334             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3335             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3336                 int j;
3337                 for(i=0; i<2; i++){
3338                     for(j=0; j<2; j++)
3339                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3340                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3341                 }
3342             }
3343         }
3344
3345         if(s->pict_type==AV_PICTURE_TYPE_B){
3346             int a, b;
3347
                 /* f_code covers the forward tables, b_code the backward ones;
                  * each is the maximum over the plain and the bidir table */
3348             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3349             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3350             s->f_code = FFMAX(a, b);
3351
3352             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3353             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3354             s->b_code = FFMAX(a, b);
3355
3356             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3357             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3358             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3359             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3360             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3361                 int dir, j;
3362                 for(dir=0; dir<2; dir++){
3363                     for(i=0; i<2; i++){
3364                         for(j=0; j<2; j++){
3365                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3366                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3367                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3368                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3369                         }
3370                     }
3371                 }
3372             }
3373         }
3374     }
3375
         /* final quality/qscale decision for this picture (not a dry run) */
3376     if (estimate_qp(s, 0) < 0)
3377         return -1;
3378
3379     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3380         s->qscale= 3; //reduce clipping problems
3381
3382     if (s->out_format == FMT_MJPEG) {
3383         /* for mjpeg, we do include qscale in the matrix */
3384         for(i=1;i<64;i++){
3385             int j = s->idsp.idct_permutation[i];
3386
3387             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3388         }
3389         s->y_dc_scale_table=
3390         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3391         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3392         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3393                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
             /* qscale is now part of intra_matrix, so a fixed value is used from here on */
3394         s->qscale= 8;
3395     }
3396
3397     //FIXME var duplication
3398     s->current_picture_ptr->f->key_frame =
3399     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3400     s->current_picture_ptr->f->pict_type =
3401     s->current_picture.f->pict_type = s->pict_type;
3402
3403     if (s->current_picture.f->key_frame)
3404         s->picture_in_gop_number=0;
3405
         /* write the picture header; the bit position before and after is
          * used to compute header_bits */
3406     s->last_bits= put_bits_count(&s->pb);
3407     switch(s->out_format) {
3408     case FMT_MJPEG:
3409         if (CONFIG_MJPEG_ENCODER)
3410             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3411                                            s->intra_matrix);
3412         break;
3413     case FMT_H261:
3414         if (CONFIG_H261_ENCODER)
3415             ff_h261_encode_picture_header(s, picture_number);
3416         break;
3417     case FMT_H263:
             /* several codecs share FMT_H263; the first matching writer wins */
3418         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3419             ff_wmv2_encode_picture_header(s, picture_number);
3420         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3421             ff_msmpeg4_encode_picture_header(s, picture_number);
3422         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3423             ff_mpeg4_encode_picture_header(s, picture_number);
3424         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3425             ff_rv10_encode_picture_header(s, picture_number);
3426         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3427             ff_rv20_encode_picture_header(s, picture_number);
3428         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3429             ff_flv_encode_picture_header(s, picture_number);
3430         else if (CONFIG_H263_ENCODER)
3431             ff_h263_encode_picture_header(s, picture_number);
3432         break;
3433     case FMT_MPEG1:
3434         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3435             ff_mpeg1_encode_picture_header(s, picture_number);
3436         break;
3437     default:
3438         assert(0);
3439     }
3440     bits= put_bits_count(&s->pb);
3441     s->header_bits= bits - s->last_bits;
3442
         /* refresh contexts 1.. from s, encode on all contexts, then fold
          * the statistics and bitstreams of contexts 1.. back into s */
3443     for(i=1; i<context_count; i++){
3444         update_duplicate_context_after_me(s->thread_context[i], s);
3445     }
3446     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3447     for(i=1; i<context_count; i++){
3448         merge_context_after_encode(s, s->thread_context[i]);
3449     }
3450     emms_c();
3451     return 0;
3452 }
3453
3454 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3455     const int intra= s->mb_intra;
3456     int i;
3457
3458     s->dct_count[intra]++;
3459
3460     for(i=0; i<64; i++){
3461         int level= block[i];
3462
3463         if(level){
3464             if(level>0){
3465                 s->dct_error_sum[intra][i] += level;
3466                 level -= s->dct_offset[intra][i];
3467                 if(level<0) level=0;
3468             }else{
3469                 s->dct_error_sum[intra][i] -= level;
3470                 level += s->dct_offset[intra][i];
3471                 if(level>0) level=0;
3472             }
3473             block[i]= level;
3474         }
3475     }
3476 }
3477
/**
 * Forward DCT followed by rate-distortion ("trellis") quantisation of one
 * block of 64 coefficients.
 *
 * The block is transformed in place with s->fdsp.fdct() and, if
 * s->dct_error_sum is set, passed through s->denoise_dct(). For every scan
 * position up to the last coefficient that leaves the dead zone, one or two
 * candidate levels are built. A dynamic-programming pass then picks, per
 * position, the (run, level) pair with the lowest
 * distortion + code length * lambda, using a pruned list of "survivor"
 * start positions. Finally the chosen path is written back into block[]
 * through the permutated scan table.
 *
 * @param s        encoder context; s->coded_score[n] is written unless the
 *                 block already quantises to nothing in the pre-scan
 * @param block    64 values, transformed and quantised in place
 * @param n        block index; for intra blocks n < 4 selects y_dc_scale,
 *                 otherwise c_dc_scale
 * @param qscale   quantiser scale, also the index into the q matrices
 * @param overflow set to non-zero when a level may exceed s->max_qcoeff
 * @return scan index of the last non-zero coefficient; a value below the
 *         first coded index (0 for intra, -1 for inter) means no
 *         coefficient is coded
 */
3478 static int dct_quantize_trellis_c(MpegEncContext *s,
3479                                   int16_t *block, int n,
3480                                   int qscale, int *overflow){
3481     const int *qmat;
3482     const uint8_t *scantable= s->intra_scantable.scantable;
3483     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3484     int max=0;
3485     unsigned int threshold1, threshold2;
3486     int bias=0;
         /* run_tab/level_tab/score_tab are indexed by i+1, i.e. by the scan
          * position following the coefficient they describe */
3487     int run_tab[65];
3488     int level_tab[65];
3489     int score_tab[65];
3490     int survivor[65];
3491     int survivor_count;
         /* best "block ends here" choice found so far */
3492     int last_run=0;
3493     int last_level=0;
3494     int last_score= 0;
3495     int last_i;
         /* up to two candidate levels per scan position, coeff_count[] says how many */
3496     int coeff[2][64];
3497     int coeff_count[64];
3498     int qmul, qadd, start_i, last_non_zero, i, dc;
3499     const int esc_length= s->ac_esc_length;
3500     uint8_t * length;
3501     uint8_t * last_length;
3502     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3503
3504     s->fdsp.fdct(block);
3505
3506     if(s->dct_error_sum)
3507         s->denoise_dct(s, block);
         /* reconstruction parameters used for FMT_H263: alevel*qmul + qadd */
3508     qmul= qscale*16;
3509     qadd= ((qscale-1)|1)*8;
3510
3511     if (s->mb_intra) {
3512         int q;
3513         if (!s->h263_aic) {
3514             if (n < 4)
3515                 q = s->y_dc_scale;
3516             else
3517                 q = s->c_dc_scale;
3518             q = q << 3;
3519         } else{
3520             /* For AIC we skip quant/dequant of INTRADC */
3521             q = 1 << 3;
3522             qadd=0;
3523         }
3524
3525         /* note: block[0] is assumed to be positive */
3526         block[0] = (block[0] + (q >> 1)) / q;
             /* the DC coefficient is done; the trellis only covers positions 1..63 */
3527         start_i = 1;
3528         last_non_zero = 0;
3529         qmat = s->q_intra_matrix[qscale];
3530         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3531             bias= 1<<(QMAT_SHIFT-1);
3532         length     = s->intra_ac_vlc_length;
3533         last_length= s->intra_ac_vlc_last_length;
3534     } else {
3535         start_i = 0;
3536         last_non_zero = -1;
3537         qmat = s->q_inter_matrix[qscale];
3538         length     = s->inter_ac_vlc_length;
3539         last_length= s->inter_ac_vlc_last_length;
3540     }
3541     last_i= start_i;
3542
         /* (unsigned)(level+threshold1) > threshold2 is true exactly when
          * level lies outside [-threshold1, threshold1] */
3543     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3544     threshold2= (threshold1<<1);
3545
         /* scan backwards for the last coefficient outside the dead zone */
3546     for(i=63; i>=start_i; i--) {
3547         const int j = scantable[i];
3548         int level = block[j] * qmat[j];
3549
3550         if(((unsigned)(level+threshold1))>threshold2){
3551             last_non_zero = i;
3552             break;
3553         }
3554     }
3555
         /* build the candidate levels for every position up to last_non_zero */
3556     for(i=start_i; i<=last_non_zero; i++) {
3557         const int j = scantable[i];
3558         int level = block[j] * qmat[j];
3559
3560 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3561 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3562         if(((unsigned)(level+threshold1))>threshold2){
                 /* candidates: the rounded level and the next smaller
                  * magnitude; from here on level holds the magnitude */
3563             if(level>0){
3564                 level= (bias + level)>>QMAT_SHIFT;
3565                 coeff[0][i]= level;
3566                 coeff[1][i]= level-1;
3567 //                coeff[2][k]= level-2;
3568             }else{
3569                 level= (bias - level)>>QMAT_SHIFT;
3570                 coeff[0][i]= -level;
3571                 coeff[1][i]= -level+1;
3572 //                coeff[2][k]= -level+2;
3573             }
                 /* a magnitude of 1 has no non-zero second candidate */
3574             coeff_count[i]= FFMIN(level, 2);
3575             assert(coeff_count[i]);
3576             max |=level;
3577         }else{
                 /* inside the dead zone: the only candidate is +1 or -1,
                  * taking the sign of level */
3578             coeff[0][i]= (level>>31)|1;
3579             coeff_count[i]= 1;
3580         }
3581     }
3582
3583     *overflow= s->max_qcoeff < max; //overflow might have happened
3584
         /* nothing left the dead zone: clear the trellis range and stop */
3585     if(last_non_zero < start_i){
3586         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3587         return last_non_zero;
3588     }
3589
3590     score_tab[start_i]= 0;
3591     survivor[0]= start_i;
3592     survivor_count= 1;
3593
         /* dynamic programming over the scan positions */
3594     for(i=start_i; i<=last_non_zero; i++){
3595         int level_index, j, zero_distortion;
3596         int dct_coeff= FFABS(block[ scantable[i] ]);
3597         int best_score=256*256*256*120;
3598
             /* ff_fdct_ifast output is rescaled with ff_inv_aanscales first */
3599         if (s->fdsp.fdct == ff_fdct_ifast)
3600             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3601         zero_distortion= dct_coeff*dct_coeff;
3602
3603         for(level_index=0; level_index < coeff_count[i]; level_index++){
3604             int distortion;
3605             int level= coeff[level_index][i];
3606             const int alevel= FFABS(level);
3607             int unquant_coeff;
3608
3609             assert(level);
3610
                 /* reconstruct the value this level would decode to */
3611             if(s->out_format == FMT_H263){
3612                 unquant_coeff= alevel*qmul + qadd;
3613             }else{ //MPEG1
3614                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3615                 if(s->mb_intra){
3616                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3617                         unquant_coeff =   (unquant_coeff - 1) | 1;
3618                 }else{
3619                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3620                         unquant_coeff =   (unquant_coeff - 1) | 1;
3621                 }
3622                 unquant_coeff<<= 3;
3623             }
3624
                 /* distortion is measured relative to coding a zero here */
3625             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* after the +64 offset, levels in [-64, 63] pass the test
                  * below and use the length tables; all others are costed
                  * with esc_length */
3626             level+=64;
3627             if((level&(~127)) == 0){
3628                 for(j=survivor_count-1; j>=0; j--){
3629                     int run= i - survivor[j];
3630                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3631                     score += score_tab[i-run];
3632
3633                     if(score < best_score){
3634                         best_score= score;
3635                         run_tab[i+1]= run;
3636                         level_tab[i+1]= level-64;
3637                     }
3638                 }
3639
                     /* FMT_H263 also tracks, via last_length, the cost of
                      * this being the final coefficient of the block */
3640                 if(s->out_format == FMT_H263){
3641                     for(j=survivor_count-1; j>=0; j--){
3642                         int run= i - survivor[j];
3643                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3644                         score += score_tab[i-run];
3645                         if(score < last_score){
3646                             last_score= score;
3647                             last_run= run;
3648                             last_level= level-64;
3649                             last_i= i+1;
3650                         }
3651                     }
3652                 }
3653             }else{
3654                 distortion += esc_length*lambda;
3655                 for(j=survivor_count-1; j>=0; j--){
3656                     int run= i - survivor[j];
3657                     int score= distortion + score_tab[i-run];
3658
3659                     if(score < best_score){
3660                         best_score= score;
3661                         run_tab[i+1]= run;
3662                         level_tab[i+1]= level-64;
3663                     }
3664                 }
3665
3666                 if(s->out_format == FMT_H263){
3667                   for(j=survivor_count-1; j>=0; j--){
3668                         int run= i - survivor[j];
3669                         int score= distortion + score_tab[i-run];
3670                         if(score < last_score){
3671                             last_score= score;
3672                             last_run= run;
3673                             last_level= level-64;
3674                             last_i= i+1;
3675                         }
3676                     }
3677                 }
3678             }
3679         }
3680
3681         score_tab[i+1]= best_score;
3682
             /* prune survivors whose score exceeds the new best (plus a
              * lambda margin when last_non_zero > 27) */
3683         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3684         if(last_non_zero <= 27){
3685             for(; survivor_count; survivor_count--){
3686                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3687                     break;
3688             }
3689         }else{
3690             for(; survivor_count; survivor_count--){
3691                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3692                     break;
3693             }
3694         }
3695
3696         survivor[ survivor_count++ ]= i+1;
3697     }
3698
         /* for formats other than FMT_H263 the end position is chosen here;
          * every end position other than 0 is charged lambda*2 */
3699     if(s->out_format != FMT_H263){
3700         last_score= 256*256*256*120;
3701         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3702             int score= score_tab[i];
3703             if(i) score += lambda*2; //FIXME exacter?
3704
3705             if(score < last_score){
3706                 last_score= score;
3707                 last_i= i;
3708                 last_level= level_tab[i];
3709                 last_run= run_tab[i];
3710             }
3711         }
3712     }
3713
3714     s->coded_score[n] = last_score;
3715
         /* remember |block[0]| before the trellis range is cleared */
3716     dc= FFABS(block[0]);
3717     last_non_zero= last_i - 1;
3718     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3719
3720     if(last_non_zero < start_i)
3721         return last_non_zero;
3722
         /* special case: only the coefficient at position 0 of a block with
          * start_i == 0 (the inter path above) is left; decide between its
          * candidate levels and dropping it, using dc as the reference */
3723     if(last_non_zero == 0 && start_i == 0){
3724         int best_level= 0;
3725         int best_score= dc * dc;
3726
3727         for(i=0; i<coeff_count[0]; i++){
3728             int level= coeff[i][0];
3729             int alevel= FFABS(level);
3730             int unquant_coeff, score, distortion;
3731
3732             if(s->out_format == FMT_H263){
3733                     unquant_coeff= (alevel*qmul + qadd)>>3;
3734             }else{ //MPEG1
3735                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3736                     unquant_coeff =   (unquant_coeff - 1) | 1;
3737             }
3738             unquant_coeff = (unquant_coeff + 4) >> 3;
3739             unquant_coeff<<= 3 + 3;
3740
3741             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3742             level+=64;
3743             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3744             else                    score= distortion + esc_length*lambda;
3745
3746             if(score < best_score){
3747                 best_score= score;
3748                 best_level= level - 64;
3749             }
3750         }
3751         block[0]= best_level;
3752         s->coded_score[n] = best_score - dc*dc;
3753         if(best_level == 0) return -1;
3754         else                return last_non_zero;
3755     }
3756
         /* write the final coefficient, then walk the stored runs backwards
          * to emit the remaining levels of the chosen path */
3757     i= last_i;
3758     assert(last_level);
3759
3760     block[ perm_scantable[last_non_zero] ]= last_level;
3761     i -= last_run + 1;
3762
3763     for(; i>start_i; i -= run_tab[i] + 1){
3764         block[ perm_scantable[i-1] ]= level_tab[i];
3765     }
3766
3767     return last_non_zero;
3768 }
3769
3770 //#define REFINE_STATS 1
3771 static int16_t basis[64][64];
3772
3773 static void build_basis(uint8_t *perm){
3774     int i, j, x, y;
3775     emms_c();
3776     for(i=0; i<8; i++){
3777         for(j=0; j<8; j++){
3778             for(y=0; y<8; y++){
3779                 for(x=0; x<8; x++){
3780                     double s= 0.25*(1<<BASIS_SHIFT);
3781                     int index= 8*i + j;
3782                     int perm_index= perm[index];
3783                     if(i==0) s*= sqrt(0.5);
3784                     if(j==0) s*= sqrt(0.5);
3785                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3786                 }
3787             }
3788         }
3789     }
3790 }
3791
3792 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3793                         int16_t *block, int16_t *weight, int16_t *orig,
3794                         int n, int qscale){
3795     int16_t rem[64];
3796     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3797     const uint8_t *scantable= s->intra_scantable.scantable;
3798     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3799 //    unsigned int threshold1, threshold2;
3800 //    int bias=0;
3801     int run_tab[65];
3802     int prev_run=0;
3803     int prev_level=0;
3804     int qmul, qadd, start_i, last_non_zero, i, dc;
3805     uint8_t * length;
3806     uint8_t * last_length;
3807     int lambda;
3808     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3809 #ifdef REFINE_STATS
3810 static int count=0;
3811 static int after_last=0;
3812 static int to_zero=0;
3813 static int from_zero=0;
3814 static int raise=0;
3815 static int lower=0;
3816 static int messed_sign=0;
3817 #endif
3818
3819     if(basis[0][0] == 0)
3820         build_basis(s->idsp.idct_permutation);
3821
3822     qmul= qscale*2;
3823     qadd= (qscale-1)|1;
3824     if (s->mb_intra) {
3825         if (!s->h263_aic) {
3826             if (n < 4)
3827                 q = s->y_dc_scale;
3828             else
3829                 q = s->c_dc_scale;
3830         } else{
3831             /* For AIC we skip quant/dequant of INTRADC */
3832             q = 1;
3833             qadd=0;
3834         }
3835         q <<= RECON_SHIFT-3;
3836         /* note: block[0] is assumed to be positive */
3837         dc= block[0]*q;
3838 //        block[0] = (block[0] + (q >> 1)) / q;
3839         start_i = 1;
3840 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3841 //            bias= 1<<(QMAT_SHIFT-1);
3842         length     = s->intra_ac_vlc_length;
3843         last_length= s->intra_ac_vlc_last_length;
3844     } else {
3845         dc= 0;
3846         start_i = 0;
3847         length     = s->inter_ac_vlc_length;
3848         last_length= s->inter_ac_vlc_last_length;
3849     }
3850     last_non_zero = s->block_last_index[n];
3851
3852 #ifdef REFINE_STATS
3853 {START_TIMER
3854 #endif
3855     dc += (1<<(RECON_SHIFT-1));
3856     for(i=0; i<64; i++){
3857         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3858     }
3859 #ifdef REFINE_STATS
3860 STOP_TIMER("memset rem[]")}
3861 #endif
3862     sum=0;
3863     for(i=0; i<64; i++){
3864         int one= 36;
3865         int qns=4;
3866         int w;
3867
3868         w= FFABS(weight[i]) + qns*one;
3869         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3870
3871         weight[i] = w;
3872 //        w=weight[i] = (63*qns + (w/2)) / w;
3873
3874         assert(w>0);
3875         assert(w<(1<<6));
3876         sum += w*w;
3877     }
3878     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3879 #ifdef REFINE_STATS
3880 {START_TIMER
3881 #endif
3882     run=0;
3883     rle_index=0;
3884     for(i=start_i; i<=last_non_zero; i++){
3885         int j= perm_scantable[i];
3886         const int level= block[j];
3887         int coeff;
3888
3889         if(level){
3890             if(level<0) coeff= qmul*level - qadd;
3891             else        coeff= qmul*level + qadd;
3892             run_tab[rle_index++]=run;
3893             run=0;
3894
3895             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
3896         }else{
3897             run++;
3898         }
3899     }
3900 #ifdef REFINE_STATS
3901 if(last_non_zero>0){
3902 STOP_TIMER("init rem[]")
3903 }
3904 }
3905
3906 {START_TIMER
3907 #endif
3908     for(;;){
3909         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
3910         int best_coeff=0;
3911         int best_change=0;
3912         int run2, best_unquant_change=0, analyze_gradient;
3913 #ifdef REFINE_STATS
3914 {START_TIMER
3915 #endif
3916         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3917
3918         if(analyze_gradient){
3919 #ifdef REFINE_STATS
3920 {START_TIMER
3921 #endif
3922             for(i=0; i<64; i++){
3923                 int w= weight[i];
3924
3925                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3926             }
3927 #ifdef REFINE_STATS
3928 STOP_TIMER("rem*w*w")}
3929 {START_TIMER
3930 #endif
3931             s->fdsp.fdct(d1);
3932 #ifdef REFINE_STATS
3933 STOP_TIMER("dct")}
3934 #endif
3935         }
3936
3937         if(start_i){
3938             const int level= block[0];
3939             int change, old_coeff;
3940
3941             assert(s->mb_intra);
3942
3943             old_coeff= q*level;
3944
3945             for(change=-1; change<=1; change+=2){
3946                 int new_level= level + change;
3947                 int score, new_coeff;
3948
3949                 new_coeff= q*new_level;
3950                 if(new_coeff >= 2048 || new_coeff < 0)
3951                     continue;
3952
3953                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
3954                                                   new_coeff - old_coeff);
3955                 if(score<best_score){
3956                     best_score= score;
3957                     best_coeff= 0;
3958                     best_change= change;
3959                     best_unquant_change= new_coeff - old_coeff;
3960                 }
3961             }
3962         }
3963
3964         run=0;
3965         rle_index=0;
3966         run2= run_tab[rle_index++];
3967         prev_level=0;
3968         prev_run=0;
3969
3970         for(i=start_i; i<64; i++){
3971             int j= perm_scantable[i];
3972             const int level= block[j];
3973             int change, old_coeff;
3974
3975             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3976                 break;
3977
3978             if(level){
3979                 if(level<0) old_coeff= qmul*level - qadd;
3980                 else        old_coeff= qmul*level + qadd;
3981                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3982             }else{
3983                 old_coeff=0;
3984                 run2--;
3985                 assert(run2>=0 || i >= last_non_zero );
3986             }
3987
3988             for(change=-1; change<=1; change+=2){
3989                 int new_level= level + change;
3990                 int score, new_coeff, unquant_change;
3991
3992                 score=0;
3993                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3994                    continue;
3995
3996                 if(new_level){
3997                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3998                     else            new_coeff= qmul*new_level + qadd;
3999                     if(new_coeff >= 2048 || new_coeff <= -2048)
4000                         continue;
4001                     //FIXME check for overflow
4002
4003                     if(level){
4004                         if(level < 63 && level > -63){
4005                             if(i < last_non_zero)
4006                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4007                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4008                             else
4009                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4010                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4011                         }
4012                     }else{
4013                         assert(FFABS(new_level)==1);
4014
4015                         if(analyze_gradient){
4016                             int g= d1[ scantable[i] ];
4017                             if(g && (g^new_level) >= 0)
4018                                 continue;
4019                         }
4020
4021                         if(i < last_non_zero){
4022                             int next_i= i + run2 + 1;
4023                             int next_level= block[ perm_scantable[next_i] ] + 64;
4024
4025                             if(next_level&(~127))
4026                                 next_level= 0;
4027
4028                             if(next_i < last_non_zero)
4029                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4030                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4031                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4032                             else
4033                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4034                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4035                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4036                         }else{
4037                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4038                             if(prev_level){
4039                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4040                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4041                             }
4042                         }
4043                     }
4044                 }else{
4045                     new_coeff=0;
4046                     assert(FFABS(level)==1);
4047
4048                     if(i < last_non_zero){
4049                         int next_i= i + run2 + 1;
4050                         int next_level= block[ perm_scantable[next_i] ] + 64;
4051
4052                         if(next_level&(~127))
4053                             next_level= 0;
4054
4055                         if(next_i < last_non_zero)
4056                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4057                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4058                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4059                         else
4060                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4061                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4062                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4063                     }else{
4064                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4065                         if(prev_level){
4066                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4067                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4068                         }
4069                     }
4070                 }
4071
4072                 score *= lambda;
4073
4074                 unquant_change= new_coeff - old_coeff;
4075                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
4076
4077                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4078                                                    unquant_change);
4079                 if(score<best_score){
4080                     best_score= score;
4081                     best_coeff= i;
4082                     best_change= change;
4083                     best_unquant_change= unquant_change;
4084                 }
4085             }
4086             if(level){
4087                 prev_level= level + 64;
4088                 if(prev_level&(~127))
4089                     prev_level= 0;
4090                 prev_run= run;
4091                 run=0;
4092             }else{
4093                 run++;
4094             }
4095         }
4096 #ifdef REFINE_STATS
4097 STOP_TIMER("iterative step")}
4098 #endif
4099
4100         if(best_change){
4101             int j= perm_scantable[ best_coeff ];
4102
4103             block[j] += best_change;
4104
4105             if(best_coeff > last_non_zero){
4106                 last_non_zero= best_coeff;
4107                 assert(block[j]);
4108 #ifdef REFINE_STATS
4109 after_last++;
4110 #endif
4111             }else{
4112 #ifdef REFINE_STATS
4113 if(block[j]){
4114     if(block[j] - best_change){
4115         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4116             raise++;
4117         }else{
4118             lower++;
4119         }
4120     }else{
4121         from_zero++;
4122     }
4123 }else{
4124     to_zero++;
4125 }
4126 #endif
4127                 for(; last_non_zero>=start_i; last_non_zero--){
4128                     if(block[perm_scantable[last_non_zero]])
4129                         break;
4130                 }
4131             }
4132 #ifdef REFINE_STATS
4133 count++;
4134 if(256*256*256*64 % count == 0){
4135     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4136 }
4137 #endif
4138             run=0;
4139             rle_index=0;
4140             for(i=start_i; i<=last_non_zero; i++){
4141                 int j= perm_scantable[i];
4142                 const int level= block[j];
4143
4144                  if(level){
4145                      run_tab[rle_index++]=run;
4146                      run=0;
4147                  }else{
4148                      run++;
4149                  }
4150             }
4151
4152             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4153         }else{
4154             break;
4155         }
4156     }
4157 #ifdef REFINE_STATS
4158 if(last_non_zero>0){
4159 STOP_TIMER("iterative search")
4160 }
4161 }
4162 #endif
4163
4164     return last_non_zero;
4165 }
4166
4167 int ff_dct_quantize_c(MpegEncContext *s,
4168                         int16_t *block, int n,
4169                         int qscale, int *overflow)
4170 {
4171     int i, j, level, last_non_zero, q, start_i;
4172     const int *qmat;
4173     const uint8_t *scantable= s->intra_scantable.scantable;
4174     int bias;
4175     int max=0;
4176     unsigned int threshold1, threshold2;
4177
4178     s->fdsp.fdct(block);
4179
4180     if(s->dct_error_sum)
4181         s->denoise_dct(s, block);
4182
4183     if (s->mb_intra) {
4184         if (!s->h263_aic) {
4185             if (n < 4)
4186                 q = s->y_dc_scale;
4187             else
4188                 q = s->c_dc_scale;
4189             q = q << 3;
4190         } else
4191             /* For AIC we skip quant/dequant of INTRADC */
4192             q = 1 << 3;
4193
4194         /* note: block[0] is assumed to be positive */
4195         block[0] = (block[0] + (q >> 1)) / q;
4196         start_i = 1;
4197         last_non_zero = 0;
4198         qmat = s->q_intra_matrix[qscale];
4199         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4200     } else {
4201         start_i = 0;
4202         last_non_zero = -1;
4203         qmat = s->q_inter_matrix[qscale];
4204         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4205     }
4206     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4207     threshold2= (threshold1<<1);
4208     for(i=63;i>=start_i;i--) {
4209         j = scantable[i];
4210         level = block[j] * qmat[j];
4211
4212         if(((unsigned)(level+threshold1))>threshold2){
4213             last_non_zero = i;
4214             break;
4215         }else{
4216             block[j]=0;
4217         }
4218     }
4219     for(i=start_i; i<=last_non_zero; i++) {
4220         j = scantable[i];
4221         level = block[j] * qmat[j];
4222
4223 //        if(   bias+level >= (1<<QMAT_SHIFT)
4224 //           || bias-level >= (1<<QMAT_SHIFT)){
4225         if(((unsigned)(level+threshold1))>threshold2){
4226             if(level>0){
4227                 level= (bias + level)>>QMAT_SHIFT;
4228                 block[j]= level;
4229             }else{
4230                 level= (bias - level)>>QMAT_SHIFT;
4231                 block[j]= -level;
4232             }
4233             max |=level;
4234         }else{
4235             block[j]=0;
4236         }
4237     }
4238     *overflow= s->max_qcoeff < max; //overflow might have happened
4239
4240     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4241     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4242         ff_block_permute(block, s->idsp.idct_permutation,
4243                          scantable, last_non_zero);
4244
4245     return last_non_zero;
4246 }
4247
4248 #define OFFSET(x) offsetof(MpegEncContext, x)
4249 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4250 static const AVOption h263_options[] = {
4251     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4252     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4253     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4254     FF_MPV_COMMON_OPTS
4255     { NULL },
4256 };
4257
4258 static const AVClass h263_class = {
4259     .class_name = "H.263 encoder",
4260     .item_name  = av_default_item_name,
4261     .option     = h263_options,
4262     .version    = LIBAVUTIL_VERSION_INT,
4263 };
4264
4265 AVCodec ff_h263_encoder = {
4266     .name           = "h263",
4267     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4268     .type           = AVMEDIA_TYPE_VIDEO,
4269     .id             = AV_CODEC_ID_H263,
4270     .priv_data_size = sizeof(MpegEncContext),
4271     .init           = ff_mpv_encode_init,
4272     .encode2        = ff_mpv_encode_picture,
4273     .close          = ff_mpv_encode_end,
4274     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4275     .priv_class     = &h263_class,
4276 };
4277
4278 static const AVOption h263p_options[] = {
4279     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4280     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4281     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4282     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4283     FF_MPV_COMMON_OPTS
4284     { NULL },
4285 };
4286 static const AVClass h263p_class = {
4287     .class_name = "H.263p encoder",
4288     .item_name  = av_default_item_name,
4289     .option     = h263p_options,
4290     .version    = LIBAVUTIL_VERSION_INT,
4291 };
4292
4293 AVCodec ff_h263p_encoder = {
4294     .name           = "h263p",
4295     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4296     .type           = AVMEDIA_TYPE_VIDEO,
4297     .id             = AV_CODEC_ID_H263P,
4298     .priv_data_size = sizeof(MpegEncContext),
4299     .init           = ff_mpv_encode_init,
4300     .encode2        = ff_mpv_encode_picture,
4301     .close          = ff_mpv_encode_end,
4302     .capabilities   = CODEC_CAP_SLICE_THREADS,
4303     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4304     .priv_class     = &h263p_class,
4305 };
4306
4307 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4308
4309 AVCodec ff_msmpeg4v2_encoder = {
4310     .name           = "msmpeg4v2",
4311     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4312     .type           = AVMEDIA_TYPE_VIDEO,
4313     .id             = AV_CODEC_ID_MSMPEG4V2,
4314     .priv_data_size = sizeof(MpegEncContext),
4315     .init           = ff_mpv_encode_init,
4316     .encode2        = ff_mpv_encode_picture,
4317     .close          = ff_mpv_encode_end,
4318     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4319     .priv_class     = &msmpeg4v2_class,
4320 };
4321
4322 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4323
4324 AVCodec ff_msmpeg4v3_encoder = {
4325     .name           = "msmpeg4",
4326     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4327     .type           = AVMEDIA_TYPE_VIDEO,
4328     .id             = AV_CODEC_ID_MSMPEG4V3,
4329     .priv_data_size = sizeof(MpegEncContext),
4330     .init           = ff_mpv_encode_init,
4331     .encode2        = ff_mpv_encode_picture,
4332     .close          = ff_mpv_encode_end,
4333     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4334     .priv_class     = &msmpeg4v3_class,
4335 };
4336
4337 FF_MPV_GENERIC_CLASS(wmv1)
4338
4339 AVCodec ff_wmv1_encoder = {
4340     .name           = "wmv1",
4341     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4342     .type           = AVMEDIA_TYPE_VIDEO,
4343     .id             = AV_CODEC_ID_WMV1,
4344     .priv_data_size = sizeof(MpegEncContext),
4345     .init           = ff_mpv_encode_init,
4346     .encode2        = ff_mpv_encode_picture,
4347     .close          = ff_mpv_encode_end,
4348     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4349     .priv_class     = &wmv1_class,
4350 };