]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
mpegvideo: Use the current_picture pts
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "dsputil.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "h261.h"
44 #include "h263.h"
45 #include "mathops.h"
46 #include "mpegutils.h"
47 #include "mjpegenc.h"
48 #include "msmpeg4.h"
49 #include "faandct.h"
50 #include "thread.h"
51 #include "aandcttab.h"
52 #include "flv.h"
53 #include "mpeg4video.h"
54 #include "internal.h"
55 #include "bytestream.h"
56 #include <limits.h>
57
58 static int encode_picture(MpegEncContext *s, int picture_number);
59 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
60 static int sse_mb(MpegEncContext *s);
61 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
62 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
63
64 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
65 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
66
67 const AVOption ff_mpv_generic_options[] = {
68     FF_MPV_COMMON_OPTS
69     { NULL },
70 };
71
72 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
73                        uint16_t (*qmat16)[2][64],
74                        const uint16_t *quant_matrix,
75                        int bias, int qmin, int qmax, int intra)
76 {
77     int qscale;
78     int shift = 0;
79
80     for (qscale = qmin; qscale <= qmax; qscale++) {
81         int i;
82         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
83             dsp->fdct == ff_jpeg_fdct_islow_10 ||
84             dsp->fdct == ff_faandct) {
85             for (i = 0; i < 64; i++) {
86                 const int j = dsp->idct_permutation[i];
87                 /* 16 <= qscale * quant_matrix[i] <= 7905
88                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
89                  *             19952 <=              x  <= 249205026
90                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
91                  *           3444240 >= (1 << 36) / (x) >= 275 */
92
93                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
94                                         (qscale * quant_matrix[j]));
95             }
96         } else if (dsp->fdct == ff_fdct_ifast) {
97             for (i = 0; i < 64; i++) {
98                 const int j = dsp->idct_permutation[i];
99                 /* 16 <= qscale * quant_matrix[i] <= 7905
100                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
101                  *             19952 <=              x  <= 249205026
102                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
103                  *           3444240 >= (1 << 36) / (x) >= 275 */
104
105                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
106                                         (ff_aanscales[i] * qscale *
107                                          quant_matrix[j]));
108             }
109         } else {
110             for (i = 0; i < 64; i++) {
111                 const int j = dsp->idct_permutation[i];
112                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
113                  * Assume x = qscale * quant_matrix[i]
114                  * So             16 <=              x  <= 7905
115                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
116                  * so          32768 >= (1 << 19) / (x) >= 67 */
117                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
118                                         (qscale * quant_matrix[j]));
119                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
120                 //                    (qscale * quant_matrix[i]);
121                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
122                                        (qscale * quant_matrix[j]);
123
124                 if (qmat16[qscale][0][i] == 0 ||
125                     qmat16[qscale][0][i] == 128 * 256)
126                     qmat16[qscale][0][i] = 128 * 256 - 1;
127                 qmat16[qscale][1][i] =
128                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
129                                 qmat16[qscale][0][i]);
130             }
131         }
132
133         for (i = intra; i < 64; i++) {
134             int64_t max = 8191;
135             if (dsp->fdct == ff_fdct_ifast) {
136                 max = (8191LL * ff_aanscales[i]) >> 14;
137             }
138             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
139                 shift++;
140             }
141         }
142     }
143     if (shift) {
144         av_log(NULL, AV_LOG_INFO,
145                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
146                QMAT_SHIFT - shift);
147     }
148 }
149
150 static inline void update_qscale(MpegEncContext *s)
151 {
152     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
153                 (FF_LAMBDA_SHIFT + 7);
154     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
155
156     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
157                  FF_LAMBDA_SHIFT;
158 }
159
160 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
161 {
162     int i;
163
164     if (matrix) {
165         put_bits(pb, 1, 1);
166         for (i = 0; i < 64; i++) {
167             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
168         }
169     } else
170         put_bits(pb, 1, 0);
171 }
172
173 /**
174  * init s->current_picture.qscale_table from s->lambda_table
175  */
176 void ff_init_qscale_tab(MpegEncContext *s)
177 {
178     int8_t * const qscale_table = s->current_picture.qscale_table;
179     int i;
180
181     for (i = 0; i < s->mb_num; i++) {
182         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
183         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
184         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
185                                                   s->avctx->qmax);
186     }
187 }
188
189 static void update_duplicate_context_after_me(MpegEncContext *dst,
190                                               MpegEncContext *src)
191 {
192 #define COPY(a) dst->a= src->a
193     COPY(pict_type);
194     COPY(current_picture);
195     COPY(f_code);
196     COPY(b_code);
197     COPY(qscale);
198     COPY(lambda);
199     COPY(lambda2);
200     COPY(picture_in_gop_number);
201     COPY(gop_picture_number);
202     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
203     COPY(progressive_frame);    // FIXME don't set in encode_header
204     COPY(partitioned_frame);    // FIXME don't set in encode_header
205 #undef COPY
206 }
207
208 /**
209  * Set the given MpegEncContext to defaults for encoding.
210  * the changed fields will not depend upon the prior state of the MpegEncContext.
211  */
212 static void MPV_encode_defaults(MpegEncContext *s)
213 {
214     int i;
215     ff_MPV_common_defaults(s);
216
217     for (i = -16; i < 16; i++) {
218         default_fcode_tab[i + MAX_MV] = 1;
219     }
220     s->me.mv_penalty = default_mv_penalty;
221     s->fcode_tab     = default_fcode_tab;
222
223     s->input_picture_number  = 0;
224     s->picture_in_gop_number = 0;
225 }
226
227 /* init video encoder */
228 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
229 {
230     MpegEncContext *s = avctx->priv_data;
231     int i, ret;
232
233     MPV_encode_defaults(s);
234
235     switch (avctx->codec_id) {
236     case AV_CODEC_ID_MPEG2VIDEO:
237         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
238             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
239             av_log(avctx, AV_LOG_ERROR,
240                    "only YUV420 and YUV422 are supported\n");
241             return -1;
242         }
243         break;
244     case AV_CODEC_ID_MJPEG:
245         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
246             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
247             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
248               avctx->pix_fmt != AV_PIX_FMT_YUV422P) ||
249              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
250             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
251             return -1;
252         }
253         break;
254     default:
255         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
256             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
257             return -1;
258         }
259     }
260
261     switch (avctx->pix_fmt) {
262     case AV_PIX_FMT_YUVJ422P:
263     case AV_PIX_FMT_YUV422P:
264         s->chroma_format = CHROMA_422;
265         break;
266     case AV_PIX_FMT_YUVJ420P:
267     case AV_PIX_FMT_YUV420P:
268     default:
269         s->chroma_format = CHROMA_420;
270         break;
271     }
272
273     s->bit_rate = avctx->bit_rate;
274     s->width    = avctx->width;
275     s->height   = avctx->height;
276     if (avctx->gop_size > 600 &&
277         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
278         av_log(avctx, AV_LOG_ERROR,
279                "Warning keyframe interval too large! reducing it ...\n");
280         avctx->gop_size = 600;
281     }
282     s->gop_size     = avctx->gop_size;
283     s->avctx        = avctx;
284     s->flags        = avctx->flags;
285     s->flags2       = avctx->flags2;
286     if (avctx->max_b_frames > MAX_B_FRAMES) {
287         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
288                "is %d.\n", MAX_B_FRAMES);
289     }
290     s->max_b_frames = avctx->max_b_frames;
291     s->codec_id     = avctx->codec->id;
292     s->strict_std_compliance = avctx->strict_std_compliance;
293     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
294     s->mpeg_quant         = avctx->mpeg_quant;
295     s->rtp_mode           = !!avctx->rtp_payload_size;
296     s->intra_dc_precision = avctx->intra_dc_precision;
297     s->user_specified_pts = AV_NOPTS_VALUE;
298
299     if (s->gop_size <= 1) {
300         s->intra_only = 1;
301         s->gop_size   = 12;
302     } else {
303         s->intra_only = 0;
304     }
305
306     s->me_method = avctx->me_method;
307
308     /* Fixed QSCALE */
309     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
310
311     s->adaptive_quant = (s->avctx->lumi_masking ||
312                          s->avctx->dark_masking ||
313                          s->avctx->temporal_cplx_masking ||
314                          s->avctx->spatial_cplx_masking  ||
315                          s->avctx->p_masking      ||
316                          s->avctx->border_masking ||
317                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
318                         !s->fixed_qscale;
319
320     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
321
322     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
323         av_log(avctx, AV_LOG_ERROR,
324                "a vbv buffer size is needed, "
325                "for encoding with a maximum bitrate\n");
326         return -1;
327     }
328
329     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
330         av_log(avctx, AV_LOG_INFO,
331                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
332     }
333
334     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
335         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
336         return -1;
337     }
338
339     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
340         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
341         return -1;
342     }
343
344     if (avctx->rc_max_rate &&
345         avctx->rc_max_rate == avctx->bit_rate &&
346         avctx->rc_max_rate != avctx->rc_min_rate) {
347         av_log(avctx, AV_LOG_INFO,
348                "impossible bitrate constraints, this will fail\n");
349     }
350
351     if (avctx->rc_buffer_size &&
352         avctx->bit_rate * (int64_t)avctx->time_base.num >
353             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
354         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
355         return -1;
356     }
357
358     if (!s->fixed_qscale &&
359         avctx->bit_rate * av_q2d(avctx->time_base) >
360             avctx->bit_rate_tolerance) {
361         av_log(avctx, AV_LOG_ERROR,
362                "bitrate tolerance too small for bitrate\n");
363         return -1;
364     }
365
366     if (s->avctx->rc_max_rate &&
367         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
368         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
369          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
370         90000LL * (avctx->rc_buffer_size - 1) >
371             s->avctx->rc_max_rate * 0xFFFFLL) {
372         av_log(avctx, AV_LOG_INFO,
373                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
374                "specified vbv buffer is too large for the given bitrate!\n");
375     }
376
377     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
378         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
379         s->codec_id != AV_CODEC_ID_FLV1) {
380         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
381         return -1;
382     }
383
384     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
385         av_log(avctx, AV_LOG_ERROR,
386                "OBMC is only supported with simple mb decision\n");
387         return -1;
388     }
389
390     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
391         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
392         return -1;
393     }
394
395     if (s->max_b_frames                    &&
396         s->codec_id != AV_CODEC_ID_MPEG4      &&
397         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
398         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
399         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
400         return -1;
401     }
402
403     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
404          s->codec_id == AV_CODEC_ID_H263  ||
405          s->codec_id == AV_CODEC_ID_H263P) &&
406         (avctx->sample_aspect_ratio.num > 255 ||
407          avctx->sample_aspect_ratio.den > 255)) {
408         av_log(avctx, AV_LOG_ERROR,
409                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
410                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
411         return -1;
412     }
413
414     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
415         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
416         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
417         return -1;
418     }
419
420     // FIXME mpeg2 uses that too
421     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
422         av_log(avctx, AV_LOG_ERROR,
423                "mpeg2 style quantization not supported by codec\n");
424         return -1;
425     }
426
427     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
428         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
429         return -1;
430     }
431
432     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
433         s->avctx->mb_decision != FF_MB_DECISION_RD) {
434         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
435         return -1;
436     }
437
438     if (s->avctx->scenechange_threshold < 1000000000 &&
439         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
440         av_log(avctx, AV_LOG_ERROR,
441                "closed gop with scene change detection are not supported yet, "
442                "set threshold to 1000000000\n");
443         return -1;
444     }
445
446     if (s->flags & CODEC_FLAG_LOW_DELAY) {
447         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
448             av_log(avctx, AV_LOG_ERROR,
449                   "low delay forcing is only available for mpeg2\n");
450             return -1;
451         }
452         if (s->max_b_frames != 0) {
453             av_log(avctx, AV_LOG_ERROR,
454                    "b frames cannot be used with low delay\n");
455             return -1;
456         }
457     }
458
459     if (s->q_scale_type == 1) {
460         if (avctx->qmax > 12) {
461             av_log(avctx, AV_LOG_ERROR,
462                    "non linear quant only supports qmax <= 12 currently\n");
463             return -1;
464         }
465     }
466
467     if (s->avctx->thread_count > 1         &&
468         s->codec_id != AV_CODEC_ID_MPEG4      &&
469         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
470         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
471         (s->codec_id != AV_CODEC_ID_H263P)) {
472         av_log(avctx, AV_LOG_ERROR,
473                "multi threaded encoding not supported by codec\n");
474         return -1;
475     }
476
477     if (s->avctx->thread_count < 1) {
478         av_log(avctx, AV_LOG_ERROR,
479                "automatic thread number detection not supported by codec,"
480                "patch welcome\n");
481         return -1;
482     }
483
484     if (s->avctx->thread_count > 1)
485         s->rtp_mode = 1;
486
487     if (!avctx->time_base.den || !avctx->time_base.num) {
488         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
489         return -1;
490     }
491
492     i = (INT_MAX / 2 + 128) >> 8;
493     if (avctx->mb_threshold >= i) {
494         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
495                i - 1);
496         return -1;
497     }
498
499     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
500         av_log(avctx, AV_LOG_INFO,
501                "notice: b_frame_strategy only affects the first pass\n");
502         avctx->b_frame_strategy = 0;
503     }
504
505     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
506     if (i > 1) {
507         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
508         avctx->time_base.den /= i;
509         avctx->time_base.num /= i;
510         //return -1;
511     }
512
513     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
514         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
515         // (a + x * 3 / 8) / x
516         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
517         s->inter_quant_bias = 0;
518     } else {
519         s->intra_quant_bias = 0;
520         // (a - x / 4) / x
521         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
522     }
523
524     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
525         s->intra_quant_bias = avctx->intra_quant_bias;
526     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
527         s->inter_quant_bias = avctx->inter_quant_bias;
528
529     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
530         s->avctx->time_base.den > (1 << 16) - 1) {
531         av_log(avctx, AV_LOG_ERROR,
532                "timebase %d/%d not supported by MPEG 4 standard, "
533                "the maximum admitted value for the timebase denominator "
534                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
535                (1 << 16) - 1);
536         return -1;
537     }
538     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
539
540     switch (avctx->codec->id) {
541     case AV_CODEC_ID_MPEG1VIDEO:
542         s->out_format = FMT_MPEG1;
543         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
544         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
545         break;
546     case AV_CODEC_ID_MPEG2VIDEO:
547         s->out_format = FMT_MPEG1;
548         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
549         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
550         s->rtp_mode   = 1;
551         break;
552     case AV_CODEC_ID_MJPEG:
553         s->out_format = FMT_MJPEG;
554         s->intra_only = 1; /* force intra only for jpeg */
555         if (!CONFIG_MJPEG_ENCODER ||
556             ff_mjpeg_encode_init(s) < 0)
557             return -1;
558         avctx->delay = 0;
559         s->low_delay = 1;
560         break;
561     case AV_CODEC_ID_H261:
562         if (!CONFIG_H261_ENCODER)
563             return -1;
564         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
565             av_log(avctx, AV_LOG_ERROR,
566                    "The specified picture size of %dx%d is not valid for the "
567                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
568                     s->width, s->height);
569             return -1;
570         }
571         s->out_format = FMT_H261;
572         avctx->delay  = 0;
573         s->low_delay  = 1;
574         break;
575     case AV_CODEC_ID_H263:
576         if (!CONFIG_H263_ENCODER)
577         return -1;
578         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
579                              s->width, s->height) == 8) {
580             av_log(avctx, AV_LOG_INFO,
581                    "The specified picture size of %dx%d is not valid for "
582                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
583                    "352x288, 704x576, and 1408x1152."
584                    "Try H.263+.\n", s->width, s->height);
585             return -1;
586         }
587         s->out_format = FMT_H263;
588         avctx->delay  = 0;
589         s->low_delay  = 1;
590         break;
591     case AV_CODEC_ID_H263P:
592         s->out_format = FMT_H263;
593         s->h263_plus  = 1;
594         /* Fx */
595         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
596         s->modified_quant  = s->h263_aic;
597         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
598         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
599
600         /* /Fx */
601         /* These are just to be sure */
602         avctx->delay = 0;
603         s->low_delay = 1;
604         break;
605     case AV_CODEC_ID_FLV1:
606         s->out_format      = FMT_H263;
607         s->h263_flv        = 2; /* format = 1; 11-bit codes */
608         s->unrestricted_mv = 1;
609         s->rtp_mode  = 0; /* don't allow GOB */
610         avctx->delay = 0;
611         s->low_delay = 1;
612         break;
613     case AV_CODEC_ID_RV10:
614         s->out_format = FMT_H263;
615         avctx->delay  = 0;
616         s->low_delay  = 1;
617         break;
618     case AV_CODEC_ID_RV20:
619         s->out_format      = FMT_H263;
620         avctx->delay       = 0;
621         s->low_delay       = 1;
622         s->modified_quant  = 1;
623         s->h263_aic        = 1;
624         s->h263_plus       = 1;
625         s->loop_filter     = 1;
626         s->unrestricted_mv = 0;
627         break;
628     case AV_CODEC_ID_MPEG4:
629         s->out_format      = FMT_H263;
630         s->h263_pred       = 1;
631         s->unrestricted_mv = 1;
632         s->low_delay       = s->max_b_frames ? 0 : 1;
633         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
634         break;
635     case AV_CODEC_ID_MSMPEG4V2:
636         s->out_format      = FMT_H263;
637         s->h263_pred       = 1;
638         s->unrestricted_mv = 1;
639         s->msmpeg4_version = 2;
640         avctx->delay       = 0;
641         s->low_delay       = 1;
642         break;
643     case AV_CODEC_ID_MSMPEG4V3:
644         s->out_format        = FMT_H263;
645         s->h263_pred         = 1;
646         s->unrestricted_mv   = 1;
647         s->msmpeg4_version   = 3;
648         s->flipflop_rounding = 1;
649         avctx->delay         = 0;
650         s->low_delay         = 1;
651         break;
652     case AV_CODEC_ID_WMV1:
653         s->out_format        = FMT_H263;
654         s->h263_pred         = 1;
655         s->unrestricted_mv   = 1;
656         s->msmpeg4_version   = 4;
657         s->flipflop_rounding = 1;
658         avctx->delay         = 0;
659         s->low_delay         = 1;
660         break;
661     case AV_CODEC_ID_WMV2:
662         s->out_format        = FMT_H263;
663         s->h263_pred         = 1;
664         s->unrestricted_mv   = 1;
665         s->msmpeg4_version   = 5;
666         s->flipflop_rounding = 1;
667         avctx->delay         = 0;
668         s->low_delay         = 1;
669         break;
670     default:
671         return -1;
672     }
673
674     avctx->has_b_frames = !s->low_delay;
675
676     s->encoding = 1;
677
678     s->progressive_frame    =
679     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
680                                                 CODEC_FLAG_INTERLACED_ME) ||
681                                 s->alternate_scan);
682
683     /* init */
684     if (ff_MPV_common_init(s) < 0)
685         return -1;
686
687     if (ARCH_X86)
688         ff_MPV_encode_init_x86(s);
689
690     s->avctx->coded_frame = s->current_picture.f;
691
692     if (s->msmpeg4_version) {
693         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
694                           2 * 2 * (MAX_LEVEL + 1) *
695                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
696     }
697     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
698
699     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
700     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
701     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
702     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
703     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
704                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
705     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
706                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
707
708     if (s->avctx->noise_reduction) {
709         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
710                           2 * 64 * sizeof(uint16_t), fail);
711     }
712
713     if (CONFIG_H263_ENCODER)
714         ff_h263dsp_init(&s->h263dsp);
715     if (!s->dct_quantize)
716         s->dct_quantize = ff_dct_quantize_c;
717     if (!s->denoise_dct)
718         s->denoise_dct  = denoise_dct_c;
719     s->fast_dct_quantize = s->dct_quantize;
720     if (avctx->trellis)
721         s->dct_quantize  = dct_quantize_trellis_c;
722
723     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
724         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
725
726     s->quant_precision = 5;
727
728     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
729     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
730
731     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
732         ff_h261_encode_init(s);
733     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
734         ff_h263_encode_init(s);
735     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
736         ff_msmpeg4_encode_init(s);
737     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
738         && s->out_format == FMT_MPEG1)
739         ff_mpeg1_encode_init(s);
740
741     /* init q matrix */
742     for (i = 0; i < 64; i++) {
743         int j = s->dsp.idct_permutation[i];
744         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
745             s->mpeg_quant) {
746             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
747             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
748         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
749             s->intra_matrix[j] =
750             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
751         } else {
752             /* mpeg1/2 */
753             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
754             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
755         }
756         if (s->avctx->intra_matrix)
757             s->intra_matrix[j] = s->avctx->intra_matrix[i];
758         if (s->avctx->inter_matrix)
759             s->inter_matrix[j] = s->avctx->inter_matrix[i];
760     }
761
762     /* precompute matrix */
763     /* for mjpeg, we do include qscale in the matrix */
764     if (s->out_format != FMT_MJPEG) {
765         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
766                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
767                           31, 1);
768         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
769                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
770                           31, 0);
771     }
772
773     if (ff_rate_control_init(s) < 0)
774         return -1;
775
776 #if FF_API_ERROR_RATE
777     FF_DISABLE_DEPRECATION_WARNINGS
778     if (avctx->error_rate)
779         s->error_rate = avctx->error_rate;
780     FF_ENABLE_DEPRECATION_WARNINGS;
781 #endif
782
783     if (avctx->b_frame_strategy == 2) {
784         for (i = 0; i < s->max_b_frames + 2; i++) {
785             s->tmp_frames[i] = av_frame_alloc();
786             if (!s->tmp_frames[i])
787                 return AVERROR(ENOMEM);
788
789             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
790             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
791             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
792
793             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
794             if (ret < 0)
795                 return ret;
796         }
797     }
798
799     return 0;
800 fail:
801     ff_MPV_encode_end(avctx);
802     return AVERROR_UNKNOWN;
803 }
804
805 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
806 {
807     MpegEncContext *s = avctx->priv_data;
808     int i;
809
810     ff_rate_control_uninit(s);
811
812     ff_MPV_common_end(s);
813     if (CONFIG_MJPEG_ENCODER &&
814         s->out_format == FMT_MJPEG)
815         ff_mjpeg_encode_close(s);
816
817     av_freep(&avctx->extradata);
818
819     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
820         av_frame_free(&s->tmp_frames[i]);
821
822     ff_free_picture_tables(&s->new_picture);
823     ff_mpeg_unref_picture(s, &s->new_picture);
824
825     av_freep(&s->avctx->stats_out);
826     av_freep(&s->ac_stats);
827
828     av_freep(&s->q_intra_matrix);
829     av_freep(&s->q_inter_matrix);
830     av_freep(&s->q_intra_matrix16);
831     av_freep(&s->q_inter_matrix16);
832     av_freep(&s->input_picture);
833     av_freep(&s->reordered_input_picture);
834     av_freep(&s->dct_offset);
835
836     return 0;
837 }
838
/**
 * Sum of absolute differences between a 16x16 block and a constant value.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared against
 * @param stride distance in bytes between two rows of src
 * @return the accumulated absolute error over all 256 samples
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++)
            total += FFABS(line[col] - ref);
    }

    return total;
}
852
853 static int get_intra_count(MpegEncContext *s, uint8_t *src,
854                            uint8_t *ref, int stride)
855 {
856     int x, y, w, h;
857     int acc = 0;
858
859     w = s->width  & ~15;
860     h = s->height & ~15;
861
862     for (y = 0; y < h; y += 16) {
863         for (x = 0; x < w; x += 16) {
864             int offset = x + y * stride;
865             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
866                                      16);
867             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
868             int sae  = get_sae(src + offset, mean, stride);
869
870             acc += sae + 500 < sad;
871         }
872     }
873     return acc;
874 }
875
876
877 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
878 {
879     Picture *pic = NULL;
880     int64_t pts;
881     int i, display_picture_number = 0, ret;
882     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
883                                                  (s->low_delay ? 0 : 1);
884     int direct = 1;
885
886     if (pic_arg) {
887         pts = pic_arg->pts;
888         display_picture_number = s->input_picture_number++;
889
890         if (pts != AV_NOPTS_VALUE) {
891             if (s->user_specified_pts != AV_NOPTS_VALUE) {
892                 int64_t time = pts;
893                 int64_t last = s->user_specified_pts;
894
895                 if (time <= last) {
896                     av_log(s->avctx, AV_LOG_ERROR,
897                            "Error, Invalid timestamp=%"PRId64", "
898                            "last=%"PRId64"\n", pts, s->user_specified_pts);
899                     return -1;
900                 }
901
902                 if (!s->low_delay && display_picture_number == 1)
903                     s->dts_delta = time - last;
904             }
905             s->user_specified_pts = pts;
906         } else {
907             if (s->user_specified_pts != AV_NOPTS_VALUE) {
908                 s->user_specified_pts =
909                 pts = s->user_specified_pts + 1;
910                 av_log(s->avctx, AV_LOG_INFO,
911                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
912                        pts);
913             } else {
914                 pts = display_picture_number;
915             }
916         }
917     }
918
919     if (pic_arg) {
920         if (!pic_arg->buf[0]);
921             direct = 0;
922         if (pic_arg->linesize[0] != s->linesize)
923             direct = 0;
924         if (pic_arg->linesize[1] != s->uvlinesize)
925             direct = 0;
926         if (pic_arg->linesize[2] != s->uvlinesize)
927             direct = 0;
928
929         av_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
930                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
931
932         if (direct) {
933             i = ff_find_unused_picture(s, 1);
934             if (i < 0)
935                 return i;
936
937             pic = &s->picture[i];
938             pic->reference = 3;
939
940             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
941                 return ret;
942             if (ff_alloc_picture(s, pic, 1) < 0) {
943                 return -1;
944             }
945         } else {
946             i = ff_find_unused_picture(s, 0);
947             if (i < 0)
948                 return i;
949
950             pic = &s->picture[i];
951             pic->reference = 3;
952
953             if (ff_alloc_picture(s, pic, 0) < 0) {
954                 return -1;
955             }
956
957             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
958                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
959                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
960                 // empty
961             } else {
962                 int h_chroma_shift, v_chroma_shift;
963                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
964                                                  &h_chroma_shift,
965                                                  &v_chroma_shift);
966
967                 for (i = 0; i < 3; i++) {
968                     int src_stride = pic_arg->linesize[i];
969                     int dst_stride = i ? s->uvlinesize : s->linesize;
970                     int h_shift = i ? h_chroma_shift : 0;
971                     int v_shift = i ? v_chroma_shift : 0;
972                     int w = s->width  >> h_shift;
973                     int h = s->height >> v_shift;
974                     uint8_t *src = pic_arg->data[i];
975                     uint8_t *dst = pic->f->data[i];
976
977                     if (!s->avctx->rc_buffer_size)
978                         dst += INPLACE_OFFSET;
979
980                     if (src_stride == dst_stride)
981                         memcpy(dst, src, src_stride * h);
982                     else {
983                         while (h--) {
984                             memcpy(dst, src, w);
985                             dst += dst_stride;
986                             src += src_stride;
987                         }
988                     }
989                 }
990             }
991         }
992         ret = av_frame_copy_props(pic->f, pic_arg);
993         if (ret < 0)
994             return ret;
995
996         pic->f->display_picture_number = display_picture_number;
997         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
998     }
999
1000     /* shift buffer entries */
1001     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1002         s->input_picture[i - 1] = s->input_picture[i];
1003
1004     s->input_picture[encoding_delay] = (Picture*) pic;
1005
1006     return 0;
1007 }
1008
1009 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1010 {
1011     int x, y, plane;
1012     int score = 0;
1013     int64_t score64 = 0;
1014
1015     for (plane = 0; plane < 3; plane++) {
1016         const int stride = p->f->linesize[plane];
1017         const int bw = plane ? 1 : 2;
1018         for (y = 0; y < s->mb_height * bw; y++) {
1019             for (x = 0; x < s->mb_width * bw; x++) {
1020                 int off = p->shared ? 0 : 16;
1021                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1022                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1023                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1024
1025                 switch (s->avctx->frame_skip_exp) {
1026                 case 0: score    =  FFMAX(score, v);          break;
1027                 case 1: score   += FFABS(v);                  break;
1028                 case 2: score   += v * v;                     break;
1029                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1030                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1031                 }
1032             }
1033         }
1034     }
1035
1036     if (score)
1037         score64 = score;
1038
1039     if (score64 < s->avctx->frame_skip_threshold)
1040         return 1;
1041     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1042         return 1;
1043     return 0;
1044 }
1045
1046 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1047 {
1048     AVPacket pkt = { 0 };
1049     int ret, got_output;
1050
1051     av_init_packet(&pkt);
1052     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1053     if (ret < 0)
1054         return ret;
1055
1056     ret = pkt.size;
1057     av_free_packet(&pkt);
1058     return ret;
1059 }
1060
1061 static int estimate_best_b_count(MpegEncContext *s)
1062 {
1063     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1064     AVCodecContext *c = avcodec_alloc_context3(NULL);
1065     const int scale = s->avctx->brd_scale;
1066     int i, j, out_size, p_lambda, b_lambda, lambda2;
1067     int64_t best_rd  = INT64_MAX;
1068     int best_b_count = -1;
1069
1070     assert(scale >= 0 && scale <= 3);
1071
1072     //emms_c();
1073     //s->next_picture_ptr->quality;
1074     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1075     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1076     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1077     if (!b_lambda) // FIXME we should do this somewhere else
1078         b_lambda = p_lambda;
1079     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1080                FF_LAMBDA_SHIFT;
1081
1082     c->width        = s->width  >> scale;
1083     c->height       = s->height >> scale;
1084     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1085                       CODEC_FLAG_INPUT_PRESERVED;
1086     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1087     c->mb_decision  = s->avctx->mb_decision;
1088     c->me_cmp       = s->avctx->me_cmp;
1089     c->mb_cmp       = s->avctx->mb_cmp;
1090     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1091     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1092     c->time_base    = s->avctx->time_base;
1093     c->max_b_frames = s->max_b_frames;
1094
1095     if (avcodec_open2(c, codec, NULL) < 0)
1096         return -1;
1097
1098     for (i = 0; i < s->max_b_frames + 2; i++) {
1099         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1100                                                 s->next_picture_ptr;
1101
1102         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1103             pre_input = *pre_input_ptr;
1104
1105             if (!pre_input.shared && i) {
1106                 pre_input.f->data[0] += INPLACE_OFFSET;
1107                 pre_input.f->data[1] += INPLACE_OFFSET;
1108                 pre_input.f->data[2] += INPLACE_OFFSET;
1109             }
1110
1111             s->dsp.shrink[scale](s->tmp_frames[i]->data[0], s->tmp_frames[i]->linesize[0],
1112                                  pre_input.f->data[0], pre_input.f->linesize[0],
1113                                  c->width,      c->height);
1114             s->dsp.shrink[scale](s->tmp_frames[i]->data[1], s->tmp_frames[i]->linesize[1],
1115                                  pre_input.f->data[1], pre_input.f->linesize[1],
1116                                  c->width >> 1, c->height >> 1);
1117             s->dsp.shrink[scale](s->tmp_frames[i]->data[2], s->tmp_frames[i]->linesize[2],
1118                                  pre_input.f->data[2], pre_input.f->linesize[2],
1119                                  c->width >> 1, c->height >> 1);
1120         }
1121     }
1122
1123     for (j = 0; j < s->max_b_frames + 1; j++) {
1124         int64_t rd = 0;
1125
1126         if (!s->input_picture[j])
1127             break;
1128
1129         c->error[0] = c->error[1] = c->error[2] = 0;
1130
1131         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1132         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1133
1134         out_size = encode_frame(c, s->tmp_frames[0]);
1135
1136         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1137
1138         for (i = 0; i < s->max_b_frames + 1; i++) {
1139             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1140
1141             s->tmp_frames[i + 1]->pict_type = is_p ?
1142                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1143             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1144
1145             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1146
1147             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1148         }
1149
1150         /* get the delayed frames */
1151         while (out_size) {
1152             out_size = encode_frame(c, NULL);
1153             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1154         }
1155
1156         rd += c->error[0] + c->error[1] + c->error[2];
1157
1158         if (rd < best_rd) {
1159             best_rd = rd;
1160             best_b_count = j;
1161         }
1162     }
1163
1164     avcodec_close(c);
1165     av_freep(&c);
1166
1167     return best_b_count;
1168 }
1169
1170 static int select_input_picture(MpegEncContext *s)
1171 {
1172     int i, ret;
1173
1174     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1175         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1176     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1177
1178     /* set next picture type & ordering */
1179     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1180         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1181             s->next_picture_ptr == NULL || s->intra_only) {
1182             s->reordered_input_picture[0] = s->input_picture[0];
1183             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1184             s->reordered_input_picture[0]->f->coded_picture_number =
1185                 s->coded_picture_number++;
1186         } else {
1187             int b_frames;
1188
1189             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1190                 if (s->picture_in_gop_number < s->gop_size &&
1191                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1192                     // FIXME check that te gop check above is +-1 correct
1193                     av_frame_unref(s->input_picture[0]->f);
1194
1195                     emms_c();
1196                     ff_vbv_update(s, 0);
1197
1198                     goto no_output_pic;
1199                 }
1200             }
1201
1202             if (s->flags & CODEC_FLAG_PASS2) {
1203                 for (i = 0; i < s->max_b_frames + 1; i++) {
1204                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1205
1206                     if (pict_num >= s->rc_context.num_entries)
1207                         break;
1208                     if (!s->input_picture[i]) {
1209                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1210                         break;
1211                     }
1212
1213                     s->input_picture[i]->f->pict_type =
1214                         s->rc_context.entry[pict_num].new_pict_type;
1215                 }
1216             }
1217
1218             if (s->avctx->b_frame_strategy == 0) {
1219                 b_frames = s->max_b_frames;
1220                 while (b_frames && !s->input_picture[b_frames])
1221                     b_frames--;
1222             } else if (s->avctx->b_frame_strategy == 1) {
1223                 for (i = 1; i < s->max_b_frames + 1; i++) {
1224                     if (s->input_picture[i] &&
1225                         s->input_picture[i]->b_frame_score == 0) {
1226                         s->input_picture[i]->b_frame_score =
1227                             get_intra_count(s,
1228                                             s->input_picture[i    ]->f->data[0],
1229                                             s->input_picture[i - 1]->f->data[0],
1230                                             s->linesize) + 1;
1231                     }
1232                 }
1233                 for (i = 0; i < s->max_b_frames + 1; i++) {
1234                     if (s->input_picture[i] == NULL ||
1235                         s->input_picture[i]->b_frame_score - 1 >
1236                             s->mb_num / s->avctx->b_sensitivity)
1237                         break;
1238                 }
1239
1240                 b_frames = FFMAX(0, i - 1);
1241
1242                 /* reset scores */
1243                 for (i = 0; i < b_frames + 1; i++) {
1244                     s->input_picture[i]->b_frame_score = 0;
1245                 }
1246             } else if (s->avctx->b_frame_strategy == 2) {
1247                 b_frames = estimate_best_b_count(s);
1248             } else {
1249                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1250                 b_frames = 0;
1251             }
1252
1253             emms_c();
1254
1255             for (i = b_frames - 1; i >= 0; i--) {
1256                 int type = s->input_picture[i]->f->pict_type;
1257                 if (type && type != AV_PICTURE_TYPE_B)
1258                     b_frames = i;
1259             }
1260             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1261                 b_frames == s->max_b_frames) {
1262                 av_log(s->avctx, AV_LOG_ERROR,
1263                        "warning, too many b frames in a row\n");
1264             }
1265
1266             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1267                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1268                     s->gop_size > s->picture_in_gop_number) {
1269                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1270                 } else {
1271                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1272                         b_frames = 0;
1273                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1274                 }
1275             }
1276
1277             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1278                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1279                 b_frames--;
1280
1281             s->reordered_input_picture[0] = s->input_picture[b_frames];
1282             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1283                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1284             s->reordered_input_picture[0]->f->coded_picture_number =
1285                 s->coded_picture_number++;
1286             for (i = 0; i < b_frames; i++) {
1287                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1288                 s->reordered_input_picture[i + 1]->f->pict_type =
1289                     AV_PICTURE_TYPE_B;
1290                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1291                     s->coded_picture_number++;
1292             }
1293         }
1294     }
1295 no_output_pic:
1296     if (s->reordered_input_picture[0]) {
1297         s->reordered_input_picture[0]->reference =
1298            s->reordered_input_picture[0]->f->pict_type !=
1299                AV_PICTURE_TYPE_B ? 3 : 0;
1300
1301         ff_mpeg_unref_picture(s, &s->new_picture);
1302         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1303             return ret;
1304
1305         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1306             // input is a shared pix, so we can't modifiy it -> alloc a new
1307             // one & ensure that the shared one is reuseable
1308
1309             Picture *pic;
1310             int i = ff_find_unused_picture(s, 0);
1311             if (i < 0)
1312                 return i;
1313             pic = &s->picture[i];
1314
1315             pic->reference = s->reordered_input_picture[0]->reference;
1316             if (ff_alloc_picture(s, pic, 0) < 0) {
1317                 return -1;
1318             }
1319
1320             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1321             if (ret < 0)
1322                 return ret;
1323
1324             /* mark us unused / free shared pic */
1325             av_frame_unref(s->reordered_input_picture[0]->f);
1326             s->reordered_input_picture[0]->shared = 0;
1327
1328             s->current_picture_ptr = pic;
1329         } else {
1330             // input is not a shared pix -> reuse buffer for current_pix
1331             s->current_picture_ptr = s->reordered_input_picture[0];
1332             for (i = 0; i < 4; i++) {
1333                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1334             }
1335         }
1336         ff_mpeg_unref_picture(s, &s->current_picture);
1337         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1338                                        s->current_picture_ptr)) < 0)
1339             return ret;
1340
1341         s->picture_number = s->new_picture.f->display_picture_number;
1342     } else {
1343         ff_mpeg_unref_picture(s, &s->new_picture);
1344     }
1345     return 0;
1346 }
1347
1348 static void frame_end(MpegEncContext *s)
1349 {
1350     int i;
1351
1352     if (s->unrestricted_mv &&
1353         s->current_picture.reference &&
1354         !s->intra_only) {
1355         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1356         int hshift = desc->log2_chroma_w;
1357         int vshift = desc->log2_chroma_h;
1358         s->dsp.draw_edges(s->current_picture.f->data[0], s->linesize,
1359                           s->h_edge_pos, s->v_edge_pos,
1360                           EDGE_WIDTH, EDGE_WIDTH,
1361                           EDGE_TOP | EDGE_BOTTOM);
1362         s->dsp.draw_edges(s->current_picture.f->data[1], s->uvlinesize,
1363                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1364                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1365                           EDGE_TOP | EDGE_BOTTOM);
1366         s->dsp.draw_edges(s->current_picture.f->data[2], s->uvlinesize,
1367                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1368                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1369                           EDGE_TOP | EDGE_BOTTOM);
1370     }
1371
1372     emms_c();
1373
1374     s->last_pict_type                 = s->pict_type;
1375     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1376     if (s->pict_type!= AV_PICTURE_TYPE_B)
1377         s->last_non_b_pict_type = s->pict_type;
1378
1379     if (s->encoding) {
1380         /* release non-reference frames */
1381         for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1382             if (!s->picture[i].reference)
1383                 ff_mpeg_unref_picture(s, &s->picture[i]);
1384         }
1385     }
1386
1387     s->avctx->coded_frame = s->current_picture_ptr->f;
1388
1389 }
1390
1391 static void update_noise_reduction(MpegEncContext *s)
1392 {
1393     int intra, i;
1394
1395     for (intra = 0; intra < 2; intra++) {
1396         if (s->dct_count[intra] > (1 << 16)) {
1397             for (i = 0; i < 64; i++) {
1398                 s->dct_error_sum[intra][i] >>= 1;
1399             }
1400             s->dct_count[intra] >>= 1;
1401         }
1402
1403         for (i = 0; i < 64; i++) {
1404             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1405                                        s->dct_count[intra] +
1406                                        s->dct_error_sum[intra][i] / 2) /
1407                                       (s->dct_error_sum[intra][i] + 1);
1408         }
1409     }
1410 }
1411
1412 static int frame_start(MpegEncContext *s)
1413 {
1414     int ret;
1415
1416     /* mark & release old frames */
1417     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1418         s->last_picture_ptr != s->next_picture_ptr &&
1419         s->last_picture_ptr->f->buf[0]) {
1420         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1421     }
1422
1423     s->current_picture_ptr->f->pict_type = s->pict_type;
1424     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1425
1426     ff_mpeg_unref_picture(s, &s->current_picture);
1427     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1428                                    s->current_picture_ptr)) < 0)
1429         return ret;
1430
1431     if (s->pict_type != AV_PICTURE_TYPE_B) {
1432         s->last_picture_ptr = s->next_picture_ptr;
1433         if (!s->droppable)
1434             s->next_picture_ptr = s->current_picture_ptr;
1435     }
1436
1437     if (s->last_picture_ptr) {
1438         ff_mpeg_unref_picture(s, &s->last_picture);
1439         if (s->last_picture_ptr->f->buf[0] &&
1440             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1441                                        s->last_picture_ptr)) < 0)
1442             return ret;
1443     }
1444     if (s->next_picture_ptr) {
1445         ff_mpeg_unref_picture(s, &s->next_picture);
1446         if (s->next_picture_ptr->f->buf[0] &&
1447             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1448                                        s->next_picture_ptr)) < 0)
1449             return ret;
1450     }
1451
1452     if (s->picture_structure!= PICT_FRAME) {
1453         int i;
1454         for (i = 0; i < 4; i++) {
1455             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1456                 s->current_picture.f->data[i] +=
1457                     s->current_picture.f->linesize[i];
1458             }
1459             s->current_picture.f->linesize[i] *= 2;
1460             s->last_picture.f->linesize[i]    *= 2;
1461             s->next_picture.f->linesize[i]    *= 2;
1462         }
1463     }
1464
1465     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1466         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1467         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1468     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1469         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1470         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1471     } else {
1472         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1473         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1474     }
1475
1476     if (s->dct_error_sum) {
1477         assert(s->avctx->noise_reduction && s->encoding);
1478         update_noise_reduction(s);
1479     }
1480
1481     return 0;
1482 }
1483
1484 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1485                           const AVFrame *pic_arg, int *got_packet)
1486 {
1487     MpegEncContext *s = avctx->priv_data;
1488     int i, stuffing_count, ret;
1489     int context_count = s->slice_context_count;
1490
1491     s->picture_in_gop_number++;
1492
1493     if (load_input_picture(s, pic_arg) < 0)
1494         return -1;
1495
1496     if (select_input_picture(s) < 0) {
1497         return -1;
1498     }
1499
1500     /* output? */
1501     if (s->new_picture.f->data[0]) {
1502         if (!pkt->data &&
1503             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1504             return ret;
1505         if (s->mb_info) {
1506             s->mb_info_ptr = av_packet_new_side_data(pkt,
1507                                  AV_PKT_DATA_H263_MB_INFO,
1508                                  s->mb_width*s->mb_height*12);
1509             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1510         }
1511
1512         for (i = 0; i < context_count; i++) {
1513             int start_y = s->thread_context[i]->start_mb_y;
1514             int   end_y = s->thread_context[i]->  end_mb_y;
1515             int h       = s->mb_height;
1516             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1517             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1518
1519             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1520         }
1521
1522         s->pict_type = s->new_picture.f->pict_type;
1523         //emms_c();
1524         ret = frame_start(s);
1525         if (ret < 0)
1526             return ret;
1527 vbv_retry:
1528         if (encode_picture(s, s->picture_number) < 0)
1529             return -1;
1530
1531         avctx->header_bits = s->header_bits;
1532         avctx->mv_bits     = s->mv_bits;
1533         avctx->misc_bits   = s->misc_bits;
1534         avctx->i_tex_bits  = s->i_tex_bits;
1535         avctx->p_tex_bits  = s->p_tex_bits;
1536         avctx->i_count     = s->i_count;
1537         // FIXME f/b_count in avctx
1538         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1539         avctx->skip_count  = s->skip_count;
1540
1541         frame_end(s);
1542
1543         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1544             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1545
1546         if (avctx->rc_buffer_size) {
1547             RateControlContext *rcc = &s->rc_context;
1548             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1549
1550             if (put_bits_count(&s->pb) > max_size &&
1551                 s->lambda < s->avctx->lmax) {
1552                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1553                                        (s->qscale + 1) / s->qscale);
1554                 if (s->adaptive_quant) {
1555                     int i;
1556                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1557                         s->lambda_table[i] =
1558                             FFMAX(s->lambda_table[i] + 1,
1559                                   s->lambda_table[i] * (s->qscale + 1) /
1560                                   s->qscale);
1561                 }
1562                 s->mb_skipped = 0;        // done in frame_start()
1563                 // done in encode_picture() so we must undo it
1564                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1565                     if (s->flipflop_rounding          ||
1566                         s->codec_id == AV_CODEC_ID_H263P ||
1567                         s->codec_id == AV_CODEC_ID_MPEG4)
1568                         s->no_rounding ^= 1;
1569                 }
1570                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1571                     s->time_base       = s->last_time_base;
1572                     s->last_non_b_time = s->time - s->pp_time;
1573                 }
1574                 for (i = 0; i < context_count; i++) {
1575                     PutBitContext *pb = &s->thread_context[i]->pb;
1576                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1577                 }
1578                 goto vbv_retry;
1579             }
1580
1581             assert(s->avctx->rc_max_rate);
1582         }
1583
1584         if (s->flags & CODEC_FLAG_PASS1)
1585             ff_write_pass1_stats(s);
1586
1587         for (i = 0; i < 4; i++) {
1588             s->current_picture_ptr->f->error[i] = s->current_picture.f->error[i];
1589             avctx->error[i] += s->current_picture_ptr->f->error[i];
1590         }
1591
1592         if (s->flags & CODEC_FLAG_PASS1)
1593             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1594                    avctx->i_tex_bits + avctx->p_tex_bits ==
1595                        put_bits_count(&s->pb));
1596         flush_put_bits(&s->pb);
1597         s->frame_bits  = put_bits_count(&s->pb);
1598
1599         stuffing_count = ff_vbv_update(s, s->frame_bits);
1600         if (stuffing_count) {
1601             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1602                     stuffing_count + 50) {
1603                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1604                 return -1;
1605             }
1606
1607             switch (s->codec_id) {
1608             case AV_CODEC_ID_MPEG1VIDEO:
1609             case AV_CODEC_ID_MPEG2VIDEO:
1610                 while (stuffing_count--) {
1611                     put_bits(&s->pb, 8, 0);
1612                 }
1613             break;
1614             case AV_CODEC_ID_MPEG4:
1615                 put_bits(&s->pb, 16, 0);
1616                 put_bits(&s->pb, 16, 0x1C3);
1617                 stuffing_count -= 4;
1618                 while (stuffing_count--) {
1619                     put_bits(&s->pb, 8, 0xFF);
1620                 }
1621             break;
1622             default:
1623                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1624             }
1625             flush_put_bits(&s->pb);
1626             s->frame_bits  = put_bits_count(&s->pb);
1627         }
1628
1629         /* update mpeg1/2 vbv_delay for CBR */
1630         if (s->avctx->rc_max_rate                          &&
1631             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1632             s->out_format == FMT_MPEG1                     &&
1633             90000LL * (avctx->rc_buffer_size - 1) <=
1634                 s->avctx->rc_max_rate * 0xFFFFLL) {
1635             int vbv_delay, min_delay;
1636             double inbits  = s->avctx->rc_max_rate *
1637                              av_q2d(s->avctx->time_base);
1638             int    minbits = s->frame_bits - 8 *
1639                              (s->vbv_delay_ptr - s->pb.buf - 1);
1640             double bits    = s->rc_context.buffer_index + minbits - inbits;
1641
1642             if (bits < 0)
1643                 av_log(s->avctx, AV_LOG_ERROR,
1644                        "Internal error, negative bits\n");
1645
1646             assert(s->repeat_first_field == 0);
1647
1648             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1649             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1650                         s->avctx->rc_max_rate;
1651
1652             vbv_delay = FFMAX(vbv_delay, min_delay);
1653
1654             assert(vbv_delay < 0xFFFF);
1655
1656             s->vbv_delay_ptr[0] &= 0xF8;
1657             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1658             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1659             s->vbv_delay_ptr[2] &= 0x07;
1660             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1661             avctx->vbv_delay     = vbv_delay * 300;
1662         }
1663         s->total_bits     += s->frame_bits;
1664         avctx->frame_bits  = s->frame_bits;
1665
1666         pkt->pts = s->current_picture.f->pts;
1667         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1668             if (!s->current_picture.f->coded_picture_number)
1669                 pkt->dts = pkt->pts - s->dts_delta;
1670             else
1671                 pkt->dts = s->reordered_pts;
1672             s->reordered_pts = pkt->pts;
1673         } else
1674             pkt->dts = pkt->pts;
1675         if (s->current_picture.f->key_frame)
1676             pkt->flags |= AV_PKT_FLAG_KEY;
1677         if (s->mb_info)
1678             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1679     } else {
1680         s->frame_bits = 0;
1681     }
1682     assert((s->frame_bits & 7) == 0);
1683
1684     pkt->size = s->frame_bits / 8;
1685     *got_packet = !!pkt->size;
1686     return 0;
1687 }
1688
1689 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1690                                                 int n, int threshold)
1691 {
1692     static const char tab[64] = {
1693         3, 2, 2, 1, 1, 1, 1, 1,
1694         1, 1, 1, 1, 1, 1, 1, 1,
1695         1, 1, 1, 1, 1, 1, 1, 1,
1696         0, 0, 0, 0, 0, 0, 0, 0,
1697         0, 0, 0, 0, 0, 0, 0, 0,
1698         0, 0, 0, 0, 0, 0, 0, 0,
1699         0, 0, 0, 0, 0, 0, 0, 0,
1700         0, 0, 0, 0, 0, 0, 0, 0
1701     };
1702     int score = 0;
1703     int run = 0;
1704     int i;
1705     int16_t *block = s->block[n];
1706     const int last_index = s->block_last_index[n];
1707     int skip_dc;
1708
1709     if (threshold < 0) {
1710         skip_dc = 0;
1711         threshold = -threshold;
1712     } else
1713         skip_dc = 1;
1714
1715     /* Are all we could set to zero already zero? */
1716     if (last_index <= skip_dc - 1)
1717         return;
1718
1719     for (i = 0; i <= last_index; i++) {
1720         const int j = s->intra_scantable.permutated[i];
1721         const int level = FFABS(block[j]);
1722         if (level == 1) {
1723             if (skip_dc && i == 0)
1724                 continue;
1725             score += tab[run];
1726             run = 0;
1727         } else if (level > 1) {
1728             return;
1729         } else {
1730             run++;
1731         }
1732     }
1733     if (score >= threshold)
1734         return;
1735     for (i = skip_dc; i <= last_index; i++) {
1736         const int j = s->intra_scantable.permutated[i];
1737         block[j] = 0;
1738     }
1739     if (block[0])
1740         s->block_last_index[n] = 0;
1741     else
1742         s->block_last_index[n] = -1;
1743 }
1744
1745 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1746                                int last_index)
1747 {
1748     int i;
1749     const int maxlevel = s->max_qcoeff;
1750     const int minlevel = s->min_qcoeff;
1751     int overflow = 0;
1752
1753     if (s->mb_intra) {
1754         i = 1; // skip clipping of intra dc
1755     } else
1756         i = 0;
1757
1758     for (; i <= last_index; i++) {
1759         const int j = s->intra_scantable.permutated[i];
1760         int level = block[j];
1761
1762         if (level > maxlevel) {
1763             level = maxlevel;
1764             overflow++;
1765         } else if (level < minlevel) {
1766             level = minlevel;
1767             overflow++;
1768         }
1769
1770         block[j] = level;
1771     }
1772
1773     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1774         av_log(s->avctx, AV_LOG_INFO,
1775                "warning, clipping %d dct coefficients to %d..%d\n",
1776                overflow, minlevel, maxlevel);
1777 }
1778
/**
 * Compute a per-pixel weight for an 8x8 block from the local pixel spread.
 *
 * For every pixel the (up to) 3x3 neighbourhood, cropped to the 8x8 block,
 * is examined; the weight is 36 * sqrt(count * sum(v^2) - sum(v)^2) / count
 * where count is the number of neighbourhood pixels.
 *
 * @param weight output, 64 entries in row-major order
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance in bytes between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            const int count = (bottom - top) * (right - left);
            int sum = 0;
            int sqr = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                const uint8_t *line = ptr + ny * stride;

                for (nx = left; nx < right; nx++) {
                    const int v = line[nx];

                    sum += v;
                    sqr += v * v;
                }
            }
            weight[col + 8 * row] =
                (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
1802
/**
 * Encode the macroblock at (s->mb_x, s->mb_y): fetch or predict the pixels,
 * transform and quantize the blocks, then hand them to the codec-specific
 * macroblock writer.
 *
 * Steps, in order:
 *  - pick the quantizer for this macroblock (adaptive quant / QP_RD),
 *  - locate the source pixels in s->new_picture, going through
 *    s->edge_emu_buffer when the macroblock crosses the picture edge,
 *  - intra: load the source pixels into s->block[];
 *    inter: run ff_MPV_motion() into s->dest[] and store source minus
 *    prediction in s->block[],
 *  - optionally choose interlaced DCT, mark blocks to skip, compute noise
 *    shaping weights,
 *  - quantize, clip, optionally refine and eliminate coefficients,
 *  - dispatch on s->codec_id to the bitstream writer.
 *
 * @param s               encoder context
 * @param motion_x        passed through to the codec-specific writer
 * @param motion_y        passed through to the codec-specific writer
 * @param mb_block_height height of the chroma area of one macroblock;
 *                        encode_mb() passes 8 for CHROMA_420 and 16 otherwise
 * @param mb_block_count  number of blocks in the macroblock; encode_mb()
 *                        passes 6 for CHROMA_420 and 8 otherwise
 */
1803 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1804                                                 int motion_x, int motion_y,
1805                                                 int mb_block_height,
1806                                                 int mb_block_count)
1807 {
1808     int16_t weight[8][64];
1809     int16_t orig[8][64];
1810     const int mb_x = s->mb_x;
1811     const int mb_y = s->mb_y;
1812     int i;
1813     int skip_dct[8];
1814     int dct_offset = s->linesize * 8; // default for progressive frames
1815     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1816     ptrdiff_t wrap_y, wrap_c;
1817
         /* every block starts out with the context-wide skipdct setting */
1818     for (i = 0; i < mb_block_count; i++)
1819         skip_dct[i] = s->skipdct;
1820
         /* per-macroblock quantizer: lambda comes from lambda_table, and unless
          * QP_RD is active qscale is taken from the picture's qscale_table */
1821     if (s->adaptive_quant) {
1822         const int last_qp = s->qscale;
1823         const int mb_xy = mb_x + mb_y * s->mb_stride;
1824
1825         s->lambda = s->lambda_table[mb_xy];
1826         update_qscale(s);
1827
1828         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1829             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1830             s->dquant = s->qscale - last_qp;
1831
1832             if (s->out_format == FMT_H263) {
                     /* quantizer change is limited to [-2, 2] for this format */
1833                 s->dquant = av_clip(s->dquant, -2, 2);
1834
1835                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
                         /* MPEG-4 inter MBs: no quantizer change for B-frame MBs
                          * with an odd dquant or direct mode, nor for 8x8 MVs */
1836                     if (!s->mb_intra) {
1837                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1838                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1839                                 s->dquant = 0;
1840                         }
1841                         if (s->mv_type == MV_TYPE_8X8)
1842                             s->dquant = 0;
1843                     }
1844                 }
1845             }
1846         }
1847         ff_set_qscale(s, last_qp + s->dquant);
1848     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1849         ff_set_qscale(s, s->qscale + s->dquant);
1850
         /* locate the source macroblock inside the picture being encoded */
1851     wrap_y = s->linesize;
1852     wrap_c = s->uvlinesize;
1853     ptr_y  = s->new_picture.f->data[0] +
1854              (mb_y * 16 * wrap_y)              + mb_x * 16;
1855     ptr_cb = s->new_picture.f->data[1] +
1856              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1857     ptr_cr = s->new_picture.f->data[2] +
1858              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1859
         /* the macroblock reaches past the right or bottom picture edge: let
          * emulated_edge_mc() build the three planes in edge_emu_buffer and
          * read the source from there instead */
1860     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1861         uint8_t *ebuf = s->edge_emu_buffer + 32;
1862         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
1863                                  wrap_y, wrap_y,
1864                                  16, 16, mb_x * 16, mb_y * 16,
1865                                  s->width, s->height);
1866         ptr_y = ebuf;
1867         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
1868                                  wrap_c, wrap_c,
1869                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1870                                  s->width >> 1, s->height >> 1);
1871         ptr_cb = ebuf + 18 * wrap_y;
1872         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
1873                                  wrap_c, wrap_c,
1874                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1875                                  s->width >> 1, s->height >> 1);
1876         ptr_cr = ebuf + 18 * wrap_y + 8;
1877     }
1878
1879     if (s->mb_intra) {
             /* intra: the blocks are loaded straight from the source pixels */
1880         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1881             int progressive_score, interlaced_score;
1882
                 /* compare the two 16x8 halves line by line (progressive)
                  * against the two fields (interlaced); the progressive score
                  * is lowered by 400, which biases the choice towards it */
1883             s->interlaced_dct = 0;
1884             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1885                                                     NULL, wrap_y, 8) +
1886                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1887                                                     NULL, wrap_y, 8) - 400;
1888
1889             if (progressive_score > 0) {
1890                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1891                                                        NULL, wrap_y * 2, 8) +
1892                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1893                                                        NULL, wrap_y * 2, 8);
1894                 if (progressive_score > interlaced_score) {
1895                     s->interlaced_dct = 1;
1896
                         /* lower blocks now start one line down and every
                          * block steps over two lines at a time */
1897                     dct_offset = wrap_y;
1898                     wrap_y <<= 1;
1899                     if (s->chroma_format == CHROMA_422)
1900                         wrap_c <<= 1;
1901                 }
1902             }
1903         }
1904
1905         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1906         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1907         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1908         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1909
1910         if (s->flags & CODEC_FLAG_GRAY) {
1911             skip_dct[4] = 1;
1912             skip_dct[5] = 1;
1913         } else {
1914             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1915             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1916             if (!s->chroma_y_shift) { /* 422 */
1917                 s->dsp.get_pixels(s->block[6],
1918                                   ptr_cb + (dct_offset >> 1), wrap_c);
1919                 s->dsp.get_pixels(s->block[7],
1920                                   ptr_cr + (dct_offset >> 1), wrap_c);
1921             }
1922         }
1923     } else {
             /* inter: build the prediction in s->dest[], then code the
              * difference between source and prediction */
1924         op_pixels_func (*op_pix)[4];
1925         qpel_mc_func (*op_qpix)[16];
1926         uint8_t *dest_y, *dest_cb, *dest_cr;
1927
1928         dest_y  = s->dest[0];
1929         dest_cb = s->dest[1];
1930         dest_cr = s->dest[2];
1931
1932         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1933             op_pix  = s->hdsp.put_pixels_tab;
1934             op_qpix = s->dsp.put_qpel_pixels_tab;
1935         } else {
1936             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
1937             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1938         }
1939
1940         if (s->mv_dir & MV_DIR_FORWARD) {
1941             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
1942                           s->last_picture.f->data,
1943                           op_pix, op_qpix);
                 /* a following backward pass uses the avg tables instead of
                  * the put tables */
1944             op_pix  = s->hdsp.avg_pixels_tab;
1945             op_qpix = s->dsp.avg_qpel_pixels_tab;
1946         }
1947         if (s->mv_dir & MV_DIR_BACKWARD) {
1948             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
1949                           s->next_picture.f->data,
1950                           op_pix, op_qpix);
1951         }
1952
1953         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1954             int progressive_score, interlaced_score;
1955
                 /* same frame/field decision as in the intra case, but on the
                  * prediction error; VSSE gets an extra 400 bias */
1956             s->interlaced_dct = 0;
1957             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1958                                                     ptr_y,              wrap_y,
1959                                                     8) +
1960                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1961                                                     ptr_y + wrap_y * 8, wrap_y,
1962                                                     8) - 400;
1963
1964             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1965                 progressive_score -= 400;
1966
1967             if (progressive_score > 0) {
1968                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1969                                                        ptr_y,
1970                                                        wrap_y * 2, 8) +
1971                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1972                                                        ptr_y + wrap_y,
1973                                                        wrap_y * 2, 8);
1974
1975                 if (progressive_score > interlaced_score) {
1976                     s->interlaced_dct = 1;
1977
1978                     dct_offset = wrap_y;
1979                     wrap_y <<= 1;
1980                     if (s->chroma_format == CHROMA_422)
1981                         wrap_c <<= 1;
1982                 }
1983             }
1984         }
1985
1986         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1987         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1988         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1989                            dest_y + dct_offset, wrap_y);
1990         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1991                            dest_y + dct_offset + 8, wrap_y);
1992
1993         if (s->flags & CODEC_FLAG_GRAY) {
1994             skip_dct[4] = 1;
1995             skip_dct[5] = 1;
1996         } else {
1997             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1998             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1999             if (!s->chroma_y_shift) { /* 422 */
2000                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
2001                                    dest_cb + (dct_offset >> 1), wrap_c);
2002                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
2003                                    dest_cr + (dct_offset >> 1), wrap_c);
2004             }
2005         }
2006         /* pre quantization */
             /* only when the macroblock's mc_mb_var is below 2 * qscale^2:
              * a block whose SAD against the prediction is below 20 * qscale
              * is not transformed at all */
2007         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2008                 2 * s->qscale * s->qscale) {
2009             // FIXME optimize
2010             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
2011                               wrap_y, 8) < 20 * s->qscale)
2012                 skip_dct[0] = 1;
2013             if (s->dsp.sad[1](NULL, ptr_y + 8,
2014                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2015                 skip_dct[1] = 1;
2016             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
2017                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
2018                 skip_dct[2] = 1;
2019             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
2020                               dest_y + dct_offset + 8,
2021                               wrap_y, 8) < 20 * s->qscale)
2022                 skip_dct[3] = 1;
2023             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
2024                               wrap_c, 8) < 20 * s->qscale)
2025                 skip_dct[4] = 1;
2026             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
2027                               wrap_c, 8) < 20 * s->qscale)
2028                 skip_dct[5] = 1;
2029             if (!s->chroma_y_shift) { /* 422 */
2030                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2031                                   dest_cb + (dct_offset >> 1),
2032                                   wrap_c, 8) < 20 * s->qscale)
2033                     skip_dct[6] = 1;
2034                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2035                                   dest_cr + (dct_offset >> 1),
2036                                   wrap_c, 8) < 20 * s->qscale)
2037                     skip_dct[7] = 1;
2038             }
2039         }
2040     }
2041
         /* noise shaping: compute per-block weights from the source pixels and
          * keep a copy of the unquantized blocks; both are handed to
          * dct_quantize_refine() after quantization */
2042     if (s->quantizer_noise_shaping) {
2043         if (!skip_dct[0])
2044             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2045         if (!skip_dct[1])
2046             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2047         if (!skip_dct[2])
2048             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2049         if (!skip_dct[3])
2050             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2051         if (!skip_dct[4])
2052             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2053         if (!skip_dct[5])
2054             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2055         if (!s->chroma_y_shift) { /* 422 */
2056             if (!skip_dct[6])
2057                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2058                                   wrap_c);
2059             if (!skip_dct[7])
2060                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2061                                   wrap_c);
2062         }
2063         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2064     }
2065
2066     /* DCT & quantize */
2067     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
2068     {
2069         for (i = 0; i < mb_block_count; i++) {
2070             if (!skip_dct[i]) {
2071                 int overflow;
2072                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2073                 // FIXME we could decide to change to quantizer instead of
2074                 // clipping
2075                 // JS: I don't think that would be a good idea it could lower
2076                 //     quality instead of improve it. Just INTRADC clipping
2077                 //     deserves changes in quantizer
2078                 if (overflow)
2079                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2080             } else
                     /* skipped blocks are marked as having no coefficients */
2081                 s->block_last_index[i] = -1;
2082         }
2083         if (s->quantizer_noise_shaping) {
2084             for (i = 0; i < mb_block_count; i++) {
2085                 if (!skip_dct[i]) {
2086                     s->block_last_index[i] =
2087                         dct_quantize_refine(s, s->block[i], weight[i],
2088                                             orig[i], i, s->qscale);
2089                 }
2090             }
2091         }
2092
             /* single coefficient elimination is applied to inter MBs only:
              * blocks 0-3 use the luma threshold, the rest the chroma one */
2093         if (s->luma_elim_threshold && !s->mb_intra)
2094             for (i = 0; i < 4; i++)
2095                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2096         if (s->chroma_elim_threshold && !s->mb_intra)
2097             for (i = 4; i < mb_block_count; i++)
2098                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2099
2100         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2101             for (i = 0; i < mb_block_count; i++) {
2102                 if (s->block_last_index[i] == -1)
2103                     s->coded_score[i] = INT_MAX / 256;
2104             }
2105         }
2106     }
2107
         /* gray intra MB: chroma was not transformed, so give both chroma
          * blocks a single DC coefficient of 1024 / c_dc_scale (rounded) */
2108     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2109         s->block_last_index[4] =
2110         s->block_last_index[5] = 0;
2111         s->block[4][0] =
2112         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2113     }
2114
2115     // non c quantize code returns incorrect block_last_index FIXME
         /* so with alternate scan, search backwards for the real last
          * nonzero coefficient in scan order */
2116     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2117         for (i = 0; i < mb_block_count; i++) {
2118             int j;
2119             if (s->block_last_index[i] > 0) {
2120                 for (j = 63; j > 0; j--) {
2121                     if (s->block[i][s->intra_scantable.permutated[j]])
2122                         break;
2123                 }
2124                 s->block_last_index[i] = j;
2125             }
2126         }
2127     }
2128
2129     /* huffman encode */
         /* each call is guarded by its CONFIG_* constant, so encoders that are
          * compiled out are never referenced */
2130     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2131     case AV_CODEC_ID_MPEG1VIDEO:
2132     case AV_CODEC_ID_MPEG2VIDEO:
2133         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2134             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2135         break;
2136     case AV_CODEC_ID_MPEG4:
2137         if (CONFIG_MPEG4_ENCODER)
2138             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2139         break;
2140     case AV_CODEC_ID_MSMPEG4V2:
2141     case AV_CODEC_ID_MSMPEG4V3:
2142     case AV_CODEC_ID_WMV1:
2143         if (CONFIG_MSMPEG4_ENCODER)
2144             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2145         break;
2146     case AV_CODEC_ID_WMV2:
2147         if (CONFIG_WMV2_ENCODER)
2148             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2149         break;
2150     case AV_CODEC_ID_H261:
2151         if (CONFIG_H261_ENCODER)
2152             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2153         break;
2154     case AV_CODEC_ID_H263:
2155     case AV_CODEC_ID_H263P:
2156     case AV_CODEC_ID_FLV1:
2157     case AV_CODEC_ID_RV10:
2158     case AV_CODEC_ID_RV20:
2159         if (CONFIG_H263_ENCODER)
2160             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2161         break;
2162     case AV_CODEC_ID_MJPEG:
2163         if (CONFIG_MJPEG_ENCODER)
2164             ff_mjpeg_encode_mb(s, s->block);
2165         break;
2166     default:
2167         assert(0);
2168     }
2169 }
2170
2171 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2172 {
2173     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2174     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2175 }
2176
2177 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2178     int i;
2179
2180     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2181
2182     /* mpeg1 */
2183     d->mb_skip_run= s->mb_skip_run;
2184     for(i=0; i<3; i++)
2185         d->last_dc[i] = s->last_dc[i];
2186
2187     /* statistics */
2188     d->mv_bits= s->mv_bits;
2189     d->i_tex_bits= s->i_tex_bits;
2190     d->p_tex_bits= s->p_tex_bits;
2191     d->i_count= s->i_count;
2192     d->f_count= s->f_count;
2193     d->b_count= s->b_count;
2194     d->skip_count= s->skip_count;
2195     d->misc_bits= s->misc_bits;
2196     d->last_bits= 0;
2197
2198     d->mb_skipped= 0;
2199     d->qscale= s->qscale;
2200     d->dquant= s->dquant;
2201
2202     d->esc3_level_length= s->esc3_level_length;
2203 }
2204
2205 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2206     int i;
2207
2208     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2209     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2210
2211     /* mpeg1 */
2212     d->mb_skip_run= s->mb_skip_run;
2213     for(i=0; i<3; i++)
2214         d->last_dc[i] = s->last_dc[i];
2215
2216     /* statistics */
2217     d->mv_bits= s->mv_bits;
2218     d->i_tex_bits= s->i_tex_bits;
2219     d->p_tex_bits= s->p_tex_bits;
2220     d->i_count= s->i_count;
2221     d->f_count= s->f_count;
2222     d->b_count= s->b_count;
2223     d->skip_count= s->skip_count;
2224     d->misc_bits= s->misc_bits;
2225
2226     d->mb_intra= s->mb_intra;
2227     d->mb_skipped= s->mb_skipped;
2228     d->mv_type= s->mv_type;
2229     d->mv_dir= s->mv_dir;
2230     d->pb= s->pb;
2231     if(s->data_partitioning){
2232         d->pb2= s->pb2;
2233         d->tex_pb= s->tex_pb;
2234     }
2235     d->block= s->block;
2236     for(i=0; i<8; i++)
2237         d->block_last_index[i]= s->block_last_index[i];
2238     d->interlaced_dct= s->interlaced_dct;
2239     d->qscale= s->qscale;
2240
2241     d->esc3_level_length= s->esc3_level_length;
2242 }
2243
2244 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2245                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2246                            int *dmin, int *next_block, int motion_x, int motion_y)
2247 {
2248     int score;
2249     uint8_t *dest_backup[3];
2250
2251     copy_context_before_encode(s, backup, type);
2252
2253     s->block= s->blocks[*next_block];
2254     s->pb= pb[*next_block];
2255     if(s->data_partitioning){
2256         s->pb2   = pb2   [*next_block];
2257         s->tex_pb= tex_pb[*next_block];
2258     }
2259
2260     if(*next_block){
2261         memcpy(dest_backup, s->dest, sizeof(s->dest));
2262         s->dest[0] = s->rd_scratchpad;
2263         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2264         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2265         assert(s->linesize >= 32); //FIXME
2266     }
2267
2268     encode_mb(s, motion_x, motion_y);
2269
2270     score= put_bits_count(&s->pb);
2271     if(s->data_partitioning){
2272         score+= put_bits_count(&s->pb2);
2273         score+= put_bits_count(&s->tex_pb);
2274     }
2275
2276     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2277         ff_MPV_decode_mb(s, s->block);
2278
2279         score *= s->lambda2;
2280         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2281     }
2282
2283     if(*next_block){
2284         memcpy(s->dest, dest_backup, sizeof(s->dest));
2285     }
2286
2287     if(score<*dmin){
2288         *dmin= score;
2289         *next_block^=1;
2290
2291         copy_context_after_encode(best, s, type);
2292     }
2293 }
2294
2295 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2296     uint32_t *sq = ff_square_tab + 256;
2297     int acc=0;
2298     int x,y;
2299
2300     if(w==16 && h==16)
2301         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2302     else if(w==8 && h==8)
2303         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2304
2305     for(y=0; y<h; y++){
2306         for(x=0; x<w; x++){
2307             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2308         }
2309     }
2310
2311     assert(acc>=0);
2312
2313     return acc;
2314 }
2315
2316 static int sse_mb(MpegEncContext *s){
2317     int w= 16;
2318     int h= 16;
2319
2320     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2321     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2322
2323     if(w==16 && h==16)
2324       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2325         return  s->dsp.nsse[0](s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2326                +s->dsp.nsse[1](s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2327                +s->dsp.nsse[1](s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2328       }else{
2329         return  s->dsp.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2330                +s->dsp.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2331                +s->dsp.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2332       }
2333     else
2334         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2335                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2336                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2337 }
2338
2339 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2340     MpegEncContext *s= *(void**)arg;
2341
2342
2343     s->me.pre_pass=1;
2344     s->me.dia_size= s->avctx->pre_dia_size;
2345     s->first_slice_line=1;
2346     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2347         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2348             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2349         }
2350         s->first_slice_line=0;
2351     }
2352
2353     s->me.pre_pass=0;
2354
2355     return 0;
2356 }
2357
2358 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2359     MpegEncContext *s= *(void**)arg;
2360
2361     s->me.dia_size= s->avctx->dia_size;
2362     s->first_slice_line=1;
2363     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2364         s->mb_x=0; //for block init below
2365         ff_init_block_index(s);
2366         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2367             s->block_index[0]+=2;
2368             s->block_index[1]+=2;
2369             s->block_index[2]+=2;
2370             s->block_index[3]+=2;
2371
2372             /* compute motion vector & mb_type and store in context */
2373             if(s->pict_type==AV_PICTURE_TYPE_B)
2374                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2375             else
2376                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2377         }
2378         s->first_slice_line=0;
2379     }
2380     return 0;
2381 }
2382
2383 static int mb_var_thread(AVCodecContext *c, void *arg){
2384     MpegEncContext *s= *(void**)arg;
2385     int mb_x, mb_y;
2386
2387     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2388         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2389             int xx = mb_x * 16;
2390             int yy = mb_y * 16;
2391             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2392             int varc;
2393             int sum = s->dsp.pix_sum(pix, s->linesize);
2394
2395             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2396
2397             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2398             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2399             s->me.mb_var_sum_temp    += varc;
2400         }
2401     }
2402     return 0;
2403 }
2404
2405 static void write_slice_end(MpegEncContext *s){
2406     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2407         if(s->partitioned_frame){
2408             ff_mpeg4_merge_partitions(s);
2409         }
2410
2411         ff_mpeg4_stuffing(&s->pb);
2412     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2413         ff_mjpeg_encode_stuffing(&s->pb);
2414     }
2415
2416     avpriv_align_put_bits(&s->pb);
2417     flush_put_bits(&s->pb);
2418
2419     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2420         s->misc_bits+= get_bits_diff(s);
2421 }
2422
2423 static void write_mb_info(MpegEncContext *s)
2424 {
2425     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2426     int offset = put_bits_count(&s->pb);
2427     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2428     int gobn = s->mb_y / s->gob_index;
2429     int pred_x, pred_y;
2430     if (CONFIG_H263_ENCODER)
2431         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2432     bytestream_put_le32(&ptr, offset);
2433     bytestream_put_byte(&ptr, s->qscale);
2434     bytestream_put_byte(&ptr, gobn);
2435     bytestream_put_le16(&ptr, mba);
2436     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2437     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2438     /* 4MV not implemented */
2439     bytestream_put_byte(&ptr, 0); /* hmv2 */
2440     bytestream_put_byte(&ptr, 0); /* vmv2 */
2441 }
2442
2443 static void update_mb_info(MpegEncContext *s, int startcode)
2444 {
2445     if (!s->mb_info)
2446         return;
2447     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2448         s->mb_info_size += 12;
2449         s->prev_mb_info = s->last_mb_info;
2450     }
2451     if (startcode) {
2452         s->prev_mb_info = put_bits_count(&s->pb)/8;
2453         /* This might have incremented mb_info_size above, and we return without
2454          * actually writing any info into that slot yet. But in that case,
2455          * this will be called again at the start of the after writing the
2456          * start code, actually writing the mb info. */
2457         return;
2458     }
2459
2460     s->last_mb_info = put_bits_count(&s->pb)/8;
2461     if (!s->mb_info_size)
2462         s->mb_info_size += 12;
2463     write_mb_info(s);
2464 }
2465
2466 static int encode_thread(AVCodecContext *c, void *arg){
2467     MpegEncContext *s= *(void**)arg;
2468     int mb_x, mb_y, pdif = 0;
2469     int chr_h= 16>>s->chroma_y_shift;
2470     int i, j;
2471     MpegEncContext best_s, backup_s;
2472     uint8_t bit_buf[2][MAX_MB_BYTES];
2473     uint8_t bit_buf2[2][MAX_MB_BYTES];
2474     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2475     PutBitContext pb[2], pb2[2], tex_pb[2];
2476
2477     for(i=0; i<2; i++){
2478         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2479         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2480         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2481     }
2482
2483     s->last_bits= put_bits_count(&s->pb);
2484     s->mv_bits=0;
2485     s->misc_bits=0;
2486     s->i_tex_bits=0;
2487     s->p_tex_bits=0;
2488     s->i_count=0;
2489     s->f_count=0;
2490     s->b_count=0;
2491     s->skip_count=0;
2492
2493     for(i=0; i<3; i++){
2494         /* init last dc values */
2495         /* note: quant matrix value (8) is implied here */
2496         s->last_dc[i] = 128 << s->intra_dc_precision;
2497
2498         s->current_picture.f->error[i] = 0;
2499     }
2500     s->mb_skip_run = 0;
2501     memset(s->last_mv, 0, sizeof(s->last_mv));
2502
2503     s->last_mv_dir = 0;
2504
2505     switch(s->codec_id){
2506     case AV_CODEC_ID_H263:
2507     case AV_CODEC_ID_H263P:
2508     case AV_CODEC_ID_FLV1:
2509         if (CONFIG_H263_ENCODER)
2510             s->gob_index = ff_h263_get_gob_height(s);
2511         break;
2512     case AV_CODEC_ID_MPEG4:
2513         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2514             ff_mpeg4_init_partitions(s);
2515         break;
2516     }
2517
2518     s->resync_mb_x=0;
2519     s->resync_mb_y=0;
2520     s->first_slice_line = 1;
2521     s->ptr_lastgob = s->pb.buf;
2522     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2523         s->mb_x=0;
2524         s->mb_y= mb_y;
2525
2526         ff_set_qscale(s, s->qscale);
2527         ff_init_block_index(s);
2528
2529         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2530             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2531             int mb_type= s->mb_type[xy];
2532 //            int d;
2533             int dmin= INT_MAX;
2534             int dir;
2535
2536             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2537                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2538                 return -1;
2539             }
2540             if(s->data_partitioning){
2541                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2542                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2543                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2544                     return -1;
2545                 }
2546             }
2547
2548             s->mb_x = mb_x;
2549             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2550             ff_update_block_index(s);
2551
2552             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2553                 ff_h261_reorder_mb_index(s);
2554                 xy= s->mb_y*s->mb_stride + s->mb_x;
2555                 mb_type= s->mb_type[xy];
2556             }
2557
2558             /* write gob / video packet header  */
2559             if(s->rtp_mode){
2560                 int current_packet_size, is_gob_start;
2561
2562                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2563
2564                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2565
2566                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2567
2568                 switch(s->codec_id){
2569                 case AV_CODEC_ID_H263:
2570                 case AV_CODEC_ID_H263P:
2571                     if(!s->h263_slice_structured)
2572                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2573                     break;
2574                 case AV_CODEC_ID_MPEG2VIDEO:
2575                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2576                 case AV_CODEC_ID_MPEG1VIDEO:
2577                     if(s->mb_skip_run) is_gob_start=0;
2578                     break;
2579                 }
2580
2581                 if(is_gob_start){
2582                     if(s->start_mb_y != mb_y || mb_x!=0){
2583                         write_slice_end(s);
2584
2585                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2586                             ff_mpeg4_init_partitions(s);
2587                         }
2588                     }
2589
2590                     assert((put_bits_count(&s->pb)&7) == 0);
2591                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2592
2593                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2594                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2595                         int d = 100 / s->error_rate;
2596                         if(r % d == 0){
2597                             current_packet_size=0;
2598                             s->pb.buf_ptr= s->ptr_lastgob;
2599                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2600                         }
2601                     }
2602
2603                     if (s->avctx->rtp_callback){
2604                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2605                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2606                     }
2607                     update_mb_info(s, 1);
2608
2609                     switch(s->codec_id){
2610                     case AV_CODEC_ID_MPEG4:
2611                         if (CONFIG_MPEG4_ENCODER) {
2612                             ff_mpeg4_encode_video_packet_header(s);
2613                             ff_mpeg4_clean_buffers(s);
2614                         }
2615                     break;
2616                     case AV_CODEC_ID_MPEG1VIDEO:
2617                     case AV_CODEC_ID_MPEG2VIDEO:
2618                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2619                             ff_mpeg1_encode_slice_header(s);
2620                             ff_mpeg1_clean_buffers(s);
2621                         }
2622                     break;
2623                     case AV_CODEC_ID_H263:
2624                     case AV_CODEC_ID_H263P:
2625                         if (CONFIG_H263_ENCODER)
2626                             ff_h263_encode_gob_header(s, mb_y);
2627                     break;
2628                     }
2629
2630                     if(s->flags&CODEC_FLAG_PASS1){
2631                         int bits= put_bits_count(&s->pb);
2632                         s->misc_bits+= bits - s->last_bits;
2633                         s->last_bits= bits;
2634                     }
2635
2636                     s->ptr_lastgob += current_packet_size;
2637                     s->first_slice_line=1;
2638                     s->resync_mb_x=mb_x;
2639                     s->resync_mb_y=mb_y;
2640                 }
2641             }
2642
2643             if(  (s->resync_mb_x   == s->mb_x)
2644                && s->resync_mb_y+1 == s->mb_y){
2645                 s->first_slice_line=0;
2646             }
2647
2648             s->mb_skipped=0;
2649             s->dquant=0; //only for QP_RD
2650
2651             update_mb_info(s, 0);
2652
2653             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2654                 int next_block=0;
2655                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2656
2657                 copy_context_before_encode(&backup_s, s, -1);
2658                 backup_s.pb= s->pb;
2659                 best_s.data_partitioning= s->data_partitioning;
2660                 best_s.partitioned_frame= s->partitioned_frame;
2661                 if(s->data_partitioning){
2662                     backup_s.pb2= s->pb2;
2663                     backup_s.tex_pb= s->tex_pb;
2664                 }
2665
2666                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2667                     s->mv_dir = MV_DIR_FORWARD;
2668                     s->mv_type = MV_TYPE_16X16;
2669                     s->mb_intra= 0;
2670                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2671                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2672                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2673                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2674                 }
2675                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2676                     s->mv_dir = MV_DIR_FORWARD;
2677                     s->mv_type = MV_TYPE_FIELD;
2678                     s->mb_intra= 0;
2679                     for(i=0; i<2; i++){
2680                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2681                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2682                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2683                     }
2684                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2685                                  &dmin, &next_block, 0, 0);
2686                 }
2687                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2688                     s->mv_dir = MV_DIR_FORWARD;
2689                     s->mv_type = MV_TYPE_16X16;
2690                     s->mb_intra= 0;
2691                     s->mv[0][0][0] = 0;
2692                     s->mv[0][0][1] = 0;
2693                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2694                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2695                 }
2696                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2697                     s->mv_dir = MV_DIR_FORWARD;
2698                     s->mv_type = MV_TYPE_8X8;
2699                     s->mb_intra= 0;
2700                     for(i=0; i<4; i++){
2701                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2702                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2703                     }
2704                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2705                                  &dmin, &next_block, 0, 0);
2706                 }
2707                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2708                     s->mv_dir = MV_DIR_FORWARD;
2709                     s->mv_type = MV_TYPE_16X16;
2710                     s->mb_intra= 0;
2711                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2712                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2713                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2714                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2715                 }
2716                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2717                     s->mv_dir = MV_DIR_BACKWARD;
2718                     s->mv_type = MV_TYPE_16X16;
2719                     s->mb_intra= 0;
2720                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2721                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2722                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2723                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2724                 }
2725                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2726                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2727                     s->mv_type = MV_TYPE_16X16;
2728                     s->mb_intra= 0;
2729                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2730                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2731                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2732                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2733                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2734                                  &dmin, &next_block, 0, 0);
2735                 }
2736                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2737                     s->mv_dir = MV_DIR_FORWARD;
2738                     s->mv_type = MV_TYPE_FIELD;
2739                     s->mb_intra= 0;
2740                     for(i=0; i<2; i++){
2741                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2742                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2743                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2744                     }
2745                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2746                                  &dmin, &next_block, 0, 0);
2747                 }
2748                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2749                     s->mv_dir = MV_DIR_BACKWARD;
2750                     s->mv_type = MV_TYPE_FIELD;
2751                     s->mb_intra= 0;
2752                     for(i=0; i<2; i++){
2753                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2754                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2755                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2756                     }
2757                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2758                                  &dmin, &next_block, 0, 0);
2759                 }
2760                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2761                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2762                     s->mv_type = MV_TYPE_FIELD;
2763                     s->mb_intra= 0;
2764                     for(dir=0; dir<2; dir++){
2765                         for(i=0; i<2; i++){
2766                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2767                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2768                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2769                         }
2770                     }
2771                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2772                                  &dmin, &next_block, 0, 0);
2773                 }
2774                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2775                     s->mv_dir = 0;
2776                     s->mv_type = MV_TYPE_16X16;
2777                     s->mb_intra= 1;
2778                     s->mv[0][0][0] = 0;
2779                     s->mv[0][0][1] = 0;
2780                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2781                                  &dmin, &next_block, 0, 0);
2782                     if(s->h263_pred || s->h263_aic){
2783                         if(best_s.mb_intra)
2784                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2785                         else
2786                             ff_clean_intra_table_entries(s); //old mode?
2787                     }
2788                 }
2789
2790                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2791                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2792                         const int last_qp= backup_s.qscale;
2793                         int qpi, qp, dc[6];
2794                         int16_t ac[6][16];
2795                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2796                         static const int dquant_tab[4]={-1,1,-2,2};
2797
2798                         assert(backup_s.dquant == 0);
2799
2800                         //FIXME intra
2801                         s->mv_dir= best_s.mv_dir;
2802                         s->mv_type = MV_TYPE_16X16;
2803                         s->mb_intra= best_s.mb_intra;
2804                         s->mv[0][0][0] = best_s.mv[0][0][0];
2805                         s->mv[0][0][1] = best_s.mv[0][0][1];
2806                         s->mv[1][0][0] = best_s.mv[1][0][0];
2807                         s->mv[1][0][1] = best_s.mv[1][0][1];
2808
2809                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2810                         for(; qpi<4; qpi++){
2811                             int dquant= dquant_tab[qpi];
2812                             qp= last_qp + dquant;
2813                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2814                                 continue;
2815                             backup_s.dquant= dquant;
2816                             if(s->mb_intra && s->dc_val[0]){
2817                                 for(i=0; i<6; i++){
2818                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2819                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2820                                 }
2821                             }
2822
2823                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2824                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2825                             if(best_s.qscale != qp){
2826                                 if(s->mb_intra && s->dc_val[0]){
2827                                     for(i=0; i<6; i++){
2828                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2829                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2830                                     }
2831                                 }
2832                             }
2833                         }
2834                     }
2835                 }
2836                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2837                     int mx= s->b_direct_mv_table[xy][0];
2838                     int my= s->b_direct_mv_table[xy][1];
2839
2840                     backup_s.dquant = 0;
2841                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2842                     s->mb_intra= 0;
2843                     ff_mpeg4_set_direct_mv(s, mx, my);
2844                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2845                                  &dmin, &next_block, mx, my);
2846                 }
2847                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2848                     backup_s.dquant = 0;
2849                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2850                     s->mb_intra= 0;
2851                     ff_mpeg4_set_direct_mv(s, 0, 0);
2852                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2853                                  &dmin, &next_block, 0, 0);
2854                 }
2855                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2856                     int coded=0;
2857                     for(i=0; i<6; i++)
2858                         coded |= s->block_last_index[i];
2859                     if(coded){
2860                         int mx,my;
2861                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2862                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2863                             mx=my=0; //FIXME find the one we actually used
2864                             ff_mpeg4_set_direct_mv(s, mx, my);
2865                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2866                             mx= s->mv[1][0][0];
2867                             my= s->mv[1][0][1];
2868                         }else{
2869                             mx= s->mv[0][0][0];
2870                             my= s->mv[0][0][1];
2871                         }
2872
2873                         s->mv_dir= best_s.mv_dir;
2874                         s->mv_type = best_s.mv_type;
2875                         s->mb_intra= 0;
2876 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2877                         s->mv[0][0][1] = best_s.mv[0][0][1];
2878                         s->mv[1][0][0] = best_s.mv[1][0][0];
2879                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2880                         backup_s.dquant= 0;
2881                         s->skipdct=1;
2882                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2883                                         &dmin, &next_block, mx, my);
2884                         s->skipdct=0;
2885                     }
2886                 }
2887
2888                 s->current_picture.qscale_table[xy] = best_s.qscale;
2889
2890                 copy_context_after_encode(s, &best_s, -1);
2891
2892                 pb_bits_count= put_bits_count(&s->pb);
2893                 flush_put_bits(&s->pb);
2894                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2895                 s->pb= backup_s.pb;
2896
2897                 if(s->data_partitioning){
2898                     pb2_bits_count= put_bits_count(&s->pb2);
2899                     flush_put_bits(&s->pb2);
2900                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2901                     s->pb2= backup_s.pb2;
2902
2903                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2904                     flush_put_bits(&s->tex_pb);
2905                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2906                     s->tex_pb= backup_s.tex_pb;
2907                 }
2908                 s->last_bits= put_bits_count(&s->pb);
2909
2910                 if (CONFIG_H263_ENCODER &&
2911                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2912                     ff_h263_update_motion_val(s);
2913
2914                 if(next_block==0){ //FIXME 16 vs linesize16
2915                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2916                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2917                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2918                 }
2919
2920                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2921                     ff_MPV_decode_mb(s, s->block);
2922             } else {
2923                 int motion_x = 0, motion_y = 0;
2924                 s->mv_type=MV_TYPE_16X16;
2925                 // only one MB-Type possible
2926
2927                 switch(mb_type){
2928                 case CANDIDATE_MB_TYPE_INTRA:
2929                     s->mv_dir = 0;
2930                     s->mb_intra= 1;
2931                     motion_x= s->mv[0][0][0] = 0;
2932                     motion_y= s->mv[0][0][1] = 0;
2933                     break;
2934                 case CANDIDATE_MB_TYPE_INTER:
2935                     s->mv_dir = MV_DIR_FORWARD;
2936                     s->mb_intra= 0;
2937                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2938                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2939                     break;
2940                 case CANDIDATE_MB_TYPE_INTER_I:
2941                     s->mv_dir = MV_DIR_FORWARD;
2942                     s->mv_type = MV_TYPE_FIELD;
2943                     s->mb_intra= 0;
2944                     for(i=0; i<2; i++){
2945                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2946                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2947                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2948                     }
2949                     break;
2950                 case CANDIDATE_MB_TYPE_INTER4V:
2951                     s->mv_dir = MV_DIR_FORWARD;
2952                     s->mv_type = MV_TYPE_8X8;
2953                     s->mb_intra= 0;
2954                     for(i=0; i<4; i++){
2955                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2956                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2957                     }
2958                     break;
2959                 case CANDIDATE_MB_TYPE_DIRECT:
2960                     if (CONFIG_MPEG4_ENCODER) {
2961                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2962                         s->mb_intra= 0;
2963                         motion_x=s->b_direct_mv_table[xy][0];
2964                         motion_y=s->b_direct_mv_table[xy][1];
2965                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2966                     }
2967                     break;
2968                 case CANDIDATE_MB_TYPE_DIRECT0:
2969                     if (CONFIG_MPEG4_ENCODER) {
2970                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2971                         s->mb_intra= 0;
2972                         ff_mpeg4_set_direct_mv(s, 0, 0);
2973                     }
2974                     break;
2975                 case CANDIDATE_MB_TYPE_BIDIR:
2976                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2977                     s->mb_intra= 0;
2978                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2979                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2980                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2981                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2982                     break;
2983                 case CANDIDATE_MB_TYPE_BACKWARD:
2984                     s->mv_dir = MV_DIR_BACKWARD;
2985                     s->mb_intra= 0;
2986                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2987                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2988                     break;
2989                 case CANDIDATE_MB_TYPE_FORWARD:
2990                     s->mv_dir = MV_DIR_FORWARD;
2991                     s->mb_intra= 0;
2992                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2993                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2994                     break;
2995                 case CANDIDATE_MB_TYPE_FORWARD_I:
2996                     s->mv_dir = MV_DIR_FORWARD;
2997                     s->mv_type = MV_TYPE_FIELD;
2998                     s->mb_intra= 0;
2999                     for(i=0; i<2; i++){
3000                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3001                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3002                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3003                     }
3004                     break;
3005                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3006                     s->mv_dir = MV_DIR_BACKWARD;
3007                     s->mv_type = MV_TYPE_FIELD;
3008                     s->mb_intra= 0;
3009                     for(i=0; i<2; i++){
3010                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3011                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3012                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3013                     }
3014                     break;
3015                 case CANDIDATE_MB_TYPE_BIDIR_I:
3016                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3017                     s->mv_type = MV_TYPE_FIELD;
3018                     s->mb_intra= 0;
3019                     for(dir=0; dir<2; dir++){
3020                         for(i=0; i<2; i++){
3021                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3022                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3023                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3024                         }
3025                     }
3026                     break;
3027                 default:
3028                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3029                 }
3030
3031                 encode_mb(s, motion_x, motion_y);
3032
3033                 // RAL: Update last macroblock type
3034                 s->last_mv_dir = s->mv_dir;
3035
3036                 if (CONFIG_H263_ENCODER &&
3037                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3038                     ff_h263_update_motion_val(s);
3039
3040                 ff_MPV_decode_mb(s, s->block);
3041             }
3042
3043             /* clean the MV table in IPS frames for direct mode in B frames */
3044             if(s->mb_intra /* && I,P,S_TYPE */){
3045                 s->p_mv_table[xy][0]=0;
3046                 s->p_mv_table[xy][1]=0;
3047             }
3048
3049             if(s->flags&CODEC_FLAG_PSNR){
3050                 int w= 16;
3051                 int h= 16;
3052
3053                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3054                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3055
3056                 s->current_picture.f->error[0] += sse(
3057                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3058                     s->dest[0], w, h, s->linesize);
3059                 s->current_picture.f->error[1] += sse(
3060                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3061                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3062                 s->current_picture.f->error[2] += sse(
3063                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3064                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3065             }
3066             if(s->loop_filter){
3067                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3068                     ff_h263_loop_filter(s);
3069             }
3070             av_dlog(s->avctx, "MB %d %d bits\n",
3071                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3072         }
3073     }
3074
3075     //not beautiful here but we must write it before flushing so it has to be here
3076     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3077         ff_msmpeg4_encode_ext_header(s);
3078
3079     write_slice_end(s);
3080
3081     /* Send the last GOB if RTP */
3082     if (s->avctx->rtp_callback) {
3083         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3084         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3085         /* Call the RTP callback to send the last GOB */
3086         emms_c();
3087         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3088     }
3089
3090     return 0;
3091 }
3092
3093 #define MERGE(field) dst->field += src->field; src->field=0
3094 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3095     MERGE(me.scene_change_score);
3096     MERGE(me.mc_mb_var_sum_temp);
3097     MERGE(me.mb_var_sum_temp);
3098 }
3099
3100 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3101     int i;
3102
3103     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3104     MERGE(dct_count[1]);
3105     MERGE(mv_bits);
3106     MERGE(i_tex_bits);
3107     MERGE(p_tex_bits);
3108     MERGE(i_count);
3109     MERGE(f_count);
3110     MERGE(b_count);
3111     MERGE(skip_count);
3112     MERGE(misc_bits);
3113     MERGE(er.error_count);
3114     MERGE(padding_bug_score);
3115     MERGE(current_picture.f->error[0]);
3116     MERGE(current_picture.f->error[1]);
3117     MERGE(current_picture.f->error[2]);
3118
3119     if(dst->avctx->noise_reduction){
3120         for(i=0; i<64; i++){
3121             MERGE(dct_error_sum[0][i]);
3122             MERGE(dct_error_sum[1][i]);
3123         }
3124     }
3125
3126     assert(put_bits_count(&src->pb) % 8 ==0);
3127     assert(put_bits_count(&dst->pb) % 8 ==0);
3128     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3129     flush_put_bits(&dst->pb);
3130 }
3131
3132 static int estimate_qp(MpegEncContext *s, int dry_run){
3133     if (s->next_lambda){
3134         s->current_picture_ptr->f->quality =
3135         s->current_picture.f->quality = s->next_lambda;
3136         if(!dry_run) s->next_lambda= 0;
3137     } else if (!s->fixed_qscale) {
3138         s->current_picture_ptr->f->quality =
3139         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3140         if (s->current_picture.f->quality < 0)
3141             return -1;
3142     }
3143
3144     if(s->adaptive_quant){
3145         switch(s->codec_id){
3146         case AV_CODEC_ID_MPEG4:
3147             if (CONFIG_MPEG4_ENCODER)
3148                 ff_clean_mpeg4_qscales(s);
3149             break;
3150         case AV_CODEC_ID_H263:
3151         case AV_CODEC_ID_H263P:
3152         case AV_CODEC_ID_FLV1:
3153             if (CONFIG_H263_ENCODER)
3154                 ff_clean_h263_qscales(s);
3155             break;
3156         default:
3157             ff_init_qscale_tab(s);
3158         }
3159
3160         s->lambda= s->lambda_table[0];
3161         //FIXME broken
3162     }else
3163         s->lambda = s->current_picture.f->quality;
3164     update_qscale(s);
3165     return 0;
3166 }
3167
3168 /* must be called before writing the header */
3169 static void set_frame_distances(MpegEncContext * s){
3170     assert(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3171     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3172
3173     if(s->pict_type==AV_PICTURE_TYPE_B){
3174         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3175         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3176     }else{
3177         s->pp_time= s->time - s->last_non_b_time;
3178         s->last_non_b_time= s->time;
3179         assert(s->picture_number==0 || s->pp_time > 0);
3180     }
3181 }
3182
     /**
      * Encode one picture.
      *
      * Runs motion estimation (or, for I pictures without fixed qscale, the
      * mb_var_thread pass), chooses f_code/b_code and the quantiser, writes the
      * picture header for the output format and finally runs encode_thread on
      * all slice contexts and merges their results into s.
      *
      * @param s              encoder context; s->pict_type may be changed from P
      *                       to I here when the scene change score exceeds
      *                       avctx->scenechange_threshold
      * @param picture_number stored in s->picture_number and passed on to the
      *                       picture header writers
      * @return 0 on success, -1 if estimate_qp() or ff_init_me() fails, or the
      *         negative value returned by ff_update_duplicate_context()
      */
3183 static int encode_picture(MpegEncContext *s, int picture_number)
3184 {
3185     int i, ret;
3186     int bits;
3187     int context_count = s->slice_context_count;
3188
3189     s->picture_number = picture_number;
3190
3191     /* Reset the average MB variance */
3192     s->me.mb_var_sum_temp    =
3193     s->me.mc_mb_var_sum_temp = 0;
3194
3195     /* we need to initialize some time vars before we can encode b-frames */
3196     // RAL: Condition added for MPEG1VIDEO
3197     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3198         set_frame_distances(s);
3199     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3200         ff_set_mpeg4_time(s);
3201
3202     s->me.scene_change_score=0;
3203
3204 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3205
         /* no_rounding: for I pictures it is set only for msmpeg4_version >= 3;
          * for other non-B pictures it is toggled when flipflop_rounding is set
          * or the codec is H263P or MPEG4; B pictures leave it unchanged */
3206     if(s->pict_type==AV_PICTURE_TYPE_I){
3207         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3208         else                        s->no_rounding=0;
3209     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3210         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3211             s->no_rounding ^= 1;
3212     }
3213
         /* provisional lambda for motion estimation: in two-pass mode from a dry
          * run of estimate_qp(), otherwise (unless CODEC_FLAG_QSCALE is set) the
          * last lambda stored for this kind of picture */
3214     if(s->flags & CODEC_FLAG_PASS2){
3215         if (estimate_qp(s,1) < 0)
3216             return -1;
3217         ff_get_2pass_fcode(s);
3218     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3219         if(s->pict_type==AV_PICTURE_TYPE_B)
3220             s->lambda= s->last_lambda_for[s->pict_type];
3221         else
3222             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3223         update_qscale(s);
3224     }
3225
3226     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* thread_context[0] is skipped here (presumably it is s itself -- TODO confirm) */
3227     for(i=1; i<context_count; i++){
3228         ret = ff_update_duplicate_context(s->thread_context[i], s);
3229         if (ret < 0)
3230             return ret;
3231     }
3232
3233     if(ff_init_me(s)<0)
3234         return -1;
3235
3236     /* Estimate motion for every MB */
3237     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda and lambda2 by me_penalty_compensation/256, rounded */
3238         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3239         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3240         if (s->pict_type != AV_PICTURE_TYPE_B) {
3241             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3242                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3243             }
3244         }
3245
3246         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3247     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3248         /* I-Frame */
3249         for(i=0; i<s->mb_stride*s->mb_height; i++)
3250             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3251
3252         if(!s->fixed_qscale){
3253             /* finding spatial complexity for I-frame rate control */
3254             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3255         }
3256     }
         /* collect the statistics of the additional contexts into s */
3257     for(i=1; i<context_count; i++){
3258         merge_context_after_me(s, s->thread_context[i]);
3259     }
3260     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3261     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3262     emms_c();
3263
         /* a P picture whose scene change score exceeds the threshold is coded
          * as an I picture with all macroblocks marked intra */
3264     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3265         s->pict_type= AV_PICTURE_TYPE_I;
3266         for(i=0; i<s->mb_stride*s->mb_height; i++)
3267             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3268         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3269                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3270     }
3271
         /* unless umvplus is set: pick f_code/b_code from the motion vector
          * tables and pass the tables to ff_fix_long_mvs() with the chosen code */
3272     if(!s->umvplus){
3273         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3274             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3275
3276             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3277                 int a,b;
3278                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3279                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3280                 s->f_code= FFMAX3(s->f_code, a, b);
3281             }
3282
3283             ff_fix_long_p_mvs(s);
3284             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3285             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3286                 int j;
3287                 for(i=0; i<2; i++){
3288                     for(j=0; j<2; j++)
3289                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3290                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3291                 }
3292             }
3293         }
3294
3295         if(s->pict_type==AV_PICTURE_TYPE_B){
3296             int a, b;
3297
                 /* f_code covers the forward tables, b_code the backward ones */
3298             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3299             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3300             s->f_code = FFMAX(a, b);
3301
3302             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3303             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3304             s->b_code = FFMAX(a, b);
3305
3306             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3307             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3308             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3309             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3310             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3311                 int dir, j;
3312                 for(dir=0; dir<2; dir++){
3313                     for(i=0; i<2; i++){
3314                         for(j=0; j<2; j++){
3315                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3316                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3317                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3318                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3319                         }
3320                     }
3321                 }
3322             }
3323         }
3324     }
3325
         /* final quantiser decision for this picture (not a dry run) */
3326     if (estimate_qp(s, 0) < 0)
3327         return -1;
3328
3329     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3330         s->qscale= 3; //reduce clipping problems
3331
3332     if (s->out_format == FMT_MJPEG) {
3333         /* for mjpeg, we do include qscale in the matrix */
3334         for(i=1;i<64;i++){
3335             int j= s->dsp.idct_permutation[i];
3336
3337             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3338         }
3339         s->y_dc_scale_table=
3340         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3341         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3342         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3343                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
             /* qscale is already folded into the matrix, so it is fixed to 8 */
3344         s->qscale= 8;
3345     }
3346
3347     //FIXME var duplication
3348     s->current_picture_ptr->f->key_frame =
3349     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3350     s->current_picture_ptr->f->pict_type =
3351     s->current_picture.f->pict_type = s->pict_type;
3352
3353     if (s->current_picture.f->key_frame)
3354         s->picture_in_gop_number=0;
3355
         /* write the picture header; its size in bits ends up in s->header_bits */
3356     s->last_bits= put_bits_count(&s->pb);
3357     switch(s->out_format) {
3358     case FMT_MJPEG:
3359         if (CONFIG_MJPEG_ENCODER)
3360             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3361                                            s->intra_matrix);
3362         break;
3363     case FMT_H261:
3364         if (CONFIG_H261_ENCODER)
3365             ff_h261_encode_picture_header(s, picture_number);
3366         break;
3367     case FMT_H263:
3368         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3369             ff_wmv2_encode_picture_header(s, picture_number);
3370         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3371             ff_msmpeg4_encode_picture_header(s, picture_number);
3372         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3373             ff_mpeg4_encode_picture_header(s, picture_number);
3374         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3375             ff_rv10_encode_picture_header(s, picture_number);
3376         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3377             ff_rv20_encode_picture_header(s, picture_number);
3378         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3379             ff_flv_encode_picture_header(s, picture_number);
3380         else if (CONFIG_H263_ENCODER)
3381             ff_h263_encode_picture_header(s, picture_number);
3382         break;
3383     case FMT_MPEG1:
3384         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3385             ff_mpeg1_encode_picture_header(s, picture_number);
3386         break;
3387     default:
3388         assert(0);
3389     }
3390     bits= put_bits_count(&s->pb);
3391     s->header_bits= bits - s->last_bits;
3392
         /* update the additional contexts, encode with all contexts, then merge
          * the results of the additional contexts back into s */
3393     for(i=1; i<context_count; i++){
3394         update_duplicate_context_after_me(s->thread_context[i], s);
3395     }
3396     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3397     for(i=1; i<context_count; i++){
3398         merge_context_after_encode(s, s->thread_context[i]);
3399     }
3400     emms_c();
3401     return 0;
3402 }
3403
3404 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3405     const int intra= s->mb_intra;
3406     int i;
3407
3408     s->dct_count[intra]++;
3409
3410     for(i=0; i<64; i++){
3411         int level= block[i];
3412
3413         if(level){
3414             if(level>0){
3415                 s->dct_error_sum[intra][i] += level;
3416                 level -= s->dct_offset[intra][i];
3417                 if(level<0) level=0;
3418             }else{
3419                 s->dct_error_sum[intra][i] -= level;
3420                 level += s->dct_offset[intra][i];
3421                 if(level>0) level=0;
3422             }
3423             block[i]= level;
3424         }
3425     }
3426 }
3427
     /**
      * Forward DCT and quantisation of one 8x8 block with a rate-distortion
      * search over candidate levels.
      *
      * For every coefficient up to the last one that quantises to non-zero, one
      * or two candidate levels are set up (the quantised level and the level one
      * step closer to zero). A dynamic programming pass over (run, level) pairs
      * then picks the combination with the lowest "distortion + lambda * bits"
      * score, using the VLC length tables of the context.
      *
      * @param s        encoder context
      * @param block    64 coefficients; transformed in place by s->dsp.fdct and
      *                 overwritten with the chosen quantised levels
      * @param n        block number; n < 4 selects y_dc_scale, otherwise
      *                 c_dc_scale, for the intra DC coefficient; also the index
      *                 into s->coded_score
      * @param qscale   quantiser, index into q_intra_matrix / q_inter_matrix
      * @param overflow set to non-zero when the OR of the quantised magnitudes
      *                 exceeds s->max_qcoeff, i.e. an overflow might have happened
      * @return scan index of the last coefficient that is kept; 0 for an intra
      *         block with nothing but DC, -1 for an inter block with nothing coded
      */
3428 static int dct_quantize_trellis_c(MpegEncContext *s,
3429                                   int16_t *block, int n,
3430                                   int qscale, int *overflow){
3431     const int *qmat;
3432     const uint8_t *scantable= s->intra_scantable.scantable;
3433     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3434     int max=0;
3435     unsigned int threshold1, threshold2;
3436     int bias=0;
3437     int run_tab[65];
3438     int level_tab[65];
3439     int score_tab[65];
3440     int survivor[65];
3441     int survivor_count;
3442     int last_run=0;
3443     int last_level=0;
3444     int last_score= 0;
3445     int last_i;
3446     int coeff[2][64];
3447     int coeff_count[64];
3448     int qmul, qadd, start_i, last_non_zero, i, dc;
3449     const int esc_length= s->ac_esc_length;
3450     uint8_t * length;
3451     uint8_t * last_length;
3452     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3453
3454     s->dsp.fdct (block);
3455
3456     if(s->dct_error_sum)
3457         s->denoise_dct(s, block);
3458     qmul= qscale*16;
3459     qadd= ((qscale-1)|1)*8;
3460
3461     if (s->mb_intra) {
3462         int q;
3463         if (!s->h263_aic) {
3464             if (n < 4)
3465                 q = s->y_dc_scale;
3466             else
3467                 q = s->c_dc_scale;
3468             q = q << 3;
3469         } else{
3470             /* For AIC we skip quant/dequant of INTRADC */
3471             q = 1 << 3;
3472             qadd=0;
3473         }
3474
3475         /* note: block[0] is assumed to be positive */
3476         block[0] = (block[0] + (q >> 1)) / q;
3477         start_i = 1;
3478         last_non_zero = 0;
3479         qmat = s->q_intra_matrix[qscale];
3480         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3481             bias= 1<<(QMAT_SHIFT-1);
3482         length     = s->intra_ac_vlc_length;
3483         last_length= s->intra_ac_vlc_last_length;
3484     } else {
3485         start_i = 0;
3486         last_non_zero = -1;
3487         qmat = s->q_inter_matrix[qscale];
3488         length     = s->inter_ac_vlc_length;
3489         last_length= s->inter_ac_vlc_last_length;
3490     }
3491     last_i= start_i;
3492
3493     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3494     threshold2= (threshold1<<1);
3495
         /* scan backwards for the last coefficient outside the dead zone; the
          * unsigned compare is true when level > threshold1 or level < -threshold1 */
3496     for(i=63; i>=start_i; i--) {
3497         const int j = scantable[i];
3498         int level = block[j] * qmat[j];
3499
3500         if(((unsigned)(level+threshold1))>threshold2){
3501             last_non_zero = i;
3502             break;
3503         }
3504     }
3505
         /* set up the candidate levels: coeff[0] is the quantised level, coeff[1]
          * the level one step closer to zero (not used when the magnitude is 1);
          * coefficients inside the dead zone get the single candidate +1 or -1 */
3506     for(i=start_i; i<=last_non_zero; i++) {
3507         const int j = scantable[i];
3508         int level = block[j] * qmat[j];
3509
3510 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3511 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3512         if(((unsigned)(level+threshold1))>threshold2){
3513             if(level>0){
3514                 level= (bias + level)>>QMAT_SHIFT;
3515                 coeff[0][i]= level;
3516                 coeff[1][i]= level-1;
3517 //                coeff[2][k]= level-2;
3518             }else{
3519                 level= (bias - level)>>QMAT_SHIFT;
3520                 coeff[0][i]= -level;
3521                 coeff[1][i]= -level+1;
3522 //                coeff[2][k]= -level+2;
3523             }
3524             coeff_count[i]= FFMIN(level, 2);
3525             assert(coeff_count[i]);
3526             max |=level;
3527         }else{
3528             coeff[0][i]= (level>>31)|1;
3529             coeff_count[i]= 1;
3530         }
3531     }
3532
3533     *overflow= s->max_qcoeff < max; //overflow might have happened
3534
         /* nothing left to code: clear the AC part and return */
3535     if(last_non_zero < start_i){
3536         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3537         return last_non_zero;
3538     }
3539
3540     score_tab[start_i]= 0;
3541     survivor[0]= start_i;
3542     survivor_count= 1;
3543
         /* dynamic programming pass: score_tab[i+1] becomes the best score of
          * coding a non-zero level at position i, reached from one of the
          * surviving earlier end positions; run_tab/level_tab record the choice */
3544     for(i=start_i; i<=last_non_zero; i++){
3545         int level_index, j, zero_distortion;
3546         int dct_coeff= FFABS(block[ scantable[i] ]);
3547         int best_score=256*256*256*120;
3548
3549         if (s->dsp.fdct == ff_fdct_ifast)
3550             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3551         zero_distortion= dct_coeff*dct_coeff;
3552
3553         for(level_index=0; level_index < coeff_count[i]; level_index++){
3554             int distortion;
3555             int level= coeff[level_index][i];
3556             const int alevel= FFABS(level);
3557             int unquant_coeff;
3558
3559             assert(level);
3560
                 /* reconstruct the value this candidate level would decode to */
3561             if(s->out_format == FMT_H263){
3562                 unquant_coeff= alevel*qmul + qadd;
3563             }else{ //MPEG1
3564                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3565                 if(s->mb_intra){
3566                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3567                         unquant_coeff =   (unquant_coeff - 1) | 1;
3568                 }else{
3569                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3570                         unquant_coeff =   (unquant_coeff - 1) | 1;
3571                 }
3572                 unquant_coeff<<= 3;
3573             }
3574
                 /* distortion relative to coding this coefficient as zero */
3575             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* offset by 64 so that (level & ~127) == 0 means -64 <= level < 64,
                  * the range looked up in the VLC length tables; everything else is
                  * costed with esc_length */
3576             level+=64;
3577             if((level&(~127)) == 0){
3578                 for(j=survivor_count-1; j>=0; j--){
3579                     int run= i - survivor[j];
3580                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3581                     score += score_tab[i-run];
3582
3583                     if(score < best_score){
3584                         best_score= score;
3585                         run_tab[i+1]= run;
3586                         level_tab[i+1]= level-64;
3587                     }
3588                 }
3589
                     /* for H.263 the "last coefficient" codes have their own
                      * length table, so the best end position is tracked here */
3590                 if(s->out_format == FMT_H263){
3591                     for(j=survivor_count-1; j>=0; j--){
3592                         int run= i - survivor[j];
3593                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3594                         score += score_tab[i-run];
3595                         if(score < last_score){
3596                             last_score= score;
3597                             last_run= run;
3598                             last_level= level-64;
3599                             last_i= i+1;
3600                         }
3601                     }
3602                 }
3603             }else{
3604                 distortion += esc_length*lambda;
3605                 for(j=survivor_count-1; j>=0; j--){
3606                     int run= i - survivor[j];
3607                     int score= distortion + score_tab[i-run];
3608
3609                     if(score < best_score){
3610                         best_score= score;
3611                         run_tab[i+1]= run;
3612                         level_tab[i+1]= level-64;
3613                     }
3614                 }
3615
3616                 if(s->out_format == FMT_H263){
3617                   for(j=survivor_count-1; j>=0; j--){
3618                         int run= i - survivor[j];
3619                         int score= distortion + score_tab[i-run];
3620                         if(score < last_score){
3621                             last_score= score;
3622                             last_run= run;
3623                             last_level= level-64;
3624                             last_i= i+1;
3625                         }
3626                     }
3627                 }
3628             }
3629         }
3630
3631         score_tab[i+1]= best_score;
3632
             /* drop survivors from the end of the list whose score is worse than
              * the new best score (with a margin of lambda for longer blocks) */
3633         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3634         if(last_non_zero <= 27){
3635             for(; survivor_count; survivor_count--){
3636                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3637                     break;
3638             }
3639         }else{
3640             for(; survivor_count; survivor_count--){
3641                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3642                     break;
3643             }
3644         }
3645
3646         survivor[ survivor_count++ ]= i+1;
3647     }
3648
         /* formats other than H.263 choose the end position afterwards from
          * score_tab, adding 2*lambda for every position other than 0 */
3649     if(s->out_format != FMT_H263){
3650         last_score= 256*256*256*120;
3651         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3652             int score= score_tab[i];
3653             if(i) score += lambda*2; //FIXME exacter?
3654
3655             if(score < last_score){
3656                 last_score= score;
3657                 last_i= i;
3658                 last_level= level_tab[i];
3659                 last_run= run_tab[i];
3660             }
3661         }
3662     }
3663
3664     s->coded_score[n] = last_score;
3665
3666     dc= FFABS(block[0]);
3667     last_non_zero= last_i - 1;
3668     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3669
3670     if(last_non_zero < start_i)
3671         return last_non_zero;
3672
         /* special case: inter block where only the coefficient at scan position 0
          * is left; its level is chosen again, with level 0 as the baseline */
3673     if(last_non_zero == 0 && start_i == 0){
3674         int best_level= 0;
3675         int best_score= dc * dc;
3676
3677         for(i=0; i<coeff_count[0]; i++){
3678             int level= coeff[i][0];
3679             int alevel= FFABS(level);
3680             int unquant_coeff, score, distortion;
3681
3682             if(s->out_format == FMT_H263){
3683                     unquant_coeff= (alevel*qmul + qadd)>>3;
3684             }else{ //MPEG1
3685                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3686                     unquant_coeff =   (unquant_coeff - 1) | 1;
3687             }
3688             unquant_coeff = (unquant_coeff + 4) >> 3;
3689             unquant_coeff<<= 3 + 3;
3690
3691             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3692             level+=64;
3693             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3694             else                    score= distortion + esc_length*lambda;
3695
3696             if(score < best_score){
3697                 best_score= score;
3698                 best_level= level - 64;
3699             }
3700         }
3701         block[0]= best_level;
3702         s->coded_score[n] = best_score - dc*dc;
3703         if(best_level == 0) return -1;
3704         else                return last_non_zero;
3705     }
3706
         /* walk back from the chosen end position through run_tab/level_tab and
          * write the selected levels into the block */
3707     i= last_i;
3708     assert(last_level);
3709
3710     block[ perm_scantable[last_non_zero] ]= last_level;
3711     i -= last_run + 1;
3712
3713     for(; i>start_i; i -= run_tab[i] + 1){
3714         block[ perm_scantable[i-1] ]= level_tab[i];
3715     }
3716
3717     return last_non_zero;
3718 }
3719
3720 //#define REFINE_STATS 1
3721 static int16_t basis[64][64];
3722
3723 static void build_basis(uint8_t *perm){
3724     int i, j, x, y;
3725     emms_c();
3726     for(i=0; i<8; i++){
3727         for(j=0; j<8; j++){
3728             for(y=0; y<8; y++){
3729                 for(x=0; x<8; x++){
3730                     double s= 0.25*(1<<BASIS_SHIFT);
3731                     int index= 8*i + j;
3732                     int perm_index= perm[index];
3733                     if(i==0) s*= sqrt(0.5);
3734                     if(j==0) s*= sqrt(0.5);
3735                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3736                 }
3737             }
3738         }
3739     }
3740 }
3741
3742 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3743                         int16_t *block, int16_t *weight, int16_t *orig,
3744                         int n, int qscale){
3745     int16_t rem[64];
3746     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3747     const uint8_t *scantable= s->intra_scantable.scantable;
3748     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3749 //    unsigned int threshold1, threshold2;
3750 //    int bias=0;
3751     int run_tab[65];
3752     int prev_run=0;
3753     int prev_level=0;
3754     int qmul, qadd, start_i, last_non_zero, i, dc;
3755     uint8_t * length;
3756     uint8_t * last_length;
3757     int lambda;
3758     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3759 #ifdef REFINE_STATS
3760 static int count=0;
3761 static int after_last=0;
3762 static int to_zero=0;
3763 static int from_zero=0;
3764 static int raise=0;
3765 static int lower=0;
3766 static int messed_sign=0;
3767 #endif
3768
3769     if(basis[0][0] == 0)
3770         build_basis(s->dsp.idct_permutation);
3771
3772     qmul= qscale*2;
3773     qadd= (qscale-1)|1;
3774     if (s->mb_intra) {
3775         if (!s->h263_aic) {
3776             if (n < 4)
3777                 q = s->y_dc_scale;
3778             else
3779                 q = s->c_dc_scale;
3780         } else{
3781             /* For AIC we skip quant/dequant of INTRADC */
3782             q = 1;
3783             qadd=0;
3784         }
3785         q <<= RECON_SHIFT-3;
3786         /* note: block[0] is assumed to be positive */
3787         dc= block[0]*q;
3788 //        block[0] = (block[0] + (q >> 1)) / q;
3789         start_i = 1;
3790 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3791 //            bias= 1<<(QMAT_SHIFT-1);
3792         length     = s->intra_ac_vlc_length;
3793         last_length= s->intra_ac_vlc_last_length;
3794     } else {
3795         dc= 0;
3796         start_i = 0;
3797         length     = s->inter_ac_vlc_length;
3798         last_length= s->inter_ac_vlc_last_length;
3799     }
3800     last_non_zero = s->block_last_index[n];
3801
3802 #ifdef REFINE_STATS
3803 {START_TIMER
3804 #endif
3805     dc += (1<<(RECON_SHIFT-1));
3806     for(i=0; i<64; i++){
3807         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3808     }
3809 #ifdef REFINE_STATS
3810 STOP_TIMER("memset rem[]")}
3811 #endif
3812     sum=0;
3813     for(i=0; i<64; i++){
3814         int one= 36;
3815         int qns=4;
3816         int w;
3817
3818         w= FFABS(weight[i]) + qns*one;
3819         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3820
3821         weight[i] = w;
3822 //        w=weight[i] = (63*qns + (w/2)) / w;
3823
3824         assert(w>0);
3825         assert(w<(1<<6));
3826         sum += w*w;
3827     }
3828     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3829 #ifdef REFINE_STATS
3830 {START_TIMER
3831 #endif
3832     run=0;
3833     rle_index=0;
3834     for(i=start_i; i<=last_non_zero; i++){
3835         int j= perm_scantable[i];
3836         const int level= block[j];
3837         int coeff;
3838
3839         if(level){
3840             if(level<0) coeff= qmul*level - qadd;
3841             else        coeff= qmul*level + qadd;
3842             run_tab[rle_index++]=run;
3843             run=0;
3844
3845             s->dsp.add_8x8basis(rem, basis[j], coeff);
3846         }else{
3847             run++;
3848         }
3849     }
3850 #ifdef REFINE_STATS
3851 if(last_non_zero>0){
3852 STOP_TIMER("init rem[]")
3853 }
3854 }
3855
3856 {START_TIMER
3857 #endif
3858     for(;;){
3859         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3860         int best_coeff=0;
3861         int best_change=0;
3862         int run2, best_unquant_change=0, analyze_gradient;
3863 #ifdef REFINE_STATS
3864 {START_TIMER
3865 #endif
3866         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3867
3868         if(analyze_gradient){
3869 #ifdef REFINE_STATS
3870 {START_TIMER
3871 #endif
3872             for(i=0; i<64; i++){
3873                 int w= weight[i];
3874
3875                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3876             }
3877 #ifdef REFINE_STATS
3878 STOP_TIMER("rem*w*w")}
3879 {START_TIMER
3880 #endif
3881             s->dsp.fdct(d1);
3882 #ifdef REFINE_STATS
3883 STOP_TIMER("dct")}
3884 #endif
3885         }
3886
3887         if(start_i){
3888             const int level= block[0];
3889             int change, old_coeff;
3890
3891             assert(s->mb_intra);
3892
3893             old_coeff= q*level;
3894
3895             for(change=-1; change<=1; change+=2){
3896                 int new_level= level + change;
3897                 int score, new_coeff;
3898
3899                 new_coeff= q*new_level;
3900                 if(new_coeff >= 2048 || new_coeff < 0)
3901                     continue;
3902
3903                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3904                 if(score<best_score){
3905                     best_score= score;
3906                     best_coeff= 0;
3907                     best_change= change;
3908                     best_unquant_change= new_coeff - old_coeff;
3909                 }
3910             }
3911         }
3912
3913         run=0;
3914         rle_index=0;
3915         run2= run_tab[rle_index++];
3916         prev_level=0;
3917         prev_run=0;
3918
3919         for(i=start_i; i<64; i++){
3920             int j= perm_scantable[i];
3921             const int level= block[j];
3922             int change, old_coeff;
3923
3924             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3925                 break;
3926
3927             if(level){
3928                 if(level<0) old_coeff= qmul*level - qadd;
3929                 else        old_coeff= qmul*level + qadd;
3930                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3931             }else{
3932                 old_coeff=0;
3933                 run2--;
3934                 assert(run2>=0 || i >= last_non_zero );
3935             }
3936
3937             for(change=-1; change<=1; change+=2){
3938                 int new_level= level + change;
3939                 int score, new_coeff, unquant_change;
3940
3941                 score=0;
3942                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3943                    continue;
3944
3945                 if(new_level){
3946                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3947                     else            new_coeff= qmul*new_level + qadd;
3948                     if(new_coeff >= 2048 || new_coeff <= -2048)
3949                         continue;
3950                     //FIXME check for overflow
3951
3952                     if(level){
3953                         if(level < 63 && level > -63){
3954                             if(i < last_non_zero)
3955                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3956                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3957                             else
3958                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3959                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3960                         }
3961                     }else{
3962                         assert(FFABS(new_level)==1);
3963
3964                         if(analyze_gradient){
3965                             int g= d1[ scantable[i] ];
3966                             if(g && (g^new_level) >= 0)
3967                                 continue;
3968                         }
3969
3970                         if(i < last_non_zero){
3971                             int next_i= i + run2 + 1;
3972                             int next_level= block[ perm_scantable[next_i] ] + 64;
3973
3974                             if(next_level&(~127))
3975                                 next_level= 0;
3976
3977                             if(next_i < last_non_zero)
3978                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3979                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3980                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3981                             else
3982                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3983                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3984                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3985                         }else{
3986                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3987                             if(prev_level){
3988                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3989                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3990                             }
3991                         }
3992                     }
3993                 }else{
3994                     new_coeff=0;
3995                     assert(FFABS(level)==1);
3996
3997                     if(i < last_non_zero){
3998                         int next_i= i + run2 + 1;
3999                         int next_level= block[ perm_scantable[next_i] ] + 64;
4000
4001                         if(next_level&(~127))
4002                             next_level= 0;
4003
4004                         if(next_i < last_non_zero)
4005                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4006                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4007                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4008                         else
4009                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4010                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4011                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4012                     }else{
4013                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4014                         if(prev_level){
4015                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4016                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4017                         }
4018                     }
4019                 }
4020
4021                 score *= lambda;
4022
4023                 unquant_change= new_coeff - old_coeff;
4024                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
4025
4026                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
4027                 if(score<best_score){
4028                     best_score= score;
4029                     best_coeff= i;
4030                     best_change= change;
4031                     best_unquant_change= unquant_change;
4032                 }
4033             }
4034             if(level){
4035                 prev_level= level + 64;
4036                 if(prev_level&(~127))
4037                     prev_level= 0;
4038                 prev_run= run;
4039                 run=0;
4040             }else{
4041                 run++;
4042             }
4043         }
4044 #ifdef REFINE_STATS
4045 STOP_TIMER("iterative step")}
4046 #endif
4047
4048         if(best_change){
4049             int j= perm_scantable[ best_coeff ];
4050
4051             block[j] += best_change;
4052
4053             if(best_coeff > last_non_zero){
4054                 last_non_zero= best_coeff;
4055                 assert(block[j]);
4056 #ifdef REFINE_STATS
4057 after_last++;
4058 #endif
4059             }else{
4060 #ifdef REFINE_STATS
4061 if(block[j]){
4062     if(block[j] - best_change){
4063         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4064             raise++;
4065         }else{
4066             lower++;
4067         }
4068     }else{
4069         from_zero++;
4070     }
4071 }else{
4072     to_zero++;
4073 }
4074 #endif
4075                 for(; last_non_zero>=start_i; last_non_zero--){
4076                     if(block[perm_scantable[last_non_zero]])
4077                         break;
4078                 }
4079             }
4080 #ifdef REFINE_STATS
4081 count++;
4082 if(256*256*256*64 % count == 0){
4083     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4084 }
4085 #endif
4086             run=0;
4087             rle_index=0;
4088             for(i=start_i; i<=last_non_zero; i++){
4089                 int j= perm_scantable[i];
4090                 const int level= block[j];
4091
4092                  if(level){
4093                      run_tab[rle_index++]=run;
4094                      run=0;
4095                  }else{
4096                      run++;
4097                  }
4098             }
4099
4100             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4101         }else{
4102             break;
4103         }
4104     }
4105 #ifdef REFINE_STATS
4106 if(last_non_zero>0){
4107 STOP_TIMER("iterative search")
4108 }
4109 }
4110 #endif
4111
4112     return last_non_zero;
4113 }
4114
4115 int ff_dct_quantize_c(MpegEncContext *s,
4116                         int16_t *block, int n,
4117                         int qscale, int *overflow)
4118 {
4119     int i, j, level, last_non_zero, q, start_i;
4120     const int *qmat;
4121     const uint8_t *scantable= s->intra_scantable.scantable;
4122     int bias;
4123     int max=0;
4124     unsigned int threshold1, threshold2;
4125
4126     s->dsp.fdct (block);
4127
4128     if(s->dct_error_sum)
4129         s->denoise_dct(s, block);
4130
4131     if (s->mb_intra) {
4132         if (!s->h263_aic) {
4133             if (n < 4)
4134                 q = s->y_dc_scale;
4135             else
4136                 q = s->c_dc_scale;
4137             q = q << 3;
4138         } else
4139             /* For AIC we skip quant/dequant of INTRADC */
4140             q = 1 << 3;
4141
4142         /* note: block[0] is assumed to be positive */
4143         block[0] = (block[0] + (q >> 1)) / q;
4144         start_i = 1;
4145         last_non_zero = 0;
4146         qmat = s->q_intra_matrix[qscale];
4147         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4148     } else {
4149         start_i = 0;
4150         last_non_zero = -1;
4151         qmat = s->q_inter_matrix[qscale];
4152         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4153     }
4154     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4155     threshold2= (threshold1<<1);
4156     for(i=63;i>=start_i;i--) {
4157         j = scantable[i];
4158         level = block[j] * qmat[j];
4159
4160         if(((unsigned)(level+threshold1))>threshold2){
4161             last_non_zero = i;
4162             break;
4163         }else{
4164             block[j]=0;
4165         }
4166     }
4167     for(i=start_i; i<=last_non_zero; i++) {
4168         j = scantable[i];
4169         level = block[j] * qmat[j];
4170
4171 //        if(   bias+level >= (1<<QMAT_SHIFT)
4172 //           || bias-level >= (1<<QMAT_SHIFT)){
4173         if(((unsigned)(level+threshold1))>threshold2){
4174             if(level>0){
4175                 level= (bias + level)>>QMAT_SHIFT;
4176                 block[j]= level;
4177             }else{
4178                 level= (bias - level)>>QMAT_SHIFT;
4179                 block[j]= -level;
4180             }
4181             max |=level;
4182         }else{
4183             block[j]=0;
4184         }
4185     }
4186     *overflow= s->max_qcoeff < max; //overflow might have happened
4187
4188     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4189     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4190         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4191
4192     return last_non_zero;
4193 }
4194
/* Byte offset of field x inside MpegEncContext, for use in AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Option flags shared by the encoder option tables in this file. The
 * expansion is parenthesized so it stays a single operand no matter which
 * operators surround the macro at the point of use. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4197 static const AVOption h263_options[] = {
4198     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4199     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4200     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4201     FF_MPV_COMMON_OPTS
4202     { NULL },
4203 };
4204
4205 static const AVClass h263_class = {
4206     .class_name = "H.263 encoder",
4207     .item_name  = av_default_item_name,
4208     .option     = h263_options,
4209     .version    = LIBAVUTIL_VERSION_INT,
4210 };
4211
4212 AVCodec ff_h263_encoder = {
4213     .name           = "h263",
4214     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4215     .type           = AVMEDIA_TYPE_VIDEO,
4216     .id             = AV_CODEC_ID_H263,
4217     .priv_data_size = sizeof(MpegEncContext),
4218     .init           = ff_MPV_encode_init,
4219     .encode2        = ff_MPV_encode_picture,
4220     .close          = ff_MPV_encode_end,
4221     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4222     .priv_class     = &h263_class,
4223 };
4224
4225 static const AVOption h263p_options[] = {
4226     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4227     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4228     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4229     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4230     FF_MPV_COMMON_OPTS
4231     { NULL },
4232 };
4233 static const AVClass h263p_class = {
4234     .class_name = "H.263p encoder",
4235     .item_name  = av_default_item_name,
4236     .option     = h263p_options,
4237     .version    = LIBAVUTIL_VERSION_INT,
4238 };
4239
4240 AVCodec ff_h263p_encoder = {
4241     .name           = "h263p",
4242     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4243     .type           = AVMEDIA_TYPE_VIDEO,
4244     .id             = AV_CODEC_ID_H263P,
4245     .priv_data_size = sizeof(MpegEncContext),
4246     .init           = ff_MPV_encode_init,
4247     .encode2        = ff_MPV_encode_picture,
4248     .close          = ff_MPV_encode_end,
4249     .capabilities   = CODEC_CAP_SLICE_THREADS,
4250     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4251     .priv_class     = &h263p_class,
4252 };
4253
4254 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4255
4256 AVCodec ff_msmpeg4v2_encoder = {
4257     .name           = "msmpeg4v2",
4258     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4259     .type           = AVMEDIA_TYPE_VIDEO,
4260     .id             = AV_CODEC_ID_MSMPEG4V2,
4261     .priv_data_size = sizeof(MpegEncContext),
4262     .init           = ff_MPV_encode_init,
4263     .encode2        = ff_MPV_encode_picture,
4264     .close          = ff_MPV_encode_end,
4265     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4266     .priv_class     = &msmpeg4v2_class,
4267 };
4268
4269 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4270
4271 AVCodec ff_msmpeg4v3_encoder = {
4272     .name           = "msmpeg4",
4273     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4274     .type           = AVMEDIA_TYPE_VIDEO,
4275     .id             = AV_CODEC_ID_MSMPEG4V3,
4276     .priv_data_size = sizeof(MpegEncContext),
4277     .init           = ff_MPV_encode_init,
4278     .encode2        = ff_MPV_encode_picture,
4279     .close          = ff_MPV_encode_end,
4280     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4281     .priv_class     = &msmpeg4v3_class,
4282 };
4283
4284 FF_MPV_GENERIC_CLASS(wmv1)
4285
4286 AVCodec ff_wmv1_encoder = {
4287     .name           = "wmv1",
4288     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4289     .type           = AVMEDIA_TYPE_VIDEO,
4290     .id             = AV_CODEC_ID_WMV1,
4291     .priv_data_size = sizeof(MpegEncContext),
4292     .init           = ff_MPV_encode_init,
4293     .encode2        = ff_MPV_encode_picture,
4294     .close          = ff_MPV_encode_end,
4295     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4296     .priv_class     = &wmv1_class,
4297 };