]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
avutil: remove timer.h include from internal.h
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "dsputil.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "h261.h"
44 #include "h263.h"
45 #include "mathops.h"
46 #include "mjpegenc.h"
47 #include "msmpeg4.h"
48 #include "faandct.h"
49 #include "thread.h"
50 #include "aandcttab.h"
51 #include "flv.h"
52 #include "mpeg4video.h"
53 #include "internal.h"
54 #include "bytestream.h"
55 #include <limits.h>
56
57 static int encode_picture(MpegEncContext *s, int picture_number);
58 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
59 static int sse_mb(MpegEncContext *s);
60 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
61 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
62
63 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
64 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
65
66 const AVOption ff_mpv_generic_options[] = {
67     FF_MPV_COMMON_OPTS
68     { NULL },
69 };
70
71 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
72                        uint16_t (*qmat16)[2][64],
73                        const uint16_t *quant_matrix,
74                        int bias, int qmin, int qmax, int intra)
75 {
76     int qscale;
77     int shift = 0;
78
79     for (qscale = qmin; qscale <= qmax; qscale++) {
80         int i;
81         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
82             dsp->fdct == ff_jpeg_fdct_islow_10 ||
83             dsp->fdct == ff_faandct) {
84             for (i = 0; i < 64; i++) {
85                 const int j = dsp->idct_permutation[i];
86                 /* 16 <= qscale * quant_matrix[i] <= 7905
87                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
88                  *             19952 <=              x  <= 249205026
89                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
90                  *           3444240 >= (1 << 36) / (x) >= 275 */
91
92                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
93                                         (qscale * quant_matrix[j]));
94             }
95         } else if (dsp->fdct == ff_fdct_ifast) {
96             for (i = 0; i < 64; i++) {
97                 const int j = dsp->idct_permutation[i];
98                 /* 16 <= qscale * quant_matrix[i] <= 7905
99                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
100                  *             19952 <=              x  <= 249205026
101                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
102                  *           3444240 >= (1 << 36) / (x) >= 275 */
103
104                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
105                                         (ff_aanscales[i] * qscale *
106                                          quant_matrix[j]));
107             }
108         } else {
109             for (i = 0; i < 64; i++) {
110                 const int j = dsp->idct_permutation[i];
111                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
112                  * Assume x = qscale * quant_matrix[i]
113                  * So             16 <=              x  <= 7905
114                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
115                  * so          32768 >= (1 << 19) / (x) >= 67 */
116                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
117                                         (qscale * quant_matrix[j]));
118                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
119                 //                    (qscale * quant_matrix[i]);
120                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
121                                        (qscale * quant_matrix[j]);
122
123                 if (qmat16[qscale][0][i] == 0 ||
124                     qmat16[qscale][0][i] == 128 * 256)
125                     qmat16[qscale][0][i] = 128 * 256 - 1;
126                 qmat16[qscale][1][i] =
127                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
128                                 qmat16[qscale][0][i]);
129             }
130         }
131
132         for (i = intra; i < 64; i++) {
133             int64_t max = 8191;
134             if (dsp->fdct == ff_fdct_ifast) {
135                 max = (8191LL * ff_aanscales[i]) >> 14;
136             }
137             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
138                 shift++;
139             }
140         }
141     }
142     if (shift) {
143         av_log(NULL, AV_LOG_INFO,
144                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
145                QMAT_SHIFT - shift);
146     }
147 }
148
149 static inline void update_qscale(MpegEncContext *s)
150 {
151     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
152                 (FF_LAMBDA_SHIFT + 7);
153     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
154
155     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
156                  FF_LAMBDA_SHIFT;
157 }
158
159 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
160 {
161     int i;
162
163     if (matrix) {
164         put_bits(pb, 1, 1);
165         for (i = 0; i < 64; i++) {
166             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
167         }
168     } else
169         put_bits(pb, 1, 0);
170 }
171
172 /**
173  * init s->current_picture.qscale_table from s->lambda_table
174  */
175 void ff_init_qscale_tab(MpegEncContext *s)
176 {
177     int8_t * const qscale_table = s->current_picture.qscale_table;
178     int i;
179
180     for (i = 0; i < s->mb_num; i++) {
181         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
182         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
183         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
184                                                   s->avctx->qmax);
185     }
186 }
187
188 static void update_duplicate_context_after_me(MpegEncContext *dst,
189                                               MpegEncContext *src)
190 {
191 #define COPY(a) dst->a= src->a
192     COPY(pict_type);
193     COPY(current_picture);
194     COPY(f_code);
195     COPY(b_code);
196     COPY(qscale);
197     COPY(lambda);
198     COPY(lambda2);
199     COPY(picture_in_gop_number);
200     COPY(gop_picture_number);
201     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
202     COPY(progressive_frame);    // FIXME don't set in encode_header
203     COPY(partitioned_frame);    // FIXME don't set in encode_header
204 #undef COPY
205 }
206
207 /**
208  * Set the given MpegEncContext to defaults for encoding.
209  * the changed fields will not depend upon the prior state of the MpegEncContext.
210  */
211 static void MPV_encode_defaults(MpegEncContext *s)
212 {
213     int i;
214     ff_MPV_common_defaults(s);
215
216     for (i = -16; i < 16; i++) {
217         default_fcode_tab[i + MAX_MV] = 1;
218     }
219     s->me.mv_penalty = default_mv_penalty;
220     s->fcode_tab     = default_fcode_tab;
221
222     s->input_picture_number  = 0;
223     s->picture_in_gop_number = 0;
224 }
225
226 /* init video encoder */
227 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
228 {
229     MpegEncContext *s = avctx->priv_data;
230     int i, ret;
231
232     MPV_encode_defaults(s);
233
234     switch (avctx->codec_id) {
235     case AV_CODEC_ID_MPEG2VIDEO:
236         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
237             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
238             av_log(avctx, AV_LOG_ERROR,
239                    "only YUV420 and YUV422 are supported\n");
240             return -1;
241         }
242         break;
243     case AV_CODEC_ID_MJPEG:
244         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
245             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
246             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
247               avctx->pix_fmt != AV_PIX_FMT_YUV422P) ||
248              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
249             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
250             return -1;
251         }
252         break;
253     default:
254         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
255             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
256             return -1;
257         }
258     }
259
260     switch (avctx->pix_fmt) {
261     case AV_PIX_FMT_YUVJ422P:
262     case AV_PIX_FMT_YUV422P:
263         s->chroma_format = CHROMA_422;
264         break;
265     case AV_PIX_FMT_YUVJ420P:
266     case AV_PIX_FMT_YUV420P:
267     default:
268         s->chroma_format = CHROMA_420;
269         break;
270     }
271
272     s->bit_rate = avctx->bit_rate;
273     s->width    = avctx->width;
274     s->height   = avctx->height;
275     if (avctx->gop_size > 600 &&
276         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
277         av_log(avctx, AV_LOG_ERROR,
278                "Warning keyframe interval too large! reducing it ...\n");
279         avctx->gop_size = 600;
280     }
281     s->gop_size     = avctx->gop_size;
282     s->avctx        = avctx;
283     s->flags        = avctx->flags;
284     s->flags2       = avctx->flags2;
285     if (avctx->max_b_frames > MAX_B_FRAMES) {
286         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
287                "is %d.\n", MAX_B_FRAMES);
288     }
289     s->max_b_frames = avctx->max_b_frames;
290     s->codec_id     = avctx->codec->id;
291     s->strict_std_compliance = avctx->strict_std_compliance;
292     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
293     s->mpeg_quant         = avctx->mpeg_quant;
294     s->rtp_mode           = !!avctx->rtp_payload_size;
295     s->intra_dc_precision = avctx->intra_dc_precision;
296     s->user_specified_pts = AV_NOPTS_VALUE;
297
298     if (s->gop_size <= 1) {
299         s->intra_only = 1;
300         s->gop_size   = 12;
301     } else {
302         s->intra_only = 0;
303     }
304
305     s->me_method = avctx->me_method;
306
307     /* Fixed QSCALE */
308     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
309
310     s->adaptive_quant = (s->avctx->lumi_masking ||
311                          s->avctx->dark_masking ||
312                          s->avctx->temporal_cplx_masking ||
313                          s->avctx->spatial_cplx_masking  ||
314                          s->avctx->p_masking      ||
315                          s->avctx->border_masking ||
316                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
317                         !s->fixed_qscale;
318
319     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
320
321     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
322         av_log(avctx, AV_LOG_ERROR,
323                "a vbv buffer size is needed, "
324                "for encoding with a maximum bitrate\n");
325         return -1;
326     }
327
328     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
329         av_log(avctx, AV_LOG_INFO,
330                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
331     }
332
333     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
334         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
335         return -1;
336     }
337
338     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
339         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
340         return -1;
341     }
342
343     if (avctx->rc_max_rate &&
344         avctx->rc_max_rate == avctx->bit_rate &&
345         avctx->rc_max_rate != avctx->rc_min_rate) {
346         av_log(avctx, AV_LOG_INFO,
347                "impossible bitrate constraints, this will fail\n");
348     }
349
350     if (avctx->rc_buffer_size &&
351         avctx->bit_rate * (int64_t)avctx->time_base.num >
352             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
353         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
354         return -1;
355     }
356
357     if (!s->fixed_qscale &&
358         avctx->bit_rate * av_q2d(avctx->time_base) >
359             avctx->bit_rate_tolerance) {
360         av_log(avctx, AV_LOG_ERROR,
361                "bitrate tolerance too small for bitrate\n");
362         return -1;
363     }
364
365     if (s->avctx->rc_max_rate &&
366         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
367         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
368          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
369         90000LL * (avctx->rc_buffer_size - 1) >
370             s->avctx->rc_max_rate * 0xFFFFLL) {
371         av_log(avctx, AV_LOG_INFO,
372                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
373                "specified vbv buffer is too large for the given bitrate!\n");
374     }
375
376     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
377         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
378         s->codec_id != AV_CODEC_ID_FLV1) {
379         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
380         return -1;
381     }
382
383     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
384         av_log(avctx, AV_LOG_ERROR,
385                "OBMC is only supported with simple mb decision\n");
386         return -1;
387     }
388
389     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
390         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
391         return -1;
392     }
393
394     if (s->max_b_frames                    &&
395         s->codec_id != AV_CODEC_ID_MPEG4      &&
396         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
397         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
398         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
399         return -1;
400     }
401
402     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
403          s->codec_id == AV_CODEC_ID_H263  ||
404          s->codec_id == AV_CODEC_ID_H263P) &&
405         (avctx->sample_aspect_ratio.num > 255 ||
406          avctx->sample_aspect_ratio.den > 255)) {
407         av_log(avctx, AV_LOG_ERROR,
408                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
409                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
410         return -1;
411     }
412
413     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
414         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
415         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
416         return -1;
417     }
418
419     // FIXME mpeg2 uses that too
420     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
421         av_log(avctx, AV_LOG_ERROR,
422                "mpeg2 style quantization not supported by codec\n");
423         return -1;
424     }
425
426     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
427         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
428         return -1;
429     }
430
431     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
432         s->avctx->mb_decision != FF_MB_DECISION_RD) {
433         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
434         return -1;
435     }
436
437     if (s->avctx->scenechange_threshold < 1000000000 &&
438         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
439         av_log(avctx, AV_LOG_ERROR,
440                "closed gop with scene change detection are not supported yet, "
441                "set threshold to 1000000000\n");
442         return -1;
443     }
444
445     if (s->flags & CODEC_FLAG_LOW_DELAY) {
446         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
447             av_log(avctx, AV_LOG_ERROR,
448                   "low delay forcing is only available for mpeg2\n");
449             return -1;
450         }
451         if (s->max_b_frames != 0) {
452             av_log(avctx, AV_LOG_ERROR,
453                    "b frames cannot be used with low delay\n");
454             return -1;
455         }
456     }
457
458     if (s->q_scale_type == 1) {
459         if (avctx->qmax > 12) {
460             av_log(avctx, AV_LOG_ERROR,
461                    "non linear quant only supports qmax <= 12 currently\n");
462             return -1;
463         }
464     }
465
466     if (s->avctx->thread_count > 1         &&
467         s->codec_id != AV_CODEC_ID_MPEG4      &&
468         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
469         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
470         (s->codec_id != AV_CODEC_ID_H263P)) {
471         av_log(avctx, AV_LOG_ERROR,
472                "multi threaded encoding not supported by codec\n");
473         return -1;
474     }
475
476     if (s->avctx->thread_count < 1) {
477         av_log(avctx, AV_LOG_ERROR,
478                "automatic thread number detection not supported by codec,"
479                "patch welcome\n");
480         return -1;
481     }
482
483     if (s->avctx->thread_count > 1)
484         s->rtp_mode = 1;
485
486     if (!avctx->time_base.den || !avctx->time_base.num) {
487         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
488         return -1;
489     }
490
491     i = (INT_MAX / 2 + 128) >> 8;
492     if (avctx->mb_threshold >= i) {
493         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
494                i - 1);
495         return -1;
496     }
497
498     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
499         av_log(avctx, AV_LOG_INFO,
500                "notice: b_frame_strategy only affects the first pass\n");
501         avctx->b_frame_strategy = 0;
502     }
503
504     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
505     if (i > 1) {
506         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
507         avctx->time_base.den /= i;
508         avctx->time_base.num /= i;
509         //return -1;
510     }
511
512     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
513         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
514         // (a + x * 3 / 8) / x
515         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
516         s->inter_quant_bias = 0;
517     } else {
518         s->intra_quant_bias = 0;
519         // (a - x / 4) / x
520         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
521     }
522
523     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
524         s->intra_quant_bias = avctx->intra_quant_bias;
525     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
526         s->inter_quant_bias = avctx->inter_quant_bias;
527
528     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
529         s->avctx->time_base.den > (1 << 16) - 1) {
530         av_log(avctx, AV_LOG_ERROR,
531                "timebase %d/%d not supported by MPEG 4 standard, "
532                "the maximum admitted value for the timebase denominator "
533                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
534                (1 << 16) - 1);
535         return -1;
536     }
537     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
538
539     switch (avctx->codec->id) {
540     case AV_CODEC_ID_MPEG1VIDEO:
541         s->out_format = FMT_MPEG1;
542         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
543         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
544         break;
545     case AV_CODEC_ID_MPEG2VIDEO:
546         s->out_format = FMT_MPEG1;
547         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
548         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
549         s->rtp_mode   = 1;
550         break;
551     case AV_CODEC_ID_MJPEG:
552         s->out_format = FMT_MJPEG;
553         s->intra_only = 1; /* force intra only for jpeg */
554         if (!CONFIG_MJPEG_ENCODER ||
555             ff_mjpeg_encode_init(s) < 0)
556             return -1;
557         avctx->delay = 0;
558         s->low_delay = 1;
559         break;
560     case AV_CODEC_ID_H261:
561         if (!CONFIG_H261_ENCODER)
562             return -1;
563         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
564             av_log(avctx, AV_LOG_ERROR,
565                    "The specified picture size of %dx%d is not valid for the "
566                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
567                     s->width, s->height);
568             return -1;
569         }
570         s->out_format = FMT_H261;
571         avctx->delay  = 0;
572         s->low_delay  = 1;
573         break;
574     case AV_CODEC_ID_H263:
575         if (!CONFIG_H263_ENCODER)
576         return -1;
577         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
578                              s->width, s->height) == 8) {
579             av_log(avctx, AV_LOG_INFO,
580                    "The specified picture size of %dx%d is not valid for "
581                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
582                    "352x288, 704x576, and 1408x1152."
583                    "Try H.263+.\n", s->width, s->height);
584             return -1;
585         }
586         s->out_format = FMT_H263;
587         avctx->delay  = 0;
588         s->low_delay  = 1;
589         break;
590     case AV_CODEC_ID_H263P:
591         s->out_format = FMT_H263;
592         s->h263_plus  = 1;
593         /* Fx */
594         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
595         s->modified_quant  = s->h263_aic;
596         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
597         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
598
599         /* /Fx */
600         /* These are just to be sure */
601         avctx->delay = 0;
602         s->low_delay = 1;
603         break;
604     case AV_CODEC_ID_FLV1:
605         s->out_format      = FMT_H263;
606         s->h263_flv        = 2; /* format = 1; 11-bit codes */
607         s->unrestricted_mv = 1;
608         s->rtp_mode  = 0; /* don't allow GOB */
609         avctx->delay = 0;
610         s->low_delay = 1;
611         break;
612     case AV_CODEC_ID_RV10:
613         s->out_format = FMT_H263;
614         avctx->delay  = 0;
615         s->low_delay  = 1;
616         break;
617     case AV_CODEC_ID_RV20:
618         s->out_format      = FMT_H263;
619         avctx->delay       = 0;
620         s->low_delay       = 1;
621         s->modified_quant  = 1;
622         s->h263_aic        = 1;
623         s->h263_plus       = 1;
624         s->loop_filter     = 1;
625         s->unrestricted_mv = 0;
626         break;
627     case AV_CODEC_ID_MPEG4:
628         s->out_format      = FMT_H263;
629         s->h263_pred       = 1;
630         s->unrestricted_mv = 1;
631         s->low_delay       = s->max_b_frames ? 0 : 1;
632         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
633         break;
634     case AV_CODEC_ID_MSMPEG4V2:
635         s->out_format      = FMT_H263;
636         s->h263_pred       = 1;
637         s->unrestricted_mv = 1;
638         s->msmpeg4_version = 2;
639         avctx->delay       = 0;
640         s->low_delay       = 1;
641         break;
642     case AV_CODEC_ID_MSMPEG4V3:
643         s->out_format        = FMT_H263;
644         s->h263_pred         = 1;
645         s->unrestricted_mv   = 1;
646         s->msmpeg4_version   = 3;
647         s->flipflop_rounding = 1;
648         avctx->delay         = 0;
649         s->low_delay         = 1;
650         break;
651     case AV_CODEC_ID_WMV1:
652         s->out_format        = FMT_H263;
653         s->h263_pred         = 1;
654         s->unrestricted_mv   = 1;
655         s->msmpeg4_version   = 4;
656         s->flipflop_rounding = 1;
657         avctx->delay         = 0;
658         s->low_delay         = 1;
659         break;
660     case AV_CODEC_ID_WMV2:
661         s->out_format        = FMT_H263;
662         s->h263_pred         = 1;
663         s->unrestricted_mv   = 1;
664         s->msmpeg4_version   = 5;
665         s->flipflop_rounding = 1;
666         avctx->delay         = 0;
667         s->low_delay         = 1;
668         break;
669     default:
670         return -1;
671     }
672
673     avctx->has_b_frames = !s->low_delay;
674
675     s->encoding = 1;
676
677     s->progressive_frame    =
678     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
679                                                 CODEC_FLAG_INTERLACED_ME) ||
680                                 s->alternate_scan);
681
682     /* init */
683     if (ff_MPV_common_init(s) < 0)
684         return -1;
685
686     if (ARCH_X86)
687         ff_MPV_encode_init_x86(s);
688
689     s->avctx->coded_frame = &s->current_picture.f;
690
691     if (s->msmpeg4_version) {
692         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
693                           2 * 2 * (MAX_LEVEL + 1) *
694                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
695     }
696     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
697
698     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
699     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
700     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
701     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
702     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
703                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
704     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
705                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
706
707     if (s->avctx->noise_reduction) {
708         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
709                           2 * 64 * sizeof(uint16_t), fail);
710     }
711
712     if (CONFIG_H263_ENCODER)
713         ff_h263dsp_init(&s->h263dsp);
714     if (!s->dct_quantize)
715         s->dct_quantize = ff_dct_quantize_c;
716     if (!s->denoise_dct)
717         s->denoise_dct  = denoise_dct_c;
718     s->fast_dct_quantize = s->dct_quantize;
719     if (avctx->trellis)
720         s->dct_quantize  = dct_quantize_trellis_c;
721
722     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
723         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
724
725     s->quant_precision = 5;
726
727     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
728     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
729
730     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
731         ff_h261_encode_init(s);
732     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
733         ff_h263_encode_init(s);
734     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
735         ff_msmpeg4_encode_init(s);
736     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
737         && s->out_format == FMT_MPEG1)
738         ff_mpeg1_encode_init(s);
739
740     /* init q matrix */
741     for (i = 0; i < 64; i++) {
742         int j = s->dsp.idct_permutation[i];
743         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
744             s->mpeg_quant) {
745             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
746             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
747         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
748             s->intra_matrix[j] =
749             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
750         } else {
751             /* mpeg1/2 */
752             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
753             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
754         }
755         if (s->avctx->intra_matrix)
756             s->intra_matrix[j] = s->avctx->intra_matrix[i];
757         if (s->avctx->inter_matrix)
758             s->inter_matrix[j] = s->avctx->inter_matrix[i];
759     }
760
761     /* precompute matrix */
762     /* for mjpeg, we do include qscale in the matrix */
763     if (s->out_format != FMT_MJPEG) {
764         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
765                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
766                           31, 1);
767         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
768                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
769                           31, 0);
770     }
771
772     if (ff_rate_control_init(s) < 0)
773         return -1;
774
775 #if FF_API_ERROR_RATE
776     FF_DISABLE_DEPRECATION_WARNINGS
777     if (avctx->error_rate)
778         s->error_rate = avctx->error_rate;
779     FF_ENABLE_DEPRECATION_WARNINGS;
780 #endif
781
782     if (avctx->b_frame_strategy == 2) {
783         for (i = 0; i < s->max_b_frames + 2; i++) {
784             s->tmp_frames[i] = av_frame_alloc();
785             if (!s->tmp_frames[i])
786                 return AVERROR(ENOMEM);
787
788             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
789             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
790             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
791
792             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
793             if (ret < 0)
794                 return ret;
795         }
796     }
797
798     return 0;
799 fail:
800     ff_MPV_encode_end(avctx);
801     return AVERROR_UNKNOWN;
802 }
803
804 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
805 {
806     MpegEncContext *s = avctx->priv_data;
807     int i;
808
809     ff_rate_control_uninit(s);
810
811     ff_MPV_common_end(s);
812     if (CONFIG_MJPEG_ENCODER &&
813         s->out_format == FMT_MJPEG)
814         ff_mjpeg_encode_close(s);
815
816     av_freep(&avctx->extradata);
817
818     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
819         av_frame_free(&s->tmp_frames[i]);
820
821     ff_free_picture_tables(&s->new_picture);
822     ff_mpeg_unref_picture(s, &s->new_picture);
823
824     av_freep(&s->avctx->stats_out);
825     av_freep(&s->ac_stats);
826
827     av_freep(&s->q_intra_matrix);
828     av_freep(&s->q_inter_matrix);
829     av_freep(&s->q_intra_matrix16);
830     av_freep(&s->q_inter_matrix16);
831     av_freep(&s->input_picture);
832     av_freep(&s->reordered_input_picture);
833     av_freep(&s->dct_offset);
834
835     return 0;
836 }
837
/**
 * Sum of absolute differences between a 16x16 pixel block and a constant.
 *
 * @param src    top-left pixel of the block
 * @param ref    value every pixel is compared against
 * @param stride offset, in bytes, between the starts of consecutive rows
 * @return the sum over all 256 pixels of |pixel - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            sum += diff < 0 ? -diff : diff;
        }
    }

    return sum;
}
851
852 static int get_intra_count(MpegEncContext *s, uint8_t *src,
853                            uint8_t *ref, int stride)
854 {
855     int x, y, w, h;
856     int acc = 0;
857
858     w = s->width  & ~15;
859     h = s->height & ~15;
860
861     for (y = 0; y < h; y += 16) {
862         for (x = 0; x < w; x += 16) {
863             int offset = x + y * stride;
864             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
865                                      16);
866             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
867             int sae  = get_sae(src + offset, mean, stride);
868
869             acc += sae + 500 < sad;
870         }
871     }
872     return acc;
873 }
874
875
876 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
877 {
878     Picture *pic = NULL;
879     int64_t pts;
880     int i, display_picture_number = 0, ret;
881     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
882                                                  (s->low_delay ? 0 : 1);
883     int direct = 1;
884
885     if (pic_arg) {
886         pts = pic_arg->pts;
887         display_picture_number = s->input_picture_number++;
888
889         if (pts != AV_NOPTS_VALUE) {
890             if (s->user_specified_pts != AV_NOPTS_VALUE) {
891                 int64_t time = pts;
892                 int64_t last = s->user_specified_pts;
893
894                 if (time <= last) {
895                     av_log(s->avctx, AV_LOG_ERROR,
896                            "Error, Invalid timestamp=%"PRId64", "
897                            "last=%"PRId64"\n", pts, s->user_specified_pts);
898                     return -1;
899                 }
900
901                 if (!s->low_delay && display_picture_number == 1)
902                     s->dts_delta = time - last;
903             }
904             s->user_specified_pts = pts;
905         } else {
906             if (s->user_specified_pts != AV_NOPTS_VALUE) {
907                 s->user_specified_pts =
908                 pts = s->user_specified_pts + 1;
909                 av_log(s->avctx, AV_LOG_INFO,
910                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
911                        pts);
912             } else {
913                 pts = display_picture_number;
914             }
915         }
916     }
917
918     if (pic_arg) {
919         if (!pic_arg->buf[0]);
920             direct = 0;
921         if (pic_arg->linesize[0] != s->linesize)
922             direct = 0;
923         if (pic_arg->linesize[1] != s->uvlinesize)
924             direct = 0;
925         if (pic_arg->linesize[2] != s->uvlinesize)
926             direct = 0;
927
928         av_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
929                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
930
931         if (direct) {
932             i = ff_find_unused_picture(s, 1);
933             if (i < 0)
934                 return i;
935
936             pic = &s->picture[i];
937             pic->reference = 3;
938
939             if ((ret = av_frame_ref(&pic->f, pic_arg)) < 0)
940                 return ret;
941             if (ff_alloc_picture(s, pic, 1) < 0) {
942                 return -1;
943             }
944         } else {
945             i = ff_find_unused_picture(s, 0);
946             if (i < 0)
947                 return i;
948
949             pic = &s->picture[i];
950             pic->reference = 3;
951
952             if (ff_alloc_picture(s, pic, 0) < 0) {
953                 return -1;
954             }
955
956             if (pic->f.data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
957                 pic->f.data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
958                 pic->f.data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
959                 // empty
960             } else {
961                 int h_chroma_shift, v_chroma_shift;
962                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
963                                                  &h_chroma_shift,
964                                                  &v_chroma_shift);
965
966                 for (i = 0; i < 3; i++) {
967                     int src_stride = pic_arg->linesize[i];
968                     int dst_stride = i ? s->uvlinesize : s->linesize;
969                     int h_shift = i ? h_chroma_shift : 0;
970                     int v_shift = i ? v_chroma_shift : 0;
971                     int w = s->width  >> h_shift;
972                     int h = s->height >> v_shift;
973                     uint8_t *src = pic_arg->data[i];
974                     uint8_t *dst = pic->f.data[i];
975
976                     if (!s->avctx->rc_buffer_size)
977                         dst += INPLACE_OFFSET;
978
979                     if (src_stride == dst_stride)
980                         memcpy(dst, src, src_stride * h);
981                     else {
982                         while (h--) {
983                             memcpy(dst, src, w);
984                             dst += dst_stride;
985                             src += src_stride;
986                         }
987                     }
988                 }
989             }
990         }
991         ret = av_frame_copy_props(&pic->f, pic_arg);
992         if (ret < 0)
993             return ret;
994
995         pic->f.display_picture_number = display_picture_number;
996         pic->f.pts = pts; // we set this here to avoid modifiying pic_arg
997     }
998
999     /* shift buffer entries */
1000     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1001         s->input_picture[i - 1] = s->input_picture[i];
1002
1003     s->input_picture[encoding_delay] = (Picture*) pic;
1004
1005     return 0;
1006 }
1007
1008 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1009 {
1010     int x, y, plane;
1011     int score = 0;
1012     int64_t score64 = 0;
1013
1014     for (plane = 0; plane < 3; plane++) {
1015         const int stride = p->f.linesize[plane];
1016         const int bw = plane ? 1 : 2;
1017         for (y = 0; y < s->mb_height * bw; y++) {
1018             for (x = 0; x < s->mb_width * bw; x++) {
1019                 int off = p->shared ? 0 : 16;
1020                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1021                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1022                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1023
1024                 switch (s->avctx->frame_skip_exp) {
1025                 case 0: score    =  FFMAX(score, v);          break;
1026                 case 1: score   += FFABS(v);                  break;
1027                 case 2: score   += v * v;                     break;
1028                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1029                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1030                 }
1031             }
1032         }
1033     }
1034
1035     if (score)
1036         score64 = score;
1037
1038     if (score64 < s->avctx->frame_skip_threshold)
1039         return 1;
1040     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1041         return 1;
1042     return 0;
1043 }
1044
1045 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1046 {
1047     AVPacket pkt = { 0 };
1048     int ret, got_output;
1049
1050     av_init_packet(&pkt);
1051     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1052     if (ret < 0)
1053         return ret;
1054
1055     ret = pkt.size;
1056     av_free_packet(&pkt);
1057     return ret;
1058 }
1059
1060 static int estimate_best_b_count(MpegEncContext *s)
1061 {
1062     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1063     AVCodecContext *c = avcodec_alloc_context3(NULL);
1064     const int scale = s->avctx->brd_scale;
1065     int i, j, out_size, p_lambda, b_lambda, lambda2;
1066     int64_t best_rd  = INT64_MAX;
1067     int best_b_count = -1;
1068
1069     assert(scale >= 0 && scale <= 3);
1070
1071     //emms_c();
1072     //s->next_picture_ptr->quality;
1073     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1074     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1075     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1076     if (!b_lambda) // FIXME we should do this somewhere else
1077         b_lambda = p_lambda;
1078     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1079                FF_LAMBDA_SHIFT;
1080
1081     c->width        = s->width  >> scale;
1082     c->height       = s->height >> scale;
1083     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1084                       CODEC_FLAG_INPUT_PRESERVED;
1085     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1086     c->mb_decision  = s->avctx->mb_decision;
1087     c->me_cmp       = s->avctx->me_cmp;
1088     c->mb_cmp       = s->avctx->mb_cmp;
1089     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1090     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1091     c->time_base    = s->avctx->time_base;
1092     c->max_b_frames = s->max_b_frames;
1093
1094     if (avcodec_open2(c, codec, NULL) < 0)
1095         return -1;
1096
1097     for (i = 0; i < s->max_b_frames + 2; i++) {
1098         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1099                                                 s->next_picture_ptr;
1100
1101         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1102             pre_input = *pre_input_ptr;
1103
1104             if (!pre_input.shared && i) {
1105                 pre_input.f.data[0] += INPLACE_OFFSET;
1106                 pre_input.f.data[1] += INPLACE_OFFSET;
1107                 pre_input.f.data[2] += INPLACE_OFFSET;
1108             }
1109
1110             s->dsp.shrink[scale](s->tmp_frames[i]->data[0], s->tmp_frames[i]->linesize[0],
1111                                  pre_input.f.data[0], pre_input.f.linesize[0],
1112                                  c->width,      c->height);
1113             s->dsp.shrink[scale](s->tmp_frames[i]->data[1], s->tmp_frames[i]->linesize[1],
1114                                  pre_input.f.data[1], pre_input.f.linesize[1],
1115                                  c->width >> 1, c->height >> 1);
1116             s->dsp.shrink[scale](s->tmp_frames[i]->data[2], s->tmp_frames[i]->linesize[2],
1117                                  pre_input.f.data[2], pre_input.f.linesize[2],
1118                                  c->width >> 1, c->height >> 1);
1119         }
1120     }
1121
1122     for (j = 0; j < s->max_b_frames + 1; j++) {
1123         int64_t rd = 0;
1124
1125         if (!s->input_picture[j])
1126             break;
1127
1128         c->error[0] = c->error[1] = c->error[2] = 0;
1129
1130         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1131         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1132
1133         out_size = encode_frame(c, s->tmp_frames[0]);
1134
1135         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1136
1137         for (i = 0; i < s->max_b_frames + 1; i++) {
1138             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1139
1140             s->tmp_frames[i + 1]->pict_type = is_p ?
1141                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1142             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1143
1144             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1145
1146             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1147         }
1148
1149         /* get the delayed frames */
1150         while (out_size) {
1151             out_size = encode_frame(c, NULL);
1152             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1153         }
1154
1155         rd += c->error[0] + c->error[1] + c->error[2];
1156
1157         if (rd < best_rd) {
1158             best_rd = rd;
1159             best_b_count = j;
1160         }
1161     }
1162
1163     avcodec_close(c);
1164     av_freep(&c);
1165
1166     return best_b_count;
1167 }
1168
1169 static int select_input_picture(MpegEncContext *s)
1170 {
1171     int i, ret;
1172
1173     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1174         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1175     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1176
1177     /* set next picture type & ordering */
1178     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1179         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1180             s->next_picture_ptr == NULL || s->intra_only) {
1181             s->reordered_input_picture[0] = s->input_picture[0];
1182             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1183             s->reordered_input_picture[0]->f.coded_picture_number =
1184                 s->coded_picture_number++;
1185         } else {
1186             int b_frames;
1187
1188             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1189                 if (s->picture_in_gop_number < s->gop_size &&
1190                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1191                     // FIXME check that te gop check above is +-1 correct
1192                     av_frame_unref(&s->input_picture[0]->f);
1193
1194                     emms_c();
1195                     ff_vbv_update(s, 0);
1196
1197                     goto no_output_pic;
1198                 }
1199             }
1200
1201             if (s->flags & CODEC_FLAG_PASS2) {
1202                 for (i = 0; i < s->max_b_frames + 1; i++) {
1203                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1204
1205                     if (pict_num >= s->rc_context.num_entries)
1206                         break;
1207                     if (!s->input_picture[i]) {
1208                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1209                         break;
1210                     }
1211
1212                     s->input_picture[i]->f.pict_type =
1213                         s->rc_context.entry[pict_num].new_pict_type;
1214                 }
1215             }
1216
1217             if (s->avctx->b_frame_strategy == 0) {
1218                 b_frames = s->max_b_frames;
1219                 while (b_frames && !s->input_picture[b_frames])
1220                     b_frames--;
1221             } else if (s->avctx->b_frame_strategy == 1) {
1222                 for (i = 1; i < s->max_b_frames + 1; i++) {
1223                     if (s->input_picture[i] &&
1224                         s->input_picture[i]->b_frame_score == 0) {
1225                         s->input_picture[i]->b_frame_score =
1226                             get_intra_count(s,
1227                                             s->input_picture[i    ]->f.data[0],
1228                                             s->input_picture[i - 1]->f.data[0],
1229                                             s->linesize) + 1;
1230                     }
1231                 }
1232                 for (i = 0; i < s->max_b_frames + 1; i++) {
1233                     if (s->input_picture[i] == NULL ||
1234                         s->input_picture[i]->b_frame_score - 1 >
1235                             s->mb_num / s->avctx->b_sensitivity)
1236                         break;
1237                 }
1238
1239                 b_frames = FFMAX(0, i - 1);
1240
1241                 /* reset scores */
1242                 for (i = 0; i < b_frames + 1; i++) {
1243                     s->input_picture[i]->b_frame_score = 0;
1244                 }
1245             } else if (s->avctx->b_frame_strategy == 2) {
1246                 b_frames = estimate_best_b_count(s);
1247             } else {
1248                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1249                 b_frames = 0;
1250             }
1251
1252             emms_c();
1253
1254             for (i = b_frames - 1; i >= 0; i--) {
1255                 int type = s->input_picture[i]->f.pict_type;
1256                 if (type && type != AV_PICTURE_TYPE_B)
1257                     b_frames = i;
1258             }
1259             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1260                 b_frames == s->max_b_frames) {
1261                 av_log(s->avctx, AV_LOG_ERROR,
1262                        "warning, too many b frames in a row\n");
1263             }
1264
1265             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1266                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1267                     s->gop_size > s->picture_in_gop_number) {
1268                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1269                 } else {
1270                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1271                         b_frames = 0;
1272                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1273                 }
1274             }
1275
1276             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1277                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1278                 b_frames--;
1279
1280             s->reordered_input_picture[0] = s->input_picture[b_frames];
1281             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1282                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1283             s->reordered_input_picture[0]->f.coded_picture_number =
1284                 s->coded_picture_number++;
1285             for (i = 0; i < b_frames; i++) {
1286                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1287                 s->reordered_input_picture[i + 1]->f.pict_type =
1288                     AV_PICTURE_TYPE_B;
1289                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1290                     s->coded_picture_number++;
1291             }
1292         }
1293     }
1294 no_output_pic:
1295     if (s->reordered_input_picture[0]) {
1296         s->reordered_input_picture[0]->reference =
1297            s->reordered_input_picture[0]->f.pict_type !=
1298                AV_PICTURE_TYPE_B ? 3 : 0;
1299
1300         ff_mpeg_unref_picture(s, &s->new_picture);
1301         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1302             return ret;
1303
1304         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1305             // input is a shared pix, so we can't modifiy it -> alloc a new
1306             // one & ensure that the shared one is reuseable
1307
1308             Picture *pic;
1309             int i = ff_find_unused_picture(s, 0);
1310             if (i < 0)
1311                 return i;
1312             pic = &s->picture[i];
1313
1314             pic->reference = s->reordered_input_picture[0]->reference;
1315             if (ff_alloc_picture(s, pic, 0) < 0) {
1316                 return -1;
1317             }
1318
1319             ret = av_frame_copy_props(&pic->f, &s->reordered_input_picture[0]->f);
1320             if (ret < 0)
1321                 return ret;
1322
1323             /* mark us unused / free shared pic */
1324             av_frame_unref(&s->reordered_input_picture[0]->f);
1325             s->reordered_input_picture[0]->shared = 0;
1326
1327             s->current_picture_ptr = pic;
1328         } else {
1329             // input is not a shared pix -> reuse buffer for current_pix
1330             s->current_picture_ptr = s->reordered_input_picture[0];
1331             for (i = 0; i < 4; i++) {
1332                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1333             }
1334         }
1335         ff_mpeg_unref_picture(s, &s->current_picture);
1336         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1337                                        s->current_picture_ptr)) < 0)
1338             return ret;
1339
1340         s->picture_number = s->new_picture.f.display_picture_number;
1341     } else {
1342         ff_mpeg_unref_picture(s, &s->new_picture);
1343     }
1344     return 0;
1345 }
1346
1347 static void frame_end(MpegEncContext *s)
1348 {
1349     int i;
1350
1351     if (s->unrestricted_mv &&
1352         s->current_picture.reference &&
1353         !s->intra_only) {
1354         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1355         int hshift = desc->log2_chroma_w;
1356         int vshift = desc->log2_chroma_h;
1357         s->dsp.draw_edges(s->current_picture.f.data[0], s->linesize,
1358                           s->h_edge_pos, s->v_edge_pos,
1359                           EDGE_WIDTH, EDGE_WIDTH,
1360                           EDGE_TOP | EDGE_BOTTOM);
1361         s->dsp.draw_edges(s->current_picture.f.data[1], s->uvlinesize,
1362                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1363                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1364                           EDGE_TOP | EDGE_BOTTOM);
1365         s->dsp.draw_edges(s->current_picture.f.data[2], s->uvlinesize,
1366                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1367                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1368                           EDGE_TOP | EDGE_BOTTOM);
1369     }
1370
1371     emms_c();
1372
1373     s->last_pict_type                 = s->pict_type;
1374     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f.quality;
1375     if (s->pict_type!= AV_PICTURE_TYPE_B)
1376         s->last_non_b_pict_type = s->pict_type;
1377
1378     if (s->encoding) {
1379         /* release non-reference frames */
1380         for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1381             if (!s->picture[i].reference)
1382                 ff_mpeg_unref_picture(s, &s->picture[i]);
1383         }
1384     }
1385
1386     s->avctx->coded_frame = &s->current_picture_ptr->f;
1387
1388 }
1389
1390 static void update_noise_reduction(MpegEncContext *s)
1391 {
1392     int intra, i;
1393
1394     for (intra = 0; intra < 2; intra++) {
1395         if (s->dct_count[intra] > (1 << 16)) {
1396             for (i = 0; i < 64; i++) {
1397                 s->dct_error_sum[intra][i] >>= 1;
1398             }
1399             s->dct_count[intra] >>= 1;
1400         }
1401
1402         for (i = 0; i < 64; i++) {
1403             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1404                                        s->dct_count[intra] +
1405                                        s->dct_error_sum[intra][i] / 2) /
1406                                       (s->dct_error_sum[intra][i] + 1);
1407         }
1408     }
1409 }
1410
1411 static int frame_start(MpegEncContext *s)
1412 {
1413     int ret;
1414
1415     /* mark & release old frames */
1416     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1417         s->last_picture_ptr != s->next_picture_ptr &&
1418         s->last_picture_ptr->f.buf[0]) {
1419         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1420     }
1421
1422     s->current_picture_ptr->f.pict_type = s->pict_type;
1423     s->current_picture_ptr->f.key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1424
1425     ff_mpeg_unref_picture(s, &s->current_picture);
1426     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1427                                    s->current_picture_ptr)) < 0)
1428         return ret;
1429
1430     if (s->pict_type != AV_PICTURE_TYPE_B) {
1431         s->last_picture_ptr = s->next_picture_ptr;
1432         if (!s->droppable)
1433             s->next_picture_ptr = s->current_picture_ptr;
1434     }
1435
1436     if (s->last_picture_ptr) {
1437         ff_mpeg_unref_picture(s, &s->last_picture);
1438         if (s->last_picture_ptr->f.buf[0] &&
1439             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1440                                        s->last_picture_ptr)) < 0)
1441             return ret;
1442     }
1443     if (s->next_picture_ptr) {
1444         ff_mpeg_unref_picture(s, &s->next_picture);
1445         if (s->next_picture_ptr->f.buf[0] &&
1446             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1447                                        s->next_picture_ptr)) < 0)
1448             return ret;
1449     }
1450
1451     if (s->picture_structure!= PICT_FRAME) {
1452         int i;
1453         for (i = 0; i < 4; i++) {
1454             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1455                 s->current_picture.f.data[i] +=
1456                     s->current_picture.f.linesize[i];
1457             }
1458             s->current_picture.f.linesize[i] *= 2;
1459             s->last_picture.f.linesize[i]    *= 2;
1460             s->next_picture.f.linesize[i]    *= 2;
1461         }
1462     }
1463
1464     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1465         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1466         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1467     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1468         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1469         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1470     } else {
1471         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1472         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1473     }
1474
1475     if (s->dct_error_sum) {
1476         assert(s->avctx->noise_reduction && s->encoding);
1477         update_noise_reduction(s);
1478     }
1479
1480     return 0;
1481 }
1482
1483 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1484                           const AVFrame *pic_arg, int *got_packet)
1485 {
1486     MpegEncContext *s = avctx->priv_data;
1487     int i, stuffing_count, ret;
1488     int context_count = s->slice_context_count;
1489
1490     s->picture_in_gop_number++;
1491
1492     if (load_input_picture(s, pic_arg) < 0)
1493         return -1;
1494
1495     if (select_input_picture(s) < 0) {
1496         return -1;
1497     }
1498
1499     /* output? */
1500     if (s->new_picture.f.data[0]) {
1501         if (!pkt->data &&
1502             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1503             return ret;
1504         if (s->mb_info) {
1505             s->mb_info_ptr = av_packet_new_side_data(pkt,
1506                                  AV_PKT_DATA_H263_MB_INFO,
1507                                  s->mb_width*s->mb_height*12);
1508             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1509         }
1510
1511         for (i = 0; i < context_count; i++) {
1512             int start_y = s->thread_context[i]->start_mb_y;
1513             int   end_y = s->thread_context[i]->  end_mb_y;
1514             int h       = s->mb_height;
1515             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1516             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1517
1518             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1519         }
1520
1521         s->pict_type = s->new_picture.f.pict_type;
1522         //emms_c();
1523         ret = frame_start(s);
1524         if (ret < 0)
1525             return ret;
1526 vbv_retry:
1527         if (encode_picture(s, s->picture_number) < 0)
1528             return -1;
1529
1530         avctx->header_bits = s->header_bits;
1531         avctx->mv_bits     = s->mv_bits;
1532         avctx->misc_bits   = s->misc_bits;
1533         avctx->i_tex_bits  = s->i_tex_bits;
1534         avctx->p_tex_bits  = s->p_tex_bits;
1535         avctx->i_count     = s->i_count;
1536         // FIXME f/b_count in avctx
1537         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1538         avctx->skip_count  = s->skip_count;
1539
1540         frame_end(s);
1541
1542         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1543             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1544
1545         if (avctx->rc_buffer_size) {
1546             RateControlContext *rcc = &s->rc_context;
1547             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1548
1549             if (put_bits_count(&s->pb) > max_size &&
1550                 s->lambda < s->avctx->lmax) {
1551                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1552                                        (s->qscale + 1) / s->qscale);
1553                 if (s->adaptive_quant) {
1554                     int i;
1555                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1556                         s->lambda_table[i] =
1557                             FFMAX(s->lambda_table[i] + 1,
1558                                   s->lambda_table[i] * (s->qscale + 1) /
1559                                   s->qscale);
1560                 }
1561                 s->mb_skipped = 0;        // done in frame_start()
1562                 // done in encode_picture() so we must undo it
1563                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1564                     if (s->flipflop_rounding          ||
1565                         s->codec_id == AV_CODEC_ID_H263P ||
1566                         s->codec_id == AV_CODEC_ID_MPEG4)
1567                         s->no_rounding ^= 1;
1568                 }
1569                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1570                     s->time_base       = s->last_time_base;
1571                     s->last_non_b_time = s->time - s->pp_time;
1572                 }
1573                 for (i = 0; i < context_count; i++) {
1574                     PutBitContext *pb = &s->thread_context[i]->pb;
1575                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1576                 }
1577                 goto vbv_retry;
1578             }
1579
1580             assert(s->avctx->rc_max_rate);
1581         }
1582
1583         if (s->flags & CODEC_FLAG_PASS1)
1584             ff_write_pass1_stats(s);
1585
1586         for (i = 0; i < 4; i++) {
1587             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1588             avctx->error[i] += s->current_picture_ptr->f.error[i];
1589         }
1590
1591         if (s->flags & CODEC_FLAG_PASS1)
1592             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1593                    avctx->i_tex_bits + avctx->p_tex_bits ==
1594                        put_bits_count(&s->pb));
1595         flush_put_bits(&s->pb);
1596         s->frame_bits  = put_bits_count(&s->pb);
1597
1598         stuffing_count = ff_vbv_update(s, s->frame_bits);
1599         if (stuffing_count) {
1600             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1601                     stuffing_count + 50) {
1602                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1603                 return -1;
1604             }
1605
1606             switch (s->codec_id) {
1607             case AV_CODEC_ID_MPEG1VIDEO:
1608             case AV_CODEC_ID_MPEG2VIDEO:
1609                 while (stuffing_count--) {
1610                     put_bits(&s->pb, 8, 0);
1611                 }
1612             break;
1613             case AV_CODEC_ID_MPEG4:
1614                 put_bits(&s->pb, 16, 0);
1615                 put_bits(&s->pb, 16, 0x1C3);
1616                 stuffing_count -= 4;
1617                 while (stuffing_count--) {
1618                     put_bits(&s->pb, 8, 0xFF);
1619                 }
1620             break;
1621             default:
1622                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1623             }
1624             flush_put_bits(&s->pb);
1625             s->frame_bits  = put_bits_count(&s->pb);
1626         }
1627
1628         /* update mpeg1/2 vbv_delay for CBR */
1629         if (s->avctx->rc_max_rate                          &&
1630             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1631             s->out_format == FMT_MPEG1                     &&
1632             90000LL * (avctx->rc_buffer_size - 1) <=
1633                 s->avctx->rc_max_rate * 0xFFFFLL) {
1634             int vbv_delay, min_delay;
1635             double inbits  = s->avctx->rc_max_rate *
1636                              av_q2d(s->avctx->time_base);
1637             int    minbits = s->frame_bits - 8 *
1638                              (s->vbv_delay_ptr - s->pb.buf - 1);
1639             double bits    = s->rc_context.buffer_index + minbits - inbits;
1640
1641             if (bits < 0)
1642                 av_log(s->avctx, AV_LOG_ERROR,
1643                        "Internal error, negative bits\n");
1644
1645             assert(s->repeat_first_field == 0);
1646
1647             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1648             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1649                         s->avctx->rc_max_rate;
1650
1651             vbv_delay = FFMAX(vbv_delay, min_delay);
1652
1653             assert(vbv_delay < 0xFFFF);
1654
1655             s->vbv_delay_ptr[0] &= 0xF8;
1656             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1657             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1658             s->vbv_delay_ptr[2] &= 0x07;
1659             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1660             avctx->vbv_delay     = vbv_delay * 300;
1661         }
1662         s->total_bits     += s->frame_bits;
1663         avctx->frame_bits  = s->frame_bits;
1664
1665         pkt->pts = s->current_picture.f.pts;
1666         if (!s->low_delay) {
1667             if (!s->current_picture.f.coded_picture_number)
1668                 pkt->dts = pkt->pts - s->dts_delta;
1669             else
1670                 pkt->dts = s->reordered_pts;
1671             s->reordered_pts = s->input_picture[0]->f.pts;
1672         } else
1673             pkt->dts = pkt->pts;
1674         if (s->current_picture.f.key_frame)
1675             pkt->flags |= AV_PKT_FLAG_KEY;
1676         if (s->mb_info)
1677             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1678     } else {
1679         s->frame_bits = 0;
1680     }
1681     assert((s->frame_bits & 7) == 0);
1682
1683     pkt->size = s->frame_bits / 8;
1684     *got_packet = !!pkt->size;
1685     return 0;
1686 }
1687
1688 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1689                                                 int n, int threshold)
1690 {
1691     static const char tab[64] = {
1692         3, 2, 2, 1, 1, 1, 1, 1,
1693         1, 1, 1, 1, 1, 1, 1, 1,
1694         1, 1, 1, 1, 1, 1, 1, 1,
1695         0, 0, 0, 0, 0, 0, 0, 0,
1696         0, 0, 0, 0, 0, 0, 0, 0,
1697         0, 0, 0, 0, 0, 0, 0, 0,
1698         0, 0, 0, 0, 0, 0, 0, 0,
1699         0, 0, 0, 0, 0, 0, 0, 0
1700     };
1701     int score = 0;
1702     int run = 0;
1703     int i;
1704     int16_t *block = s->block[n];
1705     const int last_index = s->block_last_index[n];
1706     int skip_dc;
1707
1708     if (threshold < 0) {
1709         skip_dc = 0;
1710         threshold = -threshold;
1711     } else
1712         skip_dc = 1;
1713
1714     /* Are all we could set to zero already zero? */
1715     if (last_index <= skip_dc - 1)
1716         return;
1717
1718     for (i = 0; i <= last_index; i++) {
1719         const int j = s->intra_scantable.permutated[i];
1720         const int level = FFABS(block[j]);
1721         if (level == 1) {
1722             if (skip_dc && i == 0)
1723                 continue;
1724             score += tab[run];
1725             run = 0;
1726         } else if (level > 1) {
1727             return;
1728         } else {
1729             run++;
1730         }
1731     }
1732     if (score >= threshold)
1733         return;
1734     for (i = skip_dc; i <= last_index; i++) {
1735         const int j = s->intra_scantable.permutated[i];
1736         block[j] = 0;
1737     }
1738     if (block[0])
1739         s->block_last_index[n] = 0;
1740     else
1741         s->block_last_index[n] = -1;
1742 }
1743
1744 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1745                                int last_index)
1746 {
1747     int i;
1748     const int maxlevel = s->max_qcoeff;
1749     const int minlevel = s->min_qcoeff;
1750     int overflow = 0;
1751
1752     if (s->mb_intra) {
1753         i = 1; // skip clipping of intra dc
1754     } else
1755         i = 0;
1756
1757     for (; i <= last_index; i++) {
1758         const int j = s->intra_scantable.permutated[i];
1759         int level = block[j];
1760
1761         if (level > maxlevel) {
1762             level = maxlevel;
1763             overflow++;
1764         } else if (level < minlevel) {
1765             level = minlevel;
1766             overflow++;
1767         }
1768
1769         block[j] = level;
1770     }
1771
1772     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1773         av_log(s->avctx, AV_LOG_INFO,
1774                "warning, clipping %d dct coefficients to %d..%d\n",
1775                overflow, minlevel, maxlevel);
1776 }
1777
/**
 * Compute a per-pixel weight for an 8x8 block from local pixel activity.
 *
 * For every pixel the (up to) 3x3 neighbourhood, clipped to the 8x8 block,
 * is examined; the weight is 36 * sqrt(count * sum(v^2) - sum(v)^2) / count
 * where count is the number of neighbourhood pixels actually inside the
 * block.
 *
 * @param weight output, 64 entries stored row by row (x + 8 * y)
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance in bytes between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            int samples  = 0;
            int total    = 0;
            int total_sq = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                for (nx = left; nx < right; nx++) {
                    const int v = ptr[nx + ny * stride];

                    total    += v;
                    total_sq += v * v;
                    samples++;
                }
            }

            weight[col + 8 * row] =
                (36 * ff_sqrt(samples * total_sq - total * total)) / samples;
        }
    }
}
1801
/**
 * Encode the macroblock at (s->mb_x, s->mb_y).
 *
 * The source pixels are either fetched directly (intra) or differenced
 * against the motion compensated prediction written to s->dest[] (inter),
 * then every block is run through s->dct_quantize() and the result is
 * handed to the codec-specific ff_*_encode_mb() bitstream writer.
 *
 * @param s               encoder context
 * @param motion_x        passed through unchanged to the bitstream writer
 * @param motion_y        passed through unchanged to the bitstream writer
 * @param mb_block_height chroma rows per macroblock used for chroma
 *                        addressing (encode_mb() passes 8 or 16)
 * @param mb_block_count  number of entries of s->block[] to process
 *                        (encode_mb() passes 6 or 8)
 */
1802 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1803                                                 int motion_x, int motion_y,
1804                                                 int mb_block_height,
1805                                                 int mb_block_count)
1806 {
1807     int16_t weight[8][64];
1808     int16_t orig[8][64];
1809     const int mb_x = s->mb_x;
1810     const int mb_y = s->mb_y;
1811     int i;
1812     int skip_dct[8];
1813     int dct_offset = s->linesize * 8; // default for progressive frames
1814     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1815     ptrdiff_t wrap_y, wrap_c;
1816
         /* every block starts out with the context-wide skipdct setting */
1817     for (i = 0; i < mb_block_count; i++)
1818         skip_dct[i] = s->skipdct;
1819
         /* pick the quantizer for this macroblock */
1820     if (s->adaptive_quant) {
1821         const int last_qp = s->qscale;
1822         const int mb_xy = mb_x + mb_y * s->mb_stride;
1823
1824         s->lambda = s->lambda_table[mb_xy];
1825         update_qscale(s);
1826
1827         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1828             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1829             s->dquant = s->qscale - last_qp;
1830
1831             if (s->out_format == FMT_H263) {
                     /* the quantizer step is limited to +-2 here */
1832                 s->dquant = av_clip(s->dquant, -2, 2);
1833
1834                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1835                     if (!s->mb_intra) {
1836                         if (s->pict_type == AV_PICTURE_TYPE_B) {
                                 /* odd steps and direct mode get no
                                  * quantizer change in B-frames */
1837                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1838                                 s->dquant = 0;
1839                         }
1840                         if (s->mv_type == MV_TYPE_8X8)
1841                             s->dquant = 0;
1842                     }
1843                 }
1844             }
1845         }
1846         ff_set_qscale(s, last_qp + s->dquant);
1847     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1848         ff_set_qscale(s, s->qscale + s->dquant);
1849
         /* locate the source pixels of this macroblock in the new picture */
1850     wrap_y = s->linesize;
1851     wrap_c = s->uvlinesize;
1852     ptr_y  = s->new_picture.f.data[0] +
1853              (mb_y * 16 * wrap_y)              + mb_x * 16;
1854     ptr_cb = s->new_picture.f.data[1] +
1855              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1856     ptr_cr = s->new_picture.f.data[2] +
1857              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1858
         /* The macroblock extends past the right or bottom picture border:
          * build the source in the edge emulation buffer instead (luma at
          * ebuf, Cb at ebuf + 18 * wrap_y, Cr 8 bytes after Cb).
          * NOTE(review): the chroma calls pass mb_y * 8 and s->height >> 1
          * even when mb_block_height is 16 - confirm this is intended for
          * 4:2:2 input. */
1859     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1860         uint8_t *ebuf = s->edge_emu_buffer + 32;
1861         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
1862                                  wrap_y, wrap_y,
1863                                  16, 16, mb_x * 16, mb_y * 16,
1864                                  s->width, s->height);
1865         ptr_y = ebuf;
1866         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
1867                                  wrap_c, wrap_c,
1868                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1869                                  s->width >> 1, s->height >> 1);
1870         ptr_cb = ebuf + 18 * wrap_y;
1871         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
1872                                  wrap_c, wrap_c,
1873                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1874                                  s->width >> 1, s->height >> 1);
1875         ptr_cr = ebuf + 18 * wrap_y + 8;
1876     }
1877
1878     if (s->mb_intra) {
             /* intra: optionally choose field DCT, then load source pixels */
1879         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1880             int progressive_score, interlaced_score;
1881
1882             s->interlaced_dct = 0;
                 /* frame score: top and bottom 16x8 halves, minus a bias of
                  * 400 in favour of progressive */
1883             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1884                                                     NULL, wrap_y, 8) +
1885                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1886                                                     NULL, wrap_y, 8) - 400;
1887
1888             if (progressive_score > 0) {
                     /* field score: even and odd lines (doubled stride) */
1889                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1890                                                        NULL, wrap_y * 2, 8) +
1891                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1892                                                        NULL, wrap_y * 2, 8);
1893                 if (progressive_score > interlaced_score) {
1894                     s->interlaced_dct = 1;
1895
                         /* blocks 2/3 now start one line down and every
                          * block skips every other line */
1896                     dct_offset = wrap_y;
1897                     wrap_y <<= 1;
1898                     if (s->chroma_format == CHROMA_422)
1899                         wrap_c <<= 1;
1900                 }
1901             }
1902         }
1903
1904         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1905         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1906         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1907         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1908
1909         if (s->flags & CODEC_FLAG_GRAY) {
                 /* chroma blocks 4/5 are replaced by a constant further down */
1910             skip_dct[4] = 1;
1911             skip_dct[5] = 1;
1912         } else {
1913             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1914             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1915             if (!s->chroma_y_shift) { /* 422 */
1916                 s->dsp.get_pixels(s->block[6],
1917                                   ptr_cb + (dct_offset >> 1), wrap_c);
1918                 s->dsp.get_pixels(s->block[7],
1919                                   ptr_cr + (dct_offset >> 1), wrap_c);
1920             }
1921         }
1922     } else {
             /* inter: build the prediction in s->dest[] and code the
              * difference between source and prediction */
1923         op_pixels_func (*op_pix)[4];
1924         qpel_mc_func (*op_qpix)[16];
1925         uint8_t *dest_y, *dest_cb, *dest_cr;
1926
1927         dest_y  = s->dest[0];
1928         dest_cb = s->dest[1];
1929         dest_cr = s->dest[2];
1930
1931         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1932             op_pix  = s->hdsp.put_pixels_tab;
1933             op_qpix = s->dsp.put_qpel_pixels_tab;
1934         } else {
1935             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
1936             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1937         }
1938
1939         if (s->mv_dir & MV_DIR_FORWARD) {
1940             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
1941                           s->last_picture.f.data,
1942                           op_pix, op_qpix);
                 /* a following backward prediction is averaged on top */
1943             op_pix  = s->hdsp.avg_pixels_tab;
1944             op_qpix = s->dsp.avg_qpel_pixels_tab;
1945         }
1946         if (s->mv_dir & MV_DIR_BACKWARD) {
1947             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
1948                           s->next_picture.f.data,
1949                           op_pix, op_qpix);
1950         }
1951
             /* same frame/field DCT decision as in the intra path, but
              * measured on source versus prediction */
1952         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1953             int progressive_score, interlaced_score;
1954
1955             s->interlaced_dct = 0;
1956             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1957                                                     ptr_y,              wrap_y,
1958                                                     8) +
1959                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1960                                                     ptr_y + wrap_y * 8, wrap_y,
1961                                                     8) - 400;
1962
1963             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1964                 progressive_score -= 400;
1965
1966             if (progressive_score > 0) {
1967                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1968                                                        ptr_y,
1969                                                        wrap_y * 2, 8) +
1970                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1971                                                        ptr_y + wrap_y,
1972                                                        wrap_y * 2, 8);
1973
1974                 if (progressive_score > interlaced_score) {
1975                     s->interlaced_dct = 1;
1976
1977                     dct_offset = wrap_y;
1978                     wrap_y <<= 1;
1979                     if (s->chroma_format == CHROMA_422)
1980                         wrap_c <<= 1;
1981                 }
1982             }
1983         }
1984
1985         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1986         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1987         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1988                            dest_y + dct_offset, wrap_y);
1989         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1990                            dest_y + dct_offset + 8, wrap_y);
1991
1992         if (s->flags & CODEC_FLAG_GRAY) {
1993             skip_dct[4] = 1;
1994             skip_dct[5] = 1;
1995         } else {
1996             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1997             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1998             if (!s->chroma_y_shift) { /* 422 */
1999                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
2000                                    dest_cb + (dct_offset >> 1), wrap_c);
2001                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
2002                                    dest_cr + (dct_offset >> 1), wrap_c);
2003             }
2004         }
2005         /* pre quantization */
             /* When the stored mc_mb_var of this macroblock is below
              * 2 * qscale^2, skip the DCT of every 8x8 block whose SAD
              * against the prediction is below 20 * qscale. */
2006         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2007                 2 * s->qscale * s->qscale) {
2008             // FIXME optimize
2009             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
2010                               wrap_y, 8) < 20 * s->qscale)
2011                 skip_dct[0] = 1;
2012             if (s->dsp.sad[1](NULL, ptr_y + 8,
2013                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2014                 skip_dct[1] = 1;
2015             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
2016                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
2017                 skip_dct[2] = 1;
2018             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
2019                               dest_y + dct_offset + 8,
2020                               wrap_y, 8) < 20 * s->qscale)
2021                 skip_dct[3] = 1;
2022             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
2023                               wrap_c, 8) < 20 * s->qscale)
2024                 skip_dct[4] = 1;
2025             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
2026                               wrap_c, 8) < 20 * s->qscale)
2027                 skip_dct[5] = 1;
2028             if (!s->chroma_y_shift) { /* 422 */
2029                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2030                                   dest_cb + (dct_offset >> 1),
2031                                   wrap_c, 8) < 20 * s->qscale)
2032                     skip_dct[6] = 1;
2033                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2034                                   dest_cr + (dct_offset >> 1),
2035                                   wrap_c, 8) < 20 * s->qscale)
2036                     skip_dct[7] = 1;
2037             }
2038         }
2039     }
2040
         /* Noise shaping needs a per-pixel weight for every block that will
          * be coded, plus a copy of the unquantized input blocks. */
2041     if (s->quantizer_noise_shaping) {
2042         if (!skip_dct[0])
2043             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2044         if (!skip_dct[1])
2045             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2046         if (!skip_dct[2])
2047             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2048         if (!skip_dct[3])
2049             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2050         if (!skip_dct[4])
2051             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2052         if (!skip_dct[5])
2053             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2054         if (!s->chroma_y_shift) { /* 422 */
2055             if (!skip_dct[6])
2056                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2057                                   wrap_c);
2058             if (!skip_dct[7])
2059                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2060                                   wrap_c);
2061         }
             /* one copy for all blocks: relies on s->block[0..] being
              * contiguous in memory */
2062         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2063     }
2064
2065     /* DCT & quantize */
2066     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
2067     {
2068         for (i = 0; i < mb_block_count; i++) {
2069             if (!skip_dct[i]) {
2070                 int overflow;
2071                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2072                 // FIXME we could decide to change to quantizer instead of
2073                 // clipping
2074                 // JS: I don't think that would be a good idea it could lower
2075                 //     quality instead of improve it. Just INTRADC clipping
2076                 //     deserves changes in quantizer
2077                 if (overflow)
2078                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2079             } else
                     /* a skipped block is marked as having no coefficients */
2080                 s->block_last_index[i] = -1;
2081         }
2082         if (s->quantizer_noise_shaping) {
2083             for (i = 0; i < mb_block_count; i++) {
2084                 if (!skip_dct[i]) {
2085                     s->block_last_index[i] =
2086                         dct_quantize_refine(s, s->block[i], weight[i],
2087                                             orig[i], i, s->qscale);
2088                 }
2089             }
2090         }
2091
             /* single coefficient elimination, inter macroblocks only */
2092         if (s->luma_elim_threshold && !s->mb_intra)
2093             for (i = 0; i < 4; i++)
2094                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2095         if (s->chroma_elim_threshold && !s->mb_intra)
2096             for (i = 4; i < mb_block_count; i++)
2097                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2098
2099         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2100             for (i = 0; i < mb_block_count; i++) {
2101                 if (s->block_last_index[i] == -1)
2102                     s->coded_score[i] = INT_MAX / 256;
2103             }
2104         }
2105     }
2106
         /* gray-only intra: blocks 4 and 5 become DC-only blocks holding
          * 1024 / c_dc_scale (rounded to nearest) */
2107     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2108         s->block_last_index[4] =
2109         s->block_last_index[5] = 0;
2110         s->block[4][0] =
2111         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2112     }
2113
2114     // non c quantize code returns incorrect block_last_index FIXME
         /* recompute the last index by scanning backwards for the last
          * non-zero coefficient in scan order */
2115     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2116         for (i = 0; i < mb_block_count; i++) {
2117             int j;
2118             if (s->block_last_index[i] > 0) {
2119                 for (j = 63; j > 0; j--) {
2120                     if (s->block[i][s->intra_scantable.permutated[j]])
2121                         break;
2122                 }
2123                 s->block_last_index[i] = j;
2124             }
2125         }
2126     }
2127
2128     /* huffman encode */
         /* the CONFIG_* tests let disabled encoders be compiled out */
2129     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2130     case AV_CODEC_ID_MPEG1VIDEO:
2131     case AV_CODEC_ID_MPEG2VIDEO:
2132         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2133             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2134         break;
2135     case AV_CODEC_ID_MPEG4:
2136         if (CONFIG_MPEG4_ENCODER)
2137             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2138         break;
2139     case AV_CODEC_ID_MSMPEG4V2:
2140     case AV_CODEC_ID_MSMPEG4V3:
2141     case AV_CODEC_ID_WMV1:
2142         if (CONFIG_MSMPEG4_ENCODER)
2143             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2144         break;
2145     case AV_CODEC_ID_WMV2:
2146         if (CONFIG_WMV2_ENCODER)
2147             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2148         break;
2149     case AV_CODEC_ID_H261:
2150         if (CONFIG_H261_ENCODER)
2151             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2152         break;
2153     case AV_CODEC_ID_H263:
2154     case AV_CODEC_ID_H263P:
2155     case AV_CODEC_ID_FLV1:
2156     case AV_CODEC_ID_RV10:
2157     case AV_CODEC_ID_RV20:
2158         if (CONFIG_H263_ENCODER)
2159             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2160         break;
2161     case AV_CODEC_ID_MJPEG:
2162         if (CONFIG_MJPEG_ENCODER)
2163             ff_mjpeg_encode_mb(s, s->block);
2164         break;
2165     default:
2166         assert(0);
2167     }
2168 }
2169
2170 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2171 {
2172     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2173     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2174 }
2175
2176 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2177     int i;
2178
2179     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2180
2181     /* mpeg1 */
2182     d->mb_skip_run= s->mb_skip_run;
2183     for(i=0; i<3; i++)
2184         d->last_dc[i] = s->last_dc[i];
2185
2186     /* statistics */
2187     d->mv_bits= s->mv_bits;
2188     d->i_tex_bits= s->i_tex_bits;
2189     d->p_tex_bits= s->p_tex_bits;
2190     d->i_count= s->i_count;
2191     d->f_count= s->f_count;
2192     d->b_count= s->b_count;
2193     d->skip_count= s->skip_count;
2194     d->misc_bits= s->misc_bits;
2195     d->last_bits= 0;
2196
2197     d->mb_skipped= 0;
2198     d->qscale= s->qscale;
2199     d->dquant= s->dquant;
2200
2201     d->esc3_level_length= s->esc3_level_length;
2202 }
2203
2204 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2205     int i;
2206
2207     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2208     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2209
2210     /* mpeg1 */
2211     d->mb_skip_run= s->mb_skip_run;
2212     for(i=0; i<3; i++)
2213         d->last_dc[i] = s->last_dc[i];
2214
2215     /* statistics */
2216     d->mv_bits= s->mv_bits;
2217     d->i_tex_bits= s->i_tex_bits;
2218     d->p_tex_bits= s->p_tex_bits;
2219     d->i_count= s->i_count;
2220     d->f_count= s->f_count;
2221     d->b_count= s->b_count;
2222     d->skip_count= s->skip_count;
2223     d->misc_bits= s->misc_bits;
2224
2225     d->mb_intra= s->mb_intra;
2226     d->mb_skipped= s->mb_skipped;
2227     d->mv_type= s->mv_type;
2228     d->mv_dir= s->mv_dir;
2229     d->pb= s->pb;
2230     if(s->data_partitioning){
2231         d->pb2= s->pb2;
2232         d->tex_pb= s->tex_pb;
2233     }
2234     d->block= s->block;
2235     for(i=0; i<8; i++)
2236         d->block_last_index[i]= s->block_last_index[i];
2237     d->interlaced_dct= s->interlaced_dct;
2238     d->qscale= s->qscale;
2239
2240     d->esc3_level_length= s->esc3_level_length;
2241 }
2242
2243 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2244                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2245                            int *dmin, int *next_block, int motion_x, int motion_y)
2246 {
2247     int score;
2248     uint8_t *dest_backup[3];
2249
2250     copy_context_before_encode(s, backup, type);
2251
2252     s->block= s->blocks[*next_block];
2253     s->pb= pb[*next_block];
2254     if(s->data_partitioning){
2255         s->pb2   = pb2   [*next_block];
2256         s->tex_pb= tex_pb[*next_block];
2257     }
2258
2259     if(*next_block){
2260         memcpy(dest_backup, s->dest, sizeof(s->dest));
2261         s->dest[0] = s->rd_scratchpad;
2262         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2263         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2264         assert(s->linesize >= 32); //FIXME
2265     }
2266
2267     encode_mb(s, motion_x, motion_y);
2268
2269     score= put_bits_count(&s->pb);
2270     if(s->data_partitioning){
2271         score+= put_bits_count(&s->pb2);
2272         score+= put_bits_count(&s->tex_pb);
2273     }
2274
2275     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2276         ff_MPV_decode_mb(s, s->block);
2277
2278         score *= s->lambda2;
2279         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2280     }
2281
2282     if(*next_block){
2283         memcpy(s->dest, dest_backup, sizeof(s->dest));
2284     }
2285
2286     if(score<*dmin){
2287         *dmin= score;
2288         *next_block^=1;
2289
2290         copy_context_after_encode(best, s, type);
2291     }
2292 }
2293
2294 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2295     uint32_t *sq = ff_squareTbl + 256;
2296     int acc=0;
2297     int x,y;
2298
2299     if(w==16 && h==16)
2300         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2301     else if(w==8 && h==8)
2302         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2303
2304     for(y=0; y<h; y++){
2305         for(x=0; x<w; x++){
2306             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2307         }
2308     }
2309
2310     assert(acc>=0);
2311
2312     return acc;
2313 }
2314
2315 static int sse_mb(MpegEncContext *s){
2316     int w= 16;
2317     int h= 16;
2318
2319     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2320     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2321
2322     if(w==16 && h==16)
2323       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2324         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2325                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2326                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2327       }else{
2328         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2329                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2330                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2331       }
2332     else
2333         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2334                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2335                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2336 }
2337
2338 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2339     MpegEncContext *s= *(void**)arg;
2340
2341
2342     s->me.pre_pass=1;
2343     s->me.dia_size= s->avctx->pre_dia_size;
2344     s->first_slice_line=1;
2345     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2346         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2347             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2348         }
2349         s->first_slice_line=0;
2350     }
2351
2352     s->me.pre_pass=0;
2353
2354     return 0;
2355 }
2356
2357 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2358     MpegEncContext *s= *(void**)arg;
2359
2360     ff_check_alignment();
2361
2362     s->me.dia_size= s->avctx->dia_size;
2363     s->first_slice_line=1;
2364     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2365         s->mb_x=0; //for block init below
2366         ff_init_block_index(s);
2367         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2368             s->block_index[0]+=2;
2369             s->block_index[1]+=2;
2370             s->block_index[2]+=2;
2371             s->block_index[3]+=2;
2372
2373             /* compute motion vector & mb_type and store in context */
2374             if(s->pict_type==AV_PICTURE_TYPE_B)
2375                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2376             else
2377                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2378         }
2379         s->first_slice_line=0;
2380     }
2381     return 0;
2382 }
2383
2384 static int mb_var_thread(AVCodecContext *c, void *arg){
2385     MpegEncContext *s= *(void**)arg;
2386     int mb_x, mb_y;
2387
2388     ff_check_alignment();
2389
2390     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2391         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2392             int xx = mb_x * 16;
2393             int yy = mb_y * 16;
2394             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2395             int varc;
2396             int sum = s->dsp.pix_sum(pix, s->linesize);
2397
2398             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2399
2400             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2401             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2402             s->me.mb_var_sum_temp    += varc;
2403         }
2404     }
2405     return 0;
2406 }
2407
2408 static void write_slice_end(MpegEncContext *s){
2409     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2410         if(s->partitioned_frame){
2411             ff_mpeg4_merge_partitions(s);
2412         }
2413
2414         ff_mpeg4_stuffing(&s->pb);
2415     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2416         ff_mjpeg_encode_stuffing(&s->pb);
2417     }
2418
2419     avpriv_align_put_bits(&s->pb);
2420     flush_put_bits(&s->pb);
2421
2422     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2423         s->misc_bits+= get_bits_diff(s);
2424 }
2425
2426 static void write_mb_info(MpegEncContext *s)
2427 {
2428     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2429     int offset = put_bits_count(&s->pb);
2430     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2431     int gobn = s->mb_y / s->gob_index;
2432     int pred_x, pred_y;
2433     if (CONFIG_H263_ENCODER)
2434         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2435     bytestream_put_le32(&ptr, offset);
2436     bytestream_put_byte(&ptr, s->qscale);
2437     bytestream_put_byte(&ptr, gobn);
2438     bytestream_put_le16(&ptr, mba);
2439     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2440     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2441     /* 4MV not implemented */
2442     bytestream_put_byte(&ptr, 0); /* hmv2 */
2443     bytestream_put_byte(&ptr, 0); /* vmv2 */
2444 }
2445
2446 static void update_mb_info(MpegEncContext *s, int startcode)
2447 {
2448     if (!s->mb_info)
2449         return;
2450     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2451         s->mb_info_size += 12;
2452         s->prev_mb_info = s->last_mb_info;
2453     }
2454     if (startcode) {
2455         s->prev_mb_info = put_bits_count(&s->pb)/8;
2456         /* This might have incremented mb_info_size above, and we return without
2457          * actually writing any info into that slot yet. But in that case,
2458          * this will be called again at the start of the after writing the
2459          * start code, actually writing the mb info. */
2460         return;
2461     }
2462
2463     s->last_mb_info = put_bits_count(&s->pb)/8;
2464     if (!s->mb_info_size)
2465         s->mb_info_size += 12;
2466     write_mb_info(s);
2467 }
2468
2469 static int encode_thread(AVCodecContext *c, void *arg){
2470     MpegEncContext *s= *(void**)arg;
2471     int mb_x, mb_y, pdif = 0;
2472     int chr_h= 16>>s->chroma_y_shift;
2473     int i, j;
2474     MpegEncContext best_s, backup_s;
2475     uint8_t bit_buf[2][MAX_MB_BYTES];
2476     uint8_t bit_buf2[2][MAX_MB_BYTES];
2477     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2478     PutBitContext pb[2], pb2[2], tex_pb[2];
2479
2480     ff_check_alignment();
2481
2482     for(i=0; i<2; i++){
2483         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2484         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2485         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2486     }
2487
2488     s->last_bits= put_bits_count(&s->pb);
2489     s->mv_bits=0;
2490     s->misc_bits=0;
2491     s->i_tex_bits=0;
2492     s->p_tex_bits=0;
2493     s->i_count=0;
2494     s->f_count=0;
2495     s->b_count=0;
2496     s->skip_count=0;
2497
2498     for(i=0; i<3; i++){
2499         /* init last dc values */
2500         /* note: quant matrix value (8) is implied here */
2501         s->last_dc[i] = 128 << s->intra_dc_precision;
2502
2503         s->current_picture.f.error[i] = 0;
2504     }
2505     s->mb_skip_run = 0;
2506     memset(s->last_mv, 0, sizeof(s->last_mv));
2507
2508     s->last_mv_dir = 0;
2509
2510     switch(s->codec_id){
2511     case AV_CODEC_ID_H263:
2512     case AV_CODEC_ID_H263P:
2513     case AV_CODEC_ID_FLV1:
2514         if (CONFIG_H263_ENCODER)
2515             s->gob_index = ff_h263_get_gob_height(s);
2516         break;
2517     case AV_CODEC_ID_MPEG4:
2518         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2519             ff_mpeg4_init_partitions(s);
2520         break;
2521     }
2522
2523     s->resync_mb_x=0;
2524     s->resync_mb_y=0;
2525     s->first_slice_line = 1;
2526     s->ptr_lastgob = s->pb.buf;
2527     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2528         s->mb_x=0;
2529         s->mb_y= mb_y;
2530
2531         ff_set_qscale(s, s->qscale);
2532         ff_init_block_index(s);
2533
2534         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2535             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2536             int mb_type= s->mb_type[xy];
2537 //            int d;
2538             int dmin= INT_MAX;
2539             int dir;
2540
2541             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2542                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2543                 return -1;
2544             }
2545             if(s->data_partitioning){
2546                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2547                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2548                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2549                     return -1;
2550                 }
2551             }
2552
2553             s->mb_x = mb_x;
2554             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2555             ff_update_block_index(s);
2556
2557             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2558                 ff_h261_reorder_mb_index(s);
2559                 xy= s->mb_y*s->mb_stride + s->mb_x;
2560                 mb_type= s->mb_type[xy];
2561             }
2562
2563             /* write gob / video packet header  */
2564             if(s->rtp_mode){
2565                 int current_packet_size, is_gob_start;
2566
2567                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2568
2569                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2570
2571                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2572
2573                 switch(s->codec_id){
2574                 case AV_CODEC_ID_H263:
2575                 case AV_CODEC_ID_H263P:
2576                     if(!s->h263_slice_structured)
2577                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2578                     break;
2579                 case AV_CODEC_ID_MPEG2VIDEO:
2580                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2581                 case AV_CODEC_ID_MPEG1VIDEO:
2582                     if(s->mb_skip_run) is_gob_start=0;
2583                     break;
2584                 }
2585
2586                 if(is_gob_start){
2587                     if(s->start_mb_y != mb_y || mb_x!=0){
2588                         write_slice_end(s);
2589
2590                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2591                             ff_mpeg4_init_partitions(s);
2592                         }
2593                     }
2594
2595                     assert((put_bits_count(&s->pb)&7) == 0);
2596                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2597
2598                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2599                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2600                         int d = 100 / s->error_rate;
2601                         if(r % d == 0){
2602                             current_packet_size=0;
2603                             s->pb.buf_ptr= s->ptr_lastgob;
2604                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2605                         }
2606                     }
2607
2608                     if (s->avctx->rtp_callback){
2609                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2610                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2611                     }
2612                     update_mb_info(s, 1);
2613
2614                     switch(s->codec_id){
2615                     case AV_CODEC_ID_MPEG4:
2616                         if (CONFIG_MPEG4_ENCODER) {
2617                             ff_mpeg4_encode_video_packet_header(s);
2618                             ff_mpeg4_clean_buffers(s);
2619                         }
2620                     break;
2621                     case AV_CODEC_ID_MPEG1VIDEO:
2622                     case AV_CODEC_ID_MPEG2VIDEO:
2623                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2624                             ff_mpeg1_encode_slice_header(s);
2625                             ff_mpeg1_clean_buffers(s);
2626                         }
2627                     break;
2628                     case AV_CODEC_ID_H263:
2629                     case AV_CODEC_ID_H263P:
2630                         if (CONFIG_H263_ENCODER)
2631                             ff_h263_encode_gob_header(s, mb_y);
2632                     break;
2633                     }
2634
2635                     if(s->flags&CODEC_FLAG_PASS1){
2636                         int bits= put_bits_count(&s->pb);
2637                         s->misc_bits+= bits - s->last_bits;
2638                         s->last_bits= bits;
2639                     }
2640
2641                     s->ptr_lastgob += current_packet_size;
2642                     s->first_slice_line=1;
2643                     s->resync_mb_x=mb_x;
2644                     s->resync_mb_y=mb_y;
2645                 }
2646             }
2647
2648             if(  (s->resync_mb_x   == s->mb_x)
2649                && s->resync_mb_y+1 == s->mb_y){
2650                 s->first_slice_line=0;
2651             }
2652
2653             s->mb_skipped=0;
2654             s->dquant=0; //only for QP_RD
2655
2656             update_mb_info(s, 0);
2657
2658             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2659                 int next_block=0;
2660                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2661
2662                 copy_context_before_encode(&backup_s, s, -1);
2663                 backup_s.pb= s->pb;
2664                 best_s.data_partitioning= s->data_partitioning;
2665                 best_s.partitioned_frame= s->partitioned_frame;
2666                 if(s->data_partitioning){
2667                     backup_s.pb2= s->pb2;
2668                     backup_s.tex_pb= s->tex_pb;
2669                 }
2670
2671                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2672                     s->mv_dir = MV_DIR_FORWARD;
2673                     s->mv_type = MV_TYPE_16X16;
2674                     s->mb_intra= 0;
2675                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2676                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2677                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2678                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2679                 }
2680                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2681                     s->mv_dir = MV_DIR_FORWARD;
2682                     s->mv_type = MV_TYPE_FIELD;
2683                     s->mb_intra= 0;
2684                     for(i=0; i<2; i++){
2685                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2686                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2687                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2688                     }
2689                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2690                                  &dmin, &next_block, 0, 0);
2691                 }
2692                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2693                     s->mv_dir = MV_DIR_FORWARD;
2694                     s->mv_type = MV_TYPE_16X16;
2695                     s->mb_intra= 0;
2696                     s->mv[0][0][0] = 0;
2697                     s->mv[0][0][1] = 0;
2698                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2699                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2700                 }
2701                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2702                     s->mv_dir = MV_DIR_FORWARD;
2703                     s->mv_type = MV_TYPE_8X8;
2704                     s->mb_intra= 0;
2705                     for(i=0; i<4; i++){
2706                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2707                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2708                     }
2709                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2710                                  &dmin, &next_block, 0, 0);
2711                 }
2712                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2713                     s->mv_dir = MV_DIR_FORWARD;
2714                     s->mv_type = MV_TYPE_16X16;
2715                     s->mb_intra= 0;
2716                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2717                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2718                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2719                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2720                 }
2721                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2722                     s->mv_dir = MV_DIR_BACKWARD;
2723                     s->mv_type = MV_TYPE_16X16;
2724                     s->mb_intra= 0;
2725                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2726                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2727                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2728                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2729                 }
2730                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2731                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2732                     s->mv_type = MV_TYPE_16X16;
2733                     s->mb_intra= 0;
2734                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2735                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2736                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2737                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2738                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2739                                  &dmin, &next_block, 0, 0);
2740                 }
2741                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2742                     s->mv_dir = MV_DIR_FORWARD;
2743                     s->mv_type = MV_TYPE_FIELD;
2744                     s->mb_intra= 0;
2745                     for(i=0; i<2; i++){
2746                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2747                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2748                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2749                     }
2750                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2751                                  &dmin, &next_block, 0, 0);
2752                 }
2753                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2754                     s->mv_dir = MV_DIR_BACKWARD;
2755                     s->mv_type = MV_TYPE_FIELD;
2756                     s->mb_intra= 0;
2757                     for(i=0; i<2; i++){
2758                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2759                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2760                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2761                     }
2762                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2763                                  &dmin, &next_block, 0, 0);
2764                 }
2765                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2766                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2767                     s->mv_type = MV_TYPE_FIELD;
2768                     s->mb_intra= 0;
2769                     for(dir=0; dir<2; dir++){
2770                         for(i=0; i<2; i++){
2771                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2772                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2773                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2774                         }
2775                     }
2776                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2777                                  &dmin, &next_block, 0, 0);
2778                 }
2779                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2780                     s->mv_dir = 0;
2781                     s->mv_type = MV_TYPE_16X16;
2782                     s->mb_intra= 1;
2783                     s->mv[0][0][0] = 0;
2784                     s->mv[0][0][1] = 0;
2785                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2786                                  &dmin, &next_block, 0, 0);
2787                     if(s->h263_pred || s->h263_aic){
2788                         if(best_s.mb_intra)
2789                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2790                         else
2791                             ff_clean_intra_table_entries(s); //old mode?
2792                     }
2793                 }
2794
2795                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2796                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2797                         const int last_qp= backup_s.qscale;
2798                         int qpi, qp, dc[6];
2799                         int16_t ac[6][16];
2800                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2801                         static const int dquant_tab[4]={-1,1,-2,2};
2802
2803                         assert(backup_s.dquant == 0);
2804
2805                         //FIXME intra
2806                         s->mv_dir= best_s.mv_dir;
2807                         s->mv_type = MV_TYPE_16X16;
2808                         s->mb_intra= best_s.mb_intra;
2809                         s->mv[0][0][0] = best_s.mv[0][0][0];
2810                         s->mv[0][0][1] = best_s.mv[0][0][1];
2811                         s->mv[1][0][0] = best_s.mv[1][0][0];
2812                         s->mv[1][0][1] = best_s.mv[1][0][1];
2813
2814                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2815                         for(; qpi<4; qpi++){
2816                             int dquant= dquant_tab[qpi];
2817                             qp= last_qp + dquant;
2818                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2819                                 continue;
2820                             backup_s.dquant= dquant;
2821                             if(s->mb_intra && s->dc_val[0]){
2822                                 for(i=0; i<6; i++){
2823                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2824                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2825                                 }
2826                             }
2827
2828                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2829                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2830                             if(best_s.qscale != qp){
2831                                 if(s->mb_intra && s->dc_val[0]){
2832                                     for(i=0; i<6; i++){
2833                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2834                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2835                                     }
2836                                 }
2837                             }
2838                         }
2839                     }
2840                 }
2841                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2842                     int mx= s->b_direct_mv_table[xy][0];
2843                     int my= s->b_direct_mv_table[xy][1];
2844
2845                     backup_s.dquant = 0;
2846                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2847                     s->mb_intra= 0;
2848                     ff_mpeg4_set_direct_mv(s, mx, my);
2849                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2850                                  &dmin, &next_block, mx, my);
2851                 }
2852                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2853                     backup_s.dquant = 0;
2854                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2855                     s->mb_intra= 0;
2856                     ff_mpeg4_set_direct_mv(s, 0, 0);
2857                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2858                                  &dmin, &next_block, 0, 0);
2859                 }
2860                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2861                     int coded=0;
2862                     for(i=0; i<6; i++)
2863                         coded |= s->block_last_index[i];
2864                     if(coded){
2865                         int mx,my;
2866                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2867                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2868                             mx=my=0; //FIXME find the one we actually used
2869                             ff_mpeg4_set_direct_mv(s, mx, my);
2870                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2871                             mx= s->mv[1][0][0];
2872                             my= s->mv[1][0][1];
2873                         }else{
2874                             mx= s->mv[0][0][0];
2875                             my= s->mv[0][0][1];
2876                         }
2877
2878                         s->mv_dir= best_s.mv_dir;
2879                         s->mv_type = best_s.mv_type;
2880                         s->mb_intra= 0;
2881 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2882                         s->mv[0][0][1] = best_s.mv[0][0][1];
2883                         s->mv[1][0][0] = best_s.mv[1][0][0];
2884                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2885                         backup_s.dquant= 0;
2886                         s->skipdct=1;
2887                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2888                                         &dmin, &next_block, mx, my);
2889                         s->skipdct=0;
2890                     }
2891                 }
2892
2893                 s->current_picture.qscale_table[xy] = best_s.qscale;
2894
2895                 copy_context_after_encode(s, &best_s, -1);
2896
2897                 pb_bits_count= put_bits_count(&s->pb);
2898                 flush_put_bits(&s->pb);
2899                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2900                 s->pb= backup_s.pb;
2901
2902                 if(s->data_partitioning){
2903                     pb2_bits_count= put_bits_count(&s->pb2);
2904                     flush_put_bits(&s->pb2);
2905                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2906                     s->pb2= backup_s.pb2;
2907
2908                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2909                     flush_put_bits(&s->tex_pb);
2910                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2911                     s->tex_pb= backup_s.tex_pb;
2912                 }
2913                 s->last_bits= put_bits_count(&s->pb);
2914
2915                 if (CONFIG_H263_ENCODER &&
2916                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2917                     ff_h263_update_motion_val(s);
2918
2919                 if(next_block==0){ //FIXME 16 vs linesize16
2920                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2921                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2922                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2923                 }
2924
2925                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2926                     ff_MPV_decode_mb(s, s->block);
2927             } else {
2928                 int motion_x = 0, motion_y = 0;
2929                 s->mv_type=MV_TYPE_16X16;
2930                 // only one MB-Type possible
2931
2932                 switch(mb_type){
2933                 case CANDIDATE_MB_TYPE_INTRA:
2934                     s->mv_dir = 0;
2935                     s->mb_intra= 1;
2936                     motion_x= s->mv[0][0][0] = 0;
2937                     motion_y= s->mv[0][0][1] = 0;
2938                     break;
2939                 case CANDIDATE_MB_TYPE_INTER:
2940                     s->mv_dir = MV_DIR_FORWARD;
2941                     s->mb_intra= 0;
2942                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2943                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2944                     break;
2945                 case CANDIDATE_MB_TYPE_INTER_I:
2946                     s->mv_dir = MV_DIR_FORWARD;
2947                     s->mv_type = MV_TYPE_FIELD;
2948                     s->mb_intra= 0;
2949                     for(i=0; i<2; i++){
2950                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2951                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2952                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2953                     }
2954                     break;
2955                 case CANDIDATE_MB_TYPE_INTER4V:
2956                     s->mv_dir = MV_DIR_FORWARD;
2957                     s->mv_type = MV_TYPE_8X8;
2958                     s->mb_intra= 0;
2959                     for(i=0; i<4; i++){
2960                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2961                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2962                     }
2963                     break;
2964                 case CANDIDATE_MB_TYPE_DIRECT:
2965                     if (CONFIG_MPEG4_ENCODER) {
2966                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2967                         s->mb_intra= 0;
2968                         motion_x=s->b_direct_mv_table[xy][0];
2969                         motion_y=s->b_direct_mv_table[xy][1];
2970                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2971                     }
2972                     break;
2973                 case CANDIDATE_MB_TYPE_DIRECT0:
2974                     if (CONFIG_MPEG4_ENCODER) {
2975                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2976                         s->mb_intra= 0;
2977                         ff_mpeg4_set_direct_mv(s, 0, 0);
2978                     }
2979                     break;
2980                 case CANDIDATE_MB_TYPE_BIDIR:
2981                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2982                     s->mb_intra= 0;
2983                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2984                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2985                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2986                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2987                     break;
2988                 case CANDIDATE_MB_TYPE_BACKWARD:
2989                     s->mv_dir = MV_DIR_BACKWARD;
2990                     s->mb_intra= 0;
2991                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2992                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2993                     break;
2994                 case CANDIDATE_MB_TYPE_FORWARD:
2995                     s->mv_dir = MV_DIR_FORWARD;
2996                     s->mb_intra= 0;
2997                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2998                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2999                     break;
3000                 case CANDIDATE_MB_TYPE_FORWARD_I:
3001                     s->mv_dir = MV_DIR_FORWARD;
3002                     s->mv_type = MV_TYPE_FIELD;
3003                     s->mb_intra= 0;
3004                     for(i=0; i<2; i++){
3005                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3006                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3007                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3008                     }
3009                     break;
3010                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3011                     s->mv_dir = MV_DIR_BACKWARD;
3012                     s->mv_type = MV_TYPE_FIELD;
3013                     s->mb_intra= 0;
3014                     for(i=0; i<2; i++){
3015                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3016                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3017                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3018                     }
3019                     break;
3020                 case CANDIDATE_MB_TYPE_BIDIR_I:
3021                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3022                     s->mv_type = MV_TYPE_FIELD;
3023                     s->mb_intra= 0;
3024                     for(dir=0; dir<2; dir++){
3025                         for(i=0; i<2; i++){
3026                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3027                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3028                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3029                         }
3030                     }
3031                     break;
3032                 default:
3033                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3034                 }
3035
3036                 encode_mb(s, motion_x, motion_y);
3037
3038                 // RAL: Update last macroblock type
3039                 s->last_mv_dir = s->mv_dir;
3040
3041                 if (CONFIG_H263_ENCODER &&
3042                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3043                     ff_h263_update_motion_val(s);
3044
3045                 ff_MPV_decode_mb(s, s->block);
3046             }
3047
3048             /* clean the MV table in IPS frames for direct mode in B frames */
3049             if(s->mb_intra /* && I,P,S_TYPE */){
3050                 s->p_mv_table[xy][0]=0;
3051                 s->p_mv_table[xy][1]=0;
3052             }
3053
3054             if(s->flags&CODEC_FLAG_PSNR){
3055                 int w= 16;
3056                 int h= 16;
3057
3058                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3059                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3060
3061                 s->current_picture.f.error[0] += sse(
3062                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3063                     s->dest[0], w, h, s->linesize);
3064                 s->current_picture.f.error[1] += sse(
3065                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3066                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3067                 s->current_picture.f.error[2] += sse(
3068                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3069                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3070             }
3071             if(s->loop_filter){
3072                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3073                     ff_h263_loop_filter(s);
3074             }
3075             av_dlog(s->avctx, "MB %d %d bits\n",
3076                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3077         }
3078     }
3079
3080     //not beautiful here but we must write it before flushing so it has to be here
3081     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3082         ff_msmpeg4_encode_ext_header(s);
3083
3084     write_slice_end(s);
3085
3086     /* Send the last GOB if RTP */
3087     if (s->avctx->rtp_callback) {
3088         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3089         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3090         /* Call the RTP callback to send the last GOB */
3091         emms_c();
3092         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3093     }
3094
3095     return 0;
3096 }
3097
/**
 * Add a counter of a slice context to the same counter of the main context
 * and reset it in the source, so that merging again cannot count it twice.
 *
 * Pointers named dst and src must be in scope where the macro is expanded.
 * The do { } while (0) wrapper makes the expansion a single statement, so the
 * macro stays correct inside an unbraced if/else or loop body.
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
3099 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3100     MERGE(me.scene_change_score);
3101     MERGE(me.mc_mb_var_sum_temp);
3102     MERGE(me.mb_var_sum_temp);
3103 }
3104
3105 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3106     int i;
3107
3108     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3109     MERGE(dct_count[1]);
3110     MERGE(mv_bits);
3111     MERGE(i_tex_bits);
3112     MERGE(p_tex_bits);
3113     MERGE(i_count);
3114     MERGE(f_count);
3115     MERGE(b_count);
3116     MERGE(skip_count);
3117     MERGE(misc_bits);
3118     MERGE(er.error_count);
3119     MERGE(padding_bug_score);
3120     MERGE(current_picture.f.error[0]);
3121     MERGE(current_picture.f.error[1]);
3122     MERGE(current_picture.f.error[2]);
3123
3124     if(dst->avctx->noise_reduction){
3125         for(i=0; i<64; i++){
3126             MERGE(dct_error_sum[0][i]);
3127             MERGE(dct_error_sum[1][i]);
3128         }
3129     }
3130
3131     assert(put_bits_count(&src->pb) % 8 ==0);
3132     assert(put_bits_count(&dst->pb) % 8 ==0);
3133     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3134     flush_put_bits(&dst->pb);
3135 }
3136
3137 static int estimate_qp(MpegEncContext *s, int dry_run){
3138     if (s->next_lambda){
3139         s->current_picture_ptr->f.quality =
3140         s->current_picture.f.quality = s->next_lambda;
3141         if(!dry_run) s->next_lambda= 0;
3142     } else if (!s->fixed_qscale) {
3143         s->current_picture_ptr->f.quality =
3144         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3145         if (s->current_picture.f.quality < 0)
3146             return -1;
3147     }
3148
3149     if(s->adaptive_quant){
3150         switch(s->codec_id){
3151         case AV_CODEC_ID_MPEG4:
3152             if (CONFIG_MPEG4_ENCODER)
3153                 ff_clean_mpeg4_qscales(s);
3154             break;
3155         case AV_CODEC_ID_H263:
3156         case AV_CODEC_ID_H263P:
3157         case AV_CODEC_ID_FLV1:
3158             if (CONFIG_H263_ENCODER)
3159                 ff_clean_h263_qscales(s);
3160             break;
3161         default:
3162             ff_init_qscale_tab(s);
3163         }
3164
3165         s->lambda= s->lambda_table[0];
3166         //FIXME broken
3167     }else
3168         s->lambda = s->current_picture.f.quality;
3169     update_qscale(s);
3170     return 0;
3171 }
3172
3173 /* must be called before writing the header */
3174 static void set_frame_distances(MpegEncContext * s){
3175     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3176     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3177
3178     if(s->pict_type==AV_PICTURE_TYPE_B){
3179         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3180         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3181     }else{
3182         s->pp_time= s->time - s->last_non_b_time;
3183         s->last_non_b_time= s->time;
3184         assert(s->picture_number==0 || s->pp_time > 0);
3185     }
3186 }
3187
3188 static int encode_picture(MpegEncContext *s, int picture_number)
3189 {
3190     int i, ret;
3191     int bits;
3192     int context_count = s->slice_context_count;
3193
3194     s->picture_number = picture_number;
3195
3196     /* Reset the average MB variance */
3197     s->me.mb_var_sum_temp    =
3198     s->me.mc_mb_var_sum_temp = 0;
3199
3200     /* we need to initialize some time vars before we can encode b-frames */
3201     // RAL: Condition added for MPEG1VIDEO
3202     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3203         set_frame_distances(s);
3204     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3205         ff_set_mpeg4_time(s);
3206
3207     s->me.scene_change_score=0;
3208
3209 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3210
3211     if(s->pict_type==AV_PICTURE_TYPE_I){
3212         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3213         else                        s->no_rounding=0;
3214     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3215         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3216             s->no_rounding ^= 1;
3217     }
3218
3219     if(s->flags & CODEC_FLAG_PASS2){
3220         if (estimate_qp(s,1) < 0)
3221             return -1;
3222         ff_get_2pass_fcode(s);
3223     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3224         if(s->pict_type==AV_PICTURE_TYPE_B)
3225             s->lambda= s->last_lambda_for[s->pict_type];
3226         else
3227             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3228         update_qscale(s);
3229     }
3230
3231     s->mb_intra=0; //for the rate distortion & bit compare functions
3232     for(i=1; i<context_count; i++){
3233         ret = ff_update_duplicate_context(s->thread_context[i], s);
3234         if (ret < 0)
3235             return ret;
3236     }
3237
3238     if(ff_init_me(s)<0)
3239         return -1;
3240
3241     /* Estimate motion for every MB */
3242     if(s->pict_type != AV_PICTURE_TYPE_I){
3243         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3244         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3245         if (s->pict_type != AV_PICTURE_TYPE_B) {
3246             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3247                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3248             }
3249         }
3250
3251         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3252     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3253         /* I-Frame */
3254         for(i=0; i<s->mb_stride*s->mb_height; i++)
3255             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3256
3257         if(!s->fixed_qscale){
3258             /* finding spatial complexity for I-frame rate control */
3259             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3260         }
3261     }
3262     for(i=1; i<context_count; i++){
3263         merge_context_after_me(s, s->thread_context[i]);
3264     }
3265     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3266     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3267     emms_c();
3268
3269     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3270         s->pict_type= AV_PICTURE_TYPE_I;
3271         for(i=0; i<s->mb_stride*s->mb_height; i++)
3272             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3273         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3274                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3275     }
3276
3277     if(!s->umvplus){
3278         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3279             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3280
3281             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3282                 int a,b;
3283                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3284                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3285                 s->f_code= FFMAX3(s->f_code, a, b);
3286             }
3287
3288             ff_fix_long_p_mvs(s);
3289             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3290             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3291                 int j;
3292                 for(i=0; i<2; i++){
3293                     for(j=0; j<2; j++)
3294                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3295                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3296                 }
3297             }
3298         }
3299
3300         if(s->pict_type==AV_PICTURE_TYPE_B){
3301             int a, b;
3302
3303             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3304             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3305             s->f_code = FFMAX(a, b);
3306
3307             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3308             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3309             s->b_code = FFMAX(a, b);
3310
3311             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3312             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3313             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3314             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3315             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3316                 int dir, j;
3317                 for(dir=0; dir<2; dir++){
3318                     for(i=0; i<2; i++){
3319                         for(j=0; j<2; j++){
3320                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3321                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3322                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3323                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3324                         }
3325                     }
3326                 }
3327             }
3328         }
3329     }
3330
3331     if (estimate_qp(s, 0) < 0)
3332         return -1;
3333
3334     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3335         s->qscale= 3; //reduce clipping problems
3336
3337     if (s->out_format == FMT_MJPEG) {
3338         /* for mjpeg, we do include qscale in the matrix */
3339         for(i=1;i<64;i++){
3340             int j= s->dsp.idct_permutation[i];
3341
3342             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3343         }
3344         s->y_dc_scale_table=
3345         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3346         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3347         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3348                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3349         s->qscale= 8;
3350     }
3351
3352     //FIXME var duplication
3353     s->current_picture_ptr->f.key_frame =
3354     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3355     s->current_picture_ptr->f.pict_type =
3356     s->current_picture.f.pict_type = s->pict_type;
3357
3358     if (s->current_picture.f.key_frame)
3359         s->picture_in_gop_number=0;
3360
3361     s->last_bits= put_bits_count(&s->pb);
3362     switch(s->out_format) {
3363     case FMT_MJPEG:
3364         if (CONFIG_MJPEG_ENCODER)
3365             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3366                                            s->intra_matrix);
3367         break;
3368     case FMT_H261:
3369         if (CONFIG_H261_ENCODER)
3370             ff_h261_encode_picture_header(s, picture_number);
3371         break;
3372     case FMT_H263:
3373         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3374             ff_wmv2_encode_picture_header(s, picture_number);
3375         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3376             ff_msmpeg4_encode_picture_header(s, picture_number);
3377         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3378             ff_mpeg4_encode_picture_header(s, picture_number);
3379         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3380             ff_rv10_encode_picture_header(s, picture_number);
3381         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3382             ff_rv20_encode_picture_header(s, picture_number);
3383         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3384             ff_flv_encode_picture_header(s, picture_number);
3385         else if (CONFIG_H263_ENCODER)
3386             ff_h263_encode_picture_header(s, picture_number);
3387         break;
3388     case FMT_MPEG1:
3389         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3390             ff_mpeg1_encode_picture_header(s, picture_number);
3391         break;
3392     default:
3393         assert(0);
3394     }
3395     bits= put_bits_count(&s->pb);
3396     s->header_bits= bits - s->last_bits;
3397
3398     for(i=1; i<context_count; i++){
3399         update_duplicate_context_after_me(s->thread_context[i], s);
3400     }
3401     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3402     for(i=1; i<context_count; i++){
3403         merge_context_after_encode(s, s->thread_context[i]);
3404     }
3405     emms_c();
3406     return 0;
3407 }
3408
3409 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3410     const int intra= s->mb_intra;
3411     int i;
3412
3413     s->dct_count[intra]++;
3414
3415     for(i=0; i<64; i++){
3416         int level= block[i];
3417
3418         if(level){
3419             if(level>0){
3420                 s->dct_error_sum[intra][i] += level;
3421                 level -= s->dct_offset[intra][i];
3422                 if(level<0) level=0;
3423             }else{
3424                 s->dct_error_sum[intra][i] -= level;
3425                 level += s->dct_offset[intra][i];
3426                 if(level>0) level=0;
3427             }
3428             block[i]= level;
3429         }
3430     }
3431 }
3432
3433 static int dct_quantize_trellis_c(MpegEncContext *s,
3434                                   int16_t *block, int n,
3435                                   int qscale, int *overflow){
3436     const int *qmat;
3437     const uint8_t *scantable= s->intra_scantable.scantable;
3438     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3439     int max=0;
3440     unsigned int threshold1, threshold2;
3441     int bias=0;
3442     int run_tab[65];
3443     int level_tab[65];
3444     int score_tab[65];
3445     int survivor[65];
3446     int survivor_count;
3447     int last_run=0;
3448     int last_level=0;
3449     int last_score= 0;
3450     int last_i;
3451     int coeff[2][64];
3452     int coeff_count[64];
3453     int qmul, qadd, start_i, last_non_zero, i, dc;
3454     const int esc_length= s->ac_esc_length;
3455     uint8_t * length;
3456     uint8_t * last_length;
3457     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3458
3459     s->dsp.fdct (block);
3460
3461     if(s->dct_error_sum)
3462         s->denoise_dct(s, block);
3463     qmul= qscale*16;
3464     qadd= ((qscale-1)|1)*8;
3465
3466     if (s->mb_intra) {
3467         int q;
3468         if (!s->h263_aic) {
3469             if (n < 4)
3470                 q = s->y_dc_scale;
3471             else
3472                 q = s->c_dc_scale;
3473             q = q << 3;
3474         } else{
3475             /* For AIC we skip quant/dequant of INTRADC */
3476             q = 1 << 3;
3477             qadd=0;
3478         }
3479
3480         /* note: block[0] is assumed to be positive */
3481         block[0] = (block[0] + (q >> 1)) / q;
3482         start_i = 1;
3483         last_non_zero = 0;
3484         qmat = s->q_intra_matrix[qscale];
3485         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3486             bias= 1<<(QMAT_SHIFT-1);
3487         length     = s->intra_ac_vlc_length;
3488         last_length= s->intra_ac_vlc_last_length;
3489     } else {
3490         start_i = 0;
3491         last_non_zero = -1;
3492         qmat = s->q_inter_matrix[qscale];
3493         length     = s->inter_ac_vlc_length;
3494         last_length= s->inter_ac_vlc_last_length;
3495     }
3496     last_i= start_i;
3497
3498     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3499     threshold2= (threshold1<<1);
3500
3501     for(i=63; i>=start_i; i--) {
3502         const int j = scantable[i];
3503         int level = block[j] * qmat[j];
3504
3505         if(((unsigned)(level+threshold1))>threshold2){
3506             last_non_zero = i;
3507             break;
3508         }
3509     }
3510
3511     for(i=start_i; i<=last_non_zero; i++) {
3512         const int j = scantable[i];
3513         int level = block[j] * qmat[j];
3514
3515 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3516 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3517         if(((unsigned)(level+threshold1))>threshold2){
3518             if(level>0){
3519                 level= (bias + level)>>QMAT_SHIFT;
3520                 coeff[0][i]= level;
3521                 coeff[1][i]= level-1;
3522 //                coeff[2][k]= level-2;
3523             }else{
3524                 level= (bias - level)>>QMAT_SHIFT;
3525                 coeff[0][i]= -level;
3526                 coeff[1][i]= -level+1;
3527 //                coeff[2][k]= -level+2;
3528             }
3529             coeff_count[i]= FFMIN(level, 2);
3530             assert(coeff_count[i]);
3531             max |=level;
3532         }else{
3533             coeff[0][i]= (level>>31)|1;
3534             coeff_count[i]= 1;
3535         }
3536     }
3537
3538     *overflow= s->max_qcoeff < max; //overflow might have happened
3539
3540     if(last_non_zero < start_i){
3541         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3542         return last_non_zero;
3543     }
3544
3545     score_tab[start_i]= 0;
3546     survivor[0]= start_i;
3547     survivor_count= 1;
3548
3549     for(i=start_i; i<=last_non_zero; i++){
3550         int level_index, j, zero_distortion;
3551         int dct_coeff= FFABS(block[ scantable[i] ]);
3552         int best_score=256*256*256*120;
3553
3554         if (s->dsp.fdct == ff_fdct_ifast)
3555             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3556         zero_distortion= dct_coeff*dct_coeff;
3557
3558         for(level_index=0; level_index < coeff_count[i]; level_index++){
3559             int distortion;
3560             int level= coeff[level_index][i];
3561             const int alevel= FFABS(level);
3562             int unquant_coeff;
3563
3564             assert(level);
3565
3566             if(s->out_format == FMT_H263){
3567                 unquant_coeff= alevel*qmul + qadd;
3568             }else{ //MPEG1
3569                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3570                 if(s->mb_intra){
3571                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3572                         unquant_coeff =   (unquant_coeff - 1) | 1;
3573                 }else{
3574                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3575                         unquant_coeff =   (unquant_coeff - 1) | 1;
3576                 }
3577                 unquant_coeff<<= 3;
3578             }
3579
3580             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3581             level+=64;
3582             if((level&(~127)) == 0){
3583                 for(j=survivor_count-1; j>=0; j--){
3584                     int run= i - survivor[j];
3585                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3586                     score += score_tab[i-run];
3587
3588                     if(score < best_score){
3589                         best_score= score;
3590                         run_tab[i+1]= run;
3591                         level_tab[i+1]= level-64;
3592                     }
3593                 }
3594
3595                 if(s->out_format == FMT_H263){
3596                     for(j=survivor_count-1; j>=0; j--){
3597                         int run= i - survivor[j];
3598                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3599                         score += score_tab[i-run];
3600                         if(score < last_score){
3601                             last_score= score;
3602                             last_run= run;
3603                             last_level= level-64;
3604                             last_i= i+1;
3605                         }
3606                     }
3607                 }
3608             }else{
3609                 distortion += esc_length*lambda;
3610                 for(j=survivor_count-1; j>=0; j--){
3611                     int run= i - survivor[j];
3612                     int score= distortion + score_tab[i-run];
3613
3614                     if(score < best_score){
3615                         best_score= score;
3616                         run_tab[i+1]= run;
3617                         level_tab[i+1]= level-64;
3618                     }
3619                 }
3620
3621                 if(s->out_format == FMT_H263){
3622                   for(j=survivor_count-1; j>=0; j--){
3623                         int run= i - survivor[j];
3624                         int score= distortion + score_tab[i-run];
3625                         if(score < last_score){
3626                             last_score= score;
3627                             last_run= run;
3628                             last_level= level-64;
3629                             last_i= i+1;
3630                         }
3631                     }
3632                 }
3633             }
3634         }
3635
3636         score_tab[i+1]= best_score;
3637
3638         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
3639         if(last_non_zero <= 27){
3640             for(; survivor_count; survivor_count--){
3641                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3642                     break;
3643             }
3644         }else{
3645             for(; survivor_count; survivor_count--){
3646                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3647                     break;
3648             }
3649         }
3650
3651         survivor[ survivor_count++ ]= i+1;
3652     }
3653
3654     if(s->out_format != FMT_H263){
3655         last_score= 256*256*256*120;
3656         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3657             int score= score_tab[i];
3658             if(i) score += lambda*2; //FIXME exacter?
3659
3660             if(score < last_score){
3661                 last_score= score;
3662                 last_i= i;
3663                 last_level= level_tab[i];
3664                 last_run= run_tab[i];
3665             }
3666         }
3667     }
3668
3669     s->coded_score[n] = last_score;
3670
3671     dc= FFABS(block[0]);
3672     last_non_zero= last_i - 1;
3673     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3674
3675     if(last_non_zero < start_i)
3676         return last_non_zero;
3677
3678     if(last_non_zero == 0 && start_i == 0){
3679         int best_level= 0;
3680         int best_score= dc * dc;
3681
3682         for(i=0; i<coeff_count[0]; i++){
3683             int level= coeff[i][0];
3684             int alevel= FFABS(level);
3685             int unquant_coeff, score, distortion;
3686
3687             if(s->out_format == FMT_H263){
3688                     unquant_coeff= (alevel*qmul + qadd)>>3;
3689             }else{ //MPEG1
3690                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3691                     unquant_coeff =   (unquant_coeff - 1) | 1;
3692             }
3693             unquant_coeff = (unquant_coeff + 4) >> 3;
3694             unquant_coeff<<= 3 + 3;
3695
3696             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3697             level+=64;
3698             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3699             else                    score= distortion + esc_length*lambda;
3700
3701             if(score < best_score){
3702                 best_score= score;
3703                 best_level= level - 64;
3704             }
3705         }
3706         block[0]= best_level;
3707         s->coded_score[n] = best_score - dc*dc;
3708         if(best_level == 0) return -1;
3709         else                return last_non_zero;
3710     }
3711
3712     i= last_i;
3713     assert(last_level);
3714
3715     block[ perm_scantable[last_non_zero] ]= last_level;
3716     i -= last_run + 1;
3717
3718     for(; i>start_i; i -= run_tab[i] + 1){
3719         block[ perm_scantable[i-1] ]= level_tab[i];
3720     }
3721
3722     return last_non_zero;
3723 }
3724
3725 //#define REFINE_STATS 1
3726 static int16_t basis[64][64];
3727
3728 static void build_basis(uint8_t *perm){
3729     int i, j, x, y;
3730     emms_c();
3731     for(i=0; i<8; i++){
3732         for(j=0; j<8; j++){
3733             for(y=0; y<8; y++){
3734                 for(x=0; x<8; x++){
3735                     double s= 0.25*(1<<BASIS_SHIFT);
3736                     int index= 8*i + j;
3737                     int perm_index= perm[index];
3738                     if(i==0) s*= sqrt(0.5);
3739                     if(j==0) s*= sqrt(0.5);
3740                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3741                 }
3742             }
3743         }
3744     }
3745 }
3746
3747 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3748                         int16_t *block, int16_t *weight, int16_t *orig,
3749                         int n, int qscale){
3750     int16_t rem[64];
3751     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3752     const uint8_t *scantable= s->intra_scantable.scantable;
3753     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3754 //    unsigned int threshold1, threshold2;
3755 //    int bias=0;
3756     int run_tab[65];
3757     int prev_run=0;
3758     int prev_level=0;
3759     int qmul, qadd, start_i, last_non_zero, i, dc;
3760     uint8_t * length;
3761     uint8_t * last_length;
3762     int lambda;
3763     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3764 #ifdef REFINE_STATS
3765 static int count=0;
3766 static int after_last=0;
3767 static int to_zero=0;
3768 static int from_zero=0;
3769 static int raise=0;
3770 static int lower=0;
3771 static int messed_sign=0;
3772 #endif
3773
3774     if(basis[0][0] == 0)
3775         build_basis(s->dsp.idct_permutation);
3776
3777     qmul= qscale*2;
3778     qadd= (qscale-1)|1;
3779     if (s->mb_intra) {
3780         if (!s->h263_aic) {
3781             if (n < 4)
3782                 q = s->y_dc_scale;
3783             else
3784                 q = s->c_dc_scale;
3785         } else{
3786             /* For AIC we skip quant/dequant of INTRADC */
3787             q = 1;
3788             qadd=0;
3789         }
3790         q <<= RECON_SHIFT-3;
3791         /* note: block[0] is assumed to be positive */
3792         dc= block[0]*q;
3793 //        block[0] = (block[0] + (q >> 1)) / q;
3794         start_i = 1;
3795 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3796 //            bias= 1<<(QMAT_SHIFT-1);
3797         length     = s->intra_ac_vlc_length;
3798         last_length= s->intra_ac_vlc_last_length;
3799     } else {
3800         dc= 0;
3801         start_i = 0;
3802         length     = s->inter_ac_vlc_length;
3803         last_length= s->inter_ac_vlc_last_length;
3804     }
3805     last_non_zero = s->block_last_index[n];
3806
3807 #ifdef REFINE_STATS
3808 {START_TIMER
3809 #endif
3810     dc += (1<<(RECON_SHIFT-1));
3811     for(i=0; i<64; i++){
3812         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3813     }
3814 #ifdef REFINE_STATS
3815 STOP_TIMER("memset rem[]")}
3816 #endif
3817     sum=0;
3818     for(i=0; i<64; i++){
3819         int one= 36;
3820         int qns=4;
3821         int w;
3822
3823         w= FFABS(weight[i]) + qns*one;
3824         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3825
3826         weight[i] = w;
3827 //        w=weight[i] = (63*qns + (w/2)) / w;
3828
3829         assert(w>0);
3830         assert(w<(1<<6));
3831         sum += w*w;
3832     }
3833     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3834 #ifdef REFINE_STATS
3835 {START_TIMER
3836 #endif
3837     run=0;
3838     rle_index=0;
3839     for(i=start_i; i<=last_non_zero; i++){
3840         int j= perm_scantable[i];
3841         const int level= block[j];
3842         int coeff;
3843
3844         if(level){
3845             if(level<0) coeff= qmul*level - qadd;
3846             else        coeff= qmul*level + qadd;
3847             run_tab[rle_index++]=run;
3848             run=0;
3849
3850             s->dsp.add_8x8basis(rem, basis[j], coeff);
3851         }else{
3852             run++;
3853         }
3854     }
3855 #ifdef REFINE_STATS
3856 if(last_non_zero>0){
3857 STOP_TIMER("init rem[]")
3858 }
3859 }
3860
3861 {START_TIMER
3862 #endif
3863     for(;;){
3864         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3865         int best_coeff=0;
3866         int best_change=0;
3867         int run2, best_unquant_change=0, analyze_gradient;
3868 #ifdef REFINE_STATS
3869 {START_TIMER
3870 #endif
3871         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3872
3873         if(analyze_gradient){
3874 #ifdef REFINE_STATS
3875 {START_TIMER
3876 #endif
3877             for(i=0; i<64; i++){
3878                 int w= weight[i];
3879
3880                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3881             }
3882 #ifdef REFINE_STATS
3883 STOP_TIMER("rem*w*w")}
3884 {START_TIMER
3885 #endif
3886             s->dsp.fdct(d1);
3887 #ifdef REFINE_STATS
3888 STOP_TIMER("dct")}
3889 #endif
3890         }
3891
3892         if(start_i){
3893             const int level= block[0];
3894             int change, old_coeff;
3895
3896             assert(s->mb_intra);
3897
3898             old_coeff= q*level;
3899
3900             for(change=-1; change<=1; change+=2){
3901                 int new_level= level + change;
3902                 int score, new_coeff;
3903
3904                 new_coeff= q*new_level;
3905                 if(new_coeff >= 2048 || new_coeff < 0)
3906                     continue;
3907
3908                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3909                 if(score<best_score){
3910                     best_score= score;
3911                     best_coeff= 0;
3912                     best_change= change;
3913                     best_unquant_change= new_coeff - old_coeff;
3914                 }
3915             }
3916         }
3917
3918         run=0;
3919         rle_index=0;
3920         run2= run_tab[rle_index++];
3921         prev_level=0;
3922         prev_run=0;
3923
3924         for(i=start_i; i<64; i++){
3925             int j= perm_scantable[i];
3926             const int level= block[j];
3927             int change, old_coeff;
3928
3929             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3930                 break;
3931
3932             if(level){
3933                 if(level<0) old_coeff= qmul*level - qadd;
3934                 else        old_coeff= qmul*level + qadd;
3935                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3936             }else{
3937                 old_coeff=0;
3938                 run2--;
3939                 assert(run2>=0 || i >= last_non_zero );
3940             }
3941
3942             for(change=-1; change<=1; change+=2){
3943                 int new_level= level + change;
3944                 int score, new_coeff, unquant_change;
3945
3946                 score=0;
3947                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3948                    continue;
3949
3950                 if(new_level){
3951                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3952                     else            new_coeff= qmul*new_level + qadd;
3953                     if(new_coeff >= 2048 || new_coeff <= -2048)
3954                         continue;
3955                     //FIXME check for overflow
3956
3957                     if(level){
3958                         if(level < 63 && level > -63){
3959                             if(i < last_non_zero)
3960                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3961                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3962                             else
3963                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3964                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3965                         }
3966                     }else{
3967                         assert(FFABS(new_level)==1);
3968
3969                         if(analyze_gradient){
3970                             int g= d1[ scantable[i] ];
3971                             if(g && (g^new_level) >= 0)
3972                                 continue;
3973                         }
3974
3975                         if(i < last_non_zero){
3976                             int next_i= i + run2 + 1;
3977                             int next_level= block[ perm_scantable[next_i] ] + 64;
3978
3979                             if(next_level&(~127))
3980                                 next_level= 0;
3981
3982                             if(next_i < last_non_zero)
3983                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3984                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3985                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3986                             else
3987                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3988                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3989                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3990                         }else{
3991                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3992                             if(prev_level){
3993                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3994                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3995                             }
3996                         }
3997                     }
3998                 }else{
3999                     new_coeff=0;
4000                     assert(FFABS(level)==1);
4001
4002                     if(i < last_non_zero){
4003                         int next_i= i + run2 + 1;
4004                         int next_level= block[ perm_scantable[next_i] ] + 64;
4005
4006                         if(next_level&(~127))
4007                             next_level= 0;
4008
4009                         if(next_i < last_non_zero)
4010                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4011                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4012                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4013                         else
4014                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4015                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4016                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4017                     }else{
4018                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4019                         if(prev_level){
4020                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4021                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4022                         }
4023                     }
4024                 }
4025
4026                 score *= lambda;
4027
4028                 unquant_change= new_coeff - old_coeff;
4029                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
4030
4031                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
4032                 if(score<best_score){
4033                     best_score= score;
4034                     best_coeff= i;
4035                     best_change= change;
4036                     best_unquant_change= unquant_change;
4037                 }
4038             }
4039             if(level){
4040                 prev_level= level + 64;
4041                 if(prev_level&(~127))
4042                     prev_level= 0;
4043                 prev_run= run;
4044                 run=0;
4045             }else{
4046                 run++;
4047             }
4048         }
4049 #ifdef REFINE_STATS
4050 STOP_TIMER("iterative step")}
4051 #endif
4052
4053         if(best_change){
4054             int j= perm_scantable[ best_coeff ];
4055
4056             block[j] += best_change;
4057
4058             if(best_coeff > last_non_zero){
4059                 last_non_zero= best_coeff;
4060                 assert(block[j]);
4061 #ifdef REFINE_STATS
4062 after_last++;
4063 #endif
4064             }else{
4065 #ifdef REFINE_STATS
4066 if(block[j]){
4067     if(block[j] - best_change){
4068         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4069             raise++;
4070         }else{
4071             lower++;
4072         }
4073     }else{
4074         from_zero++;
4075     }
4076 }else{
4077     to_zero++;
4078 }
4079 #endif
4080                 for(; last_non_zero>=start_i; last_non_zero--){
4081                     if(block[perm_scantable[last_non_zero]])
4082                         break;
4083                 }
4084             }
4085 #ifdef REFINE_STATS
4086 count++;
4087 if(256*256*256*64 % count == 0){
4088     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4089 }
4090 #endif
4091             run=0;
4092             rle_index=0;
4093             for(i=start_i; i<=last_non_zero; i++){
4094                 int j= perm_scantable[i];
4095                 const int level= block[j];
4096
4097                  if(level){
4098                      run_tab[rle_index++]=run;
4099                      run=0;
4100                  }else{
4101                      run++;
4102                  }
4103             }
4104
4105             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4106         }else{
4107             break;
4108         }
4109     }
4110 #ifdef REFINE_STATS
4111 if(last_non_zero>0){
4112 STOP_TIMER("iterative search")
4113 }
4114 }
4115 #endif
4116
4117     return last_non_zero;
4118 }
4119
4120 int ff_dct_quantize_c(MpegEncContext *s,
4121                         int16_t *block, int n,
4122                         int qscale, int *overflow)
4123 {
4124     int i, j, level, last_non_zero, q, start_i;
4125     const int *qmat;
4126     const uint8_t *scantable= s->intra_scantable.scantable;
4127     int bias;
4128     int max=0;
4129     unsigned int threshold1, threshold2;
4130
4131     s->dsp.fdct (block);
4132
4133     if(s->dct_error_sum)
4134         s->denoise_dct(s, block);
4135
4136     if (s->mb_intra) {
4137         if (!s->h263_aic) {
4138             if (n < 4)
4139                 q = s->y_dc_scale;
4140             else
4141                 q = s->c_dc_scale;
4142             q = q << 3;
4143         } else
4144             /* For AIC we skip quant/dequant of INTRADC */
4145             q = 1 << 3;
4146
4147         /* note: block[0] is assumed to be positive */
4148         block[0] = (block[0] + (q >> 1)) / q;
4149         start_i = 1;
4150         last_non_zero = 0;
4151         qmat = s->q_intra_matrix[qscale];
4152         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4153     } else {
4154         start_i = 0;
4155         last_non_zero = -1;
4156         qmat = s->q_inter_matrix[qscale];
4157         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4158     }
4159     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4160     threshold2= (threshold1<<1);
4161     for(i=63;i>=start_i;i--) {
4162         j = scantable[i];
4163         level = block[j] * qmat[j];
4164
4165         if(((unsigned)(level+threshold1))>threshold2){
4166             last_non_zero = i;
4167             break;
4168         }else{
4169             block[j]=0;
4170         }
4171     }
4172     for(i=start_i; i<=last_non_zero; i++) {
4173         j = scantable[i];
4174         level = block[j] * qmat[j];
4175
4176 //        if(   bias+level >= (1<<QMAT_SHIFT)
4177 //           || bias-level >= (1<<QMAT_SHIFT)){
4178         if(((unsigned)(level+threshold1))>threshold2){
4179             if(level>0){
4180                 level= (bias + level)>>QMAT_SHIFT;
4181                 block[j]= level;
4182             }else{
4183                 level= (bias - level)>>QMAT_SHIFT;
4184                 block[j]= -level;
4185             }
4186             max |=level;
4187         }else{
4188             block[j]=0;
4189         }
4190     }
4191     *overflow= s->max_qcoeff < max; //overflow might have happened
4192
4193     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4194     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4195         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4196
4197     return last_non_zero;
4198 }
4199
/* Byte offset of field x inside MpegEncContext, for the AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Option flags: video encoding parameter. The expansion is parenthesized so
 * that it behaves as a single value next to operators such as & or ~. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4202 static const AVOption h263_options[] = {
4203     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4204     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4205     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4206     FF_MPV_COMMON_OPTS
4207     { NULL },
4208 };
4209
4210 static const AVClass h263_class = {
4211     .class_name = "H.263 encoder",
4212     .item_name  = av_default_item_name,
4213     .option     = h263_options,
4214     .version    = LIBAVUTIL_VERSION_INT,
4215 };
4216
4217 AVCodec ff_h263_encoder = {
4218     .name           = "h263",
4219     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4220     .type           = AVMEDIA_TYPE_VIDEO,
4221     .id             = AV_CODEC_ID_H263,
4222     .priv_data_size = sizeof(MpegEncContext),
4223     .init           = ff_MPV_encode_init,
4224     .encode2        = ff_MPV_encode_picture,
4225     .close          = ff_MPV_encode_end,
4226     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4227     .priv_class     = &h263_class,
4228 };
4229
4230 static const AVOption h263p_options[] = {
4231     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4232     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4233     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4234     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4235     FF_MPV_COMMON_OPTS
4236     { NULL },
4237 };
4238 static const AVClass h263p_class = {
4239     .class_name = "H.263p encoder",
4240     .item_name  = av_default_item_name,
4241     .option     = h263p_options,
4242     .version    = LIBAVUTIL_VERSION_INT,
4243 };
4244
4245 AVCodec ff_h263p_encoder = {
4246     .name           = "h263p",
4247     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4248     .type           = AVMEDIA_TYPE_VIDEO,
4249     .id             = AV_CODEC_ID_H263P,
4250     .priv_data_size = sizeof(MpegEncContext),
4251     .init           = ff_MPV_encode_init,
4252     .encode2        = ff_MPV_encode_picture,
4253     .close          = ff_MPV_encode_end,
4254     .capabilities   = CODEC_CAP_SLICE_THREADS,
4255     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4256     .priv_class     = &h263p_class,
4257 };
4258
4259 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4260
4261 AVCodec ff_msmpeg4v2_encoder = {
4262     .name           = "msmpeg4v2",
4263     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4264     .type           = AVMEDIA_TYPE_VIDEO,
4265     .id             = AV_CODEC_ID_MSMPEG4V2,
4266     .priv_data_size = sizeof(MpegEncContext),
4267     .init           = ff_MPV_encode_init,
4268     .encode2        = ff_MPV_encode_picture,
4269     .close          = ff_MPV_encode_end,
4270     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4271     .priv_class     = &msmpeg4v2_class,
4272 };
4273
4274 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4275
4276 AVCodec ff_msmpeg4v3_encoder = {
4277     .name           = "msmpeg4",
4278     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4279     .type           = AVMEDIA_TYPE_VIDEO,
4280     .id             = AV_CODEC_ID_MSMPEG4V3,
4281     .priv_data_size = sizeof(MpegEncContext),
4282     .init           = ff_MPV_encode_init,
4283     .encode2        = ff_MPV_encode_picture,
4284     .close          = ff_MPV_encode_end,
4285     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4286     .priv_class     = &msmpeg4v3_class,
4287 };
4288
4289 FF_MPV_GENERIC_CLASS(wmv1)
4290
4291 AVCodec ff_wmv1_encoder = {
4292     .name           = "wmv1",
4293     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4294     .type           = AVMEDIA_TYPE_VIDEO,
4295     .id             = AV_CODEC_ID_WMV1,
4296     .priv_data_size = sizeof(MpegEncContext),
4297     .init           = ff_MPV_encode_init,
4298     .encode2        = ff_MPV_encode_picture,
4299     .close          = ff_MPV_encode_end,
4300     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4301     .priv_class     = &wmv1_class,
4302 };