]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
lavc: Edge emulation with dst/src linesize
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/internal.h"
31 #include "libavutil/intmath.h"
32 #include "libavutil/mathematics.h"
33 #include "libavutil/pixdesc.h"
34 #include "libavutil/opt.h"
35 #include "avcodec.h"
36 #include "dct.h"
37 #include "dsputil.h"
38 #include "mpeg12.h"
39 #include "mpegvideo.h"
40 #include "h261.h"
41 #include "h263.h"
42 #include "mathops.h"
43 #include "mjpegenc.h"
44 #include "msmpeg4.h"
45 #include "faandct.h"
46 #include "thread.h"
47 #include "aandcttab.h"
48 #include "flv.h"
49 #include "mpeg4video.h"
50 #include "internal.h"
51 #include "bytestream.h"
52 #include <limits.h>
53
54 static int encode_picture(MpegEncContext *s, int picture_number);
55 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
56 static int sse_mb(MpegEncContext *s);
57 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
58 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
59
60 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
61 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
62
63 const AVOption ff_mpv_generic_options[] = {
64     FF_MPV_COMMON_OPTS
65     { NULL },
66 };
67
68 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
69                        uint16_t (*qmat16)[2][64],
70                        const uint16_t *quant_matrix,
71                        int bias, int qmin, int qmax, int intra)
72 {
73     int qscale;
74     int shift = 0;
75
76     for (qscale = qmin; qscale <= qmax; qscale++) {
77         int i;
78         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
79             dsp->fdct == ff_jpeg_fdct_islow_10 ||
80             dsp->fdct == ff_faandct) {
81             for (i = 0; i < 64; i++) {
82                 const int j = dsp->idct_permutation[i];
83                 /* 16 <= qscale * quant_matrix[i] <= 7905
84                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
85                  *             19952 <=              x  <= 249205026
86                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
87                  *           3444240 >= (1 << 36) / (x) >= 275 */
88
89                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
90                                         (qscale * quant_matrix[j]));
91             }
92         } else if (dsp->fdct == ff_fdct_ifast) {
93             for (i = 0; i < 64; i++) {
94                 const int j = dsp->idct_permutation[i];
95                 /* 16 <= qscale * quant_matrix[i] <= 7905
96                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
97                  *             19952 <=              x  <= 249205026
98                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
99                  *           3444240 >= (1 << 36) / (x) >= 275 */
100
101                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
102                                         (ff_aanscales[i] * qscale *
103                                          quant_matrix[j]));
104             }
105         } else {
106             for (i = 0; i < 64; i++) {
107                 const int j = dsp->idct_permutation[i];
108                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
109                  * Assume x = qscale * quant_matrix[i]
110                  * So             16 <=              x  <= 7905
111                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
112                  * so          32768 >= (1 << 19) / (x) >= 67 */
113                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
114                                         (qscale * quant_matrix[j]));
115                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
116                 //                    (qscale * quant_matrix[i]);
117                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
118                                        (qscale * quant_matrix[j]);
119
120                 if (qmat16[qscale][0][i] == 0 ||
121                     qmat16[qscale][0][i] == 128 * 256)
122                     qmat16[qscale][0][i] = 128 * 256 - 1;
123                 qmat16[qscale][1][i] =
124                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
125                                 qmat16[qscale][0][i]);
126             }
127         }
128
129         for (i = intra; i < 64; i++) {
130             int64_t max = 8191;
131             if (dsp->fdct == ff_fdct_ifast) {
132                 max = (8191LL * ff_aanscales[i]) >> 14;
133             }
134             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
135                 shift++;
136             }
137         }
138     }
139     if (shift) {
140         av_log(NULL, AV_LOG_INFO,
141                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
142                QMAT_SHIFT - shift);
143     }
144 }
145
146 static inline void update_qscale(MpegEncContext *s)
147 {
148     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
149                 (FF_LAMBDA_SHIFT + 7);
150     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
151
152     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
153                  FF_LAMBDA_SHIFT;
154 }
155
156 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
157 {
158     int i;
159
160     if (matrix) {
161         put_bits(pb, 1, 1);
162         for (i = 0; i < 64; i++) {
163             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
164         }
165     } else
166         put_bits(pb, 1, 0);
167 }
168
169 /**
170  * init s->current_picture.qscale_table from s->lambda_table
171  */
172 void ff_init_qscale_tab(MpegEncContext *s)
173 {
174     int8_t * const qscale_table = s->current_picture.qscale_table;
175     int i;
176
177     for (i = 0; i < s->mb_num; i++) {
178         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
179         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
180         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
181                                                   s->avctx->qmax);
182     }
183 }
184
185 static void update_duplicate_context_after_me(MpegEncContext *dst,
186                                               MpegEncContext *src)
187 {
188 #define COPY(a) dst->a= src->a
189     COPY(pict_type);
190     COPY(current_picture);
191     COPY(f_code);
192     COPY(b_code);
193     COPY(qscale);
194     COPY(lambda);
195     COPY(lambda2);
196     COPY(picture_in_gop_number);
197     COPY(gop_picture_number);
198     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
199     COPY(progressive_frame);    // FIXME don't set in encode_header
200     COPY(partitioned_frame);    // FIXME don't set in encode_header
201 #undef COPY
202 }
203
204 /**
205  * Set the given MpegEncContext to defaults for encoding.
206  * the changed fields will not depend upon the prior state of the MpegEncContext.
207  */
208 static void MPV_encode_defaults(MpegEncContext *s)
209 {
210     int i;
211     ff_MPV_common_defaults(s);
212
213     for (i = -16; i < 16; i++) {
214         default_fcode_tab[i + MAX_MV] = 1;
215     }
216     s->me.mv_penalty = default_mv_penalty;
217     s->fcode_tab     = default_fcode_tab;
218 }
219
220 /* init video encoder */
221 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
222 {
223     MpegEncContext *s = avctx->priv_data;
224     int i;
225     int chroma_h_shift, chroma_v_shift;
226
227     MPV_encode_defaults(s);
228
229     switch (avctx->codec_id) {
230     case AV_CODEC_ID_MPEG2VIDEO:
231         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
232             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
233             av_log(avctx, AV_LOG_ERROR,
234                    "only YUV420 and YUV422 are supported\n");
235             return -1;
236         }
237         break;
238     case AV_CODEC_ID_LJPEG:
239         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
240             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
241             avctx->pix_fmt != AV_PIX_FMT_YUVJ444P &&
242             avctx->pix_fmt != AV_PIX_FMT_BGRA     &&
243             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
244               avctx->pix_fmt != AV_PIX_FMT_YUV422P &&
245               avctx->pix_fmt != AV_PIX_FMT_YUV444P) ||
246              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
247             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
248             return -1;
249         }
250         break;
251     case AV_CODEC_ID_MJPEG:
252         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
253             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
254             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
255               avctx->pix_fmt != AV_PIX_FMT_YUV422P) ||
256              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
257             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
258             return -1;
259         }
260         break;
261     default:
262         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
263             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
264             return -1;
265         }
266     }
267
268     switch (avctx->pix_fmt) {
269     case AV_PIX_FMT_YUVJ422P:
270     case AV_PIX_FMT_YUV422P:
271         s->chroma_format = CHROMA_422;
272         break;
273     case AV_PIX_FMT_YUVJ420P:
274     case AV_PIX_FMT_YUV420P:
275     default:
276         s->chroma_format = CHROMA_420;
277         break;
278     }
279
280     s->bit_rate = avctx->bit_rate;
281     s->width    = avctx->width;
282     s->height   = avctx->height;
283     if (avctx->gop_size > 600 &&
284         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
285         av_log(avctx, AV_LOG_ERROR,
286                "Warning keyframe interval too large! reducing it ...\n");
287         avctx->gop_size = 600;
288     }
289     s->gop_size     = avctx->gop_size;
290     s->avctx        = avctx;
291     s->flags        = avctx->flags;
292     s->flags2       = avctx->flags2;
293     if (avctx->max_b_frames > MAX_B_FRAMES) {
294         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
295                "is %d.\n", MAX_B_FRAMES);
296     }
297     s->max_b_frames = avctx->max_b_frames;
298     s->codec_id     = avctx->codec->id;
299     s->strict_std_compliance = avctx->strict_std_compliance;
300     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
301     s->mpeg_quant         = avctx->mpeg_quant;
302     s->rtp_mode           = !!avctx->rtp_payload_size;
303     s->intra_dc_precision = avctx->intra_dc_precision;
304     s->user_specified_pts = AV_NOPTS_VALUE;
305
306     if (s->gop_size <= 1) {
307         s->intra_only = 1;
308         s->gop_size   = 12;
309     } else {
310         s->intra_only = 0;
311     }
312
313     s->me_method = avctx->me_method;
314
315     /* Fixed QSCALE */
316     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
317
318     s->adaptive_quant = (s->avctx->lumi_masking ||
319                          s->avctx->dark_masking ||
320                          s->avctx->temporal_cplx_masking ||
321                          s->avctx->spatial_cplx_masking  ||
322                          s->avctx->p_masking      ||
323                          s->avctx->border_masking ||
324                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
325                         !s->fixed_qscale;
326
327     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
328
329     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
330         av_log(avctx, AV_LOG_ERROR,
331                "a vbv buffer size is needed, "
332                "for encoding with a maximum bitrate\n");
333         return -1;
334     }
335
336     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
337         av_log(avctx, AV_LOG_INFO,
338                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
339     }
340
341     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
342         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
343         return -1;
344     }
345
346     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
347         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
348         return -1;
349     }
350
351     if (avctx->rc_max_rate &&
352         avctx->rc_max_rate == avctx->bit_rate &&
353         avctx->rc_max_rate != avctx->rc_min_rate) {
354         av_log(avctx, AV_LOG_INFO,
355                "impossible bitrate constraints, this will fail\n");
356     }
357
358     if (avctx->rc_buffer_size &&
359         avctx->bit_rate * (int64_t)avctx->time_base.num >
360             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
361         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
362         return -1;
363     }
364
365     if (!s->fixed_qscale &&
366         avctx->bit_rate * av_q2d(avctx->time_base) >
367             avctx->bit_rate_tolerance) {
368         av_log(avctx, AV_LOG_ERROR,
369                "bitrate tolerance too small for bitrate\n");
370         return -1;
371     }
372
373     if (s->avctx->rc_max_rate &&
374         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
375         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
376          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
377         90000LL * (avctx->rc_buffer_size - 1) >
378             s->avctx->rc_max_rate * 0xFFFFLL) {
379         av_log(avctx, AV_LOG_INFO,
380                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
381                "specified vbv buffer is too large for the given bitrate!\n");
382     }
383
384     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
385         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
386         s->codec_id != AV_CODEC_ID_FLV1) {
387         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
388         return -1;
389     }
390
391     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
392         av_log(avctx, AV_LOG_ERROR,
393                "OBMC is only supported with simple mb decision\n");
394         return -1;
395     }
396
397     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
398         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
399         return -1;
400     }
401
402     if (s->max_b_frames                    &&
403         s->codec_id != AV_CODEC_ID_MPEG4      &&
404         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
405         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
406         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
407         return -1;
408     }
409
410     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
411          s->codec_id == AV_CODEC_ID_H263  ||
412          s->codec_id == AV_CODEC_ID_H263P) &&
413         (avctx->sample_aspect_ratio.num > 255 ||
414          avctx->sample_aspect_ratio.den > 255)) {
415         av_log(avctx, AV_LOG_ERROR,
416                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
417                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
418         return -1;
419     }
420
421     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
422         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
423         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
424         return -1;
425     }
426
427     // FIXME mpeg2 uses that too
428     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
429         av_log(avctx, AV_LOG_ERROR,
430                "mpeg2 style quantization not supported by codec\n");
431         return -1;
432     }
433
434     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
435         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
436         return -1;
437     }
438
439     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
440         s->avctx->mb_decision != FF_MB_DECISION_RD) {
441         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
442         return -1;
443     }
444
445     if (s->avctx->scenechange_threshold < 1000000000 &&
446         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
447         av_log(avctx, AV_LOG_ERROR,
448                "closed gop with scene change detection are not supported yet, "
449                "set threshold to 1000000000\n");
450         return -1;
451     }
452
453     if (s->flags & CODEC_FLAG_LOW_DELAY) {
454         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
455             av_log(avctx, AV_LOG_ERROR,
456                   "low delay forcing is only available for mpeg2\n");
457             return -1;
458         }
459         if (s->max_b_frames != 0) {
460             av_log(avctx, AV_LOG_ERROR,
461                    "b frames cannot be used with low delay\n");
462             return -1;
463         }
464     }
465
466     if (s->q_scale_type == 1) {
467         if (avctx->qmax > 12) {
468             av_log(avctx, AV_LOG_ERROR,
469                    "non linear quant only supports qmax <= 12 currently\n");
470             return -1;
471         }
472     }
473
474     if (s->avctx->thread_count > 1         &&
475         s->codec_id != AV_CODEC_ID_MPEG4      &&
476         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
477         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
478         (s->codec_id != AV_CODEC_ID_H263P)) {
479         av_log(avctx, AV_LOG_ERROR,
480                "multi threaded encoding not supported by codec\n");
481         return -1;
482     }
483
484     if (s->avctx->thread_count < 1) {
485         av_log(avctx, AV_LOG_ERROR,
486                "automatic thread number detection not supported by codec,"
487                "patch welcome\n");
488         return -1;
489     }
490
491     if (s->avctx->thread_count > 1)
492         s->rtp_mode = 1;
493
494     if (!avctx->time_base.den || !avctx->time_base.num) {
495         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
496         return -1;
497     }
498
499     i = (INT_MAX / 2 + 128) >> 8;
500     if (avctx->mb_threshold >= i) {
501         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
502                i - 1);
503         return -1;
504     }
505
506     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
507         av_log(avctx, AV_LOG_INFO,
508                "notice: b_frame_strategy only affects the first pass\n");
509         avctx->b_frame_strategy = 0;
510     }
511
512     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
513     if (i > 1) {
514         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
515         avctx->time_base.den /= i;
516         avctx->time_base.num /= i;
517         //return -1;
518     }
519
520     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
521         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
522         // (a + x * 3 / 8) / x
523         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
524         s->inter_quant_bias = 0;
525     } else {
526         s->intra_quant_bias = 0;
527         // (a - x / 4) / x
528         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
529     }
530
531     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
532         s->intra_quant_bias = avctx->intra_quant_bias;
533     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
534         s->inter_quant_bias = avctx->inter_quant_bias;
535
536     av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
537                                      &chroma_v_shift);
538
539     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
540         s->avctx->time_base.den > (1 << 16) - 1) {
541         av_log(avctx, AV_LOG_ERROR,
542                "timebase %d/%d not supported by MPEG 4 standard, "
543                "the maximum admitted value for the timebase denominator "
544                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
545                (1 << 16) - 1);
546         return -1;
547     }
548     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
549
550     switch (avctx->codec->id) {
551     case AV_CODEC_ID_MPEG1VIDEO:
552         s->out_format = FMT_MPEG1;
553         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
554         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
555         break;
556     case AV_CODEC_ID_MPEG2VIDEO:
557         s->out_format = FMT_MPEG1;
558         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
559         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
560         s->rtp_mode   = 1;
561         break;
562     case AV_CODEC_ID_LJPEG:
563     case AV_CODEC_ID_MJPEG:
564         s->out_format = FMT_MJPEG;
565         s->intra_only = 1; /* force intra only for jpeg */
566         if (avctx->codec->id == AV_CODEC_ID_LJPEG &&
567             avctx->pix_fmt   == AV_PIX_FMT_BGRA) {
568             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
569             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
570             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
571         } else {
572             s->mjpeg_vsample[0] = 2;
573             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
574             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
575             s->mjpeg_hsample[0] = 2;
576             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
577             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
578         }
579         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
580             ff_mjpeg_encode_init(s) < 0)
581             return -1;
582         avctx->delay = 0;
583         s->low_delay = 1;
584         break;
585     case AV_CODEC_ID_H261:
586         if (!CONFIG_H261_ENCODER)
587             return -1;
588         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
589             av_log(avctx, AV_LOG_ERROR,
590                    "The specified picture size of %dx%d is not valid for the "
591                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
592                     s->width, s->height);
593             return -1;
594         }
595         s->out_format = FMT_H261;
596         avctx->delay  = 0;
597         s->low_delay  = 1;
598         break;
599     case AV_CODEC_ID_H263:
600         if (!CONFIG_H263_ENCODER)
601         return -1;
602         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
603                              s->width, s->height) == 8) {
604             av_log(avctx, AV_LOG_INFO,
605                    "The specified picture size of %dx%d is not valid for "
606                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
607                    "352x288, 704x576, and 1408x1152."
608                    "Try H.263+.\n", s->width, s->height);
609             return -1;
610         }
611         s->out_format = FMT_H263;
612         avctx->delay  = 0;
613         s->low_delay  = 1;
614         break;
615     case AV_CODEC_ID_H263P:
616         s->out_format = FMT_H263;
617         s->h263_plus  = 1;
618         /* Fx */
619         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
620         s->modified_quant  = s->h263_aic;
621         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
622         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
623
624         /* /Fx */
625         /* These are just to be sure */
626         avctx->delay = 0;
627         s->low_delay = 1;
628         break;
629     case AV_CODEC_ID_FLV1:
630         s->out_format      = FMT_H263;
631         s->h263_flv        = 2; /* format = 1; 11-bit codes */
632         s->unrestricted_mv = 1;
633         s->rtp_mode  = 0; /* don't allow GOB */
634         avctx->delay = 0;
635         s->low_delay = 1;
636         break;
637     case AV_CODEC_ID_RV10:
638         s->out_format = FMT_H263;
639         avctx->delay  = 0;
640         s->low_delay  = 1;
641         break;
642     case AV_CODEC_ID_RV20:
643         s->out_format      = FMT_H263;
644         avctx->delay       = 0;
645         s->low_delay       = 1;
646         s->modified_quant  = 1;
647         s->h263_aic        = 1;
648         s->h263_plus       = 1;
649         s->loop_filter     = 1;
650         s->unrestricted_mv = 0;
651         break;
652     case AV_CODEC_ID_MPEG4:
653         s->out_format      = FMT_H263;
654         s->h263_pred       = 1;
655         s->unrestricted_mv = 1;
656         s->low_delay       = s->max_b_frames ? 0 : 1;
657         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
658         break;
659     case AV_CODEC_ID_MSMPEG4V2:
660         s->out_format      = FMT_H263;
661         s->h263_pred       = 1;
662         s->unrestricted_mv = 1;
663         s->msmpeg4_version = 2;
664         avctx->delay       = 0;
665         s->low_delay       = 1;
666         break;
667     case AV_CODEC_ID_MSMPEG4V3:
668         s->out_format        = FMT_H263;
669         s->h263_pred         = 1;
670         s->unrestricted_mv   = 1;
671         s->msmpeg4_version   = 3;
672         s->flipflop_rounding = 1;
673         avctx->delay         = 0;
674         s->low_delay         = 1;
675         break;
676     case AV_CODEC_ID_WMV1:
677         s->out_format        = FMT_H263;
678         s->h263_pred         = 1;
679         s->unrestricted_mv   = 1;
680         s->msmpeg4_version   = 4;
681         s->flipflop_rounding = 1;
682         avctx->delay         = 0;
683         s->low_delay         = 1;
684         break;
685     case AV_CODEC_ID_WMV2:
686         s->out_format        = FMT_H263;
687         s->h263_pred         = 1;
688         s->unrestricted_mv   = 1;
689         s->msmpeg4_version   = 5;
690         s->flipflop_rounding = 1;
691         avctx->delay         = 0;
692         s->low_delay         = 1;
693         break;
694     default:
695         return -1;
696     }
697
698     avctx->has_b_frames = !s->low_delay;
699
700     s->encoding = 1;
701
702     s->progressive_frame    =
703     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
704                                                 CODEC_FLAG_INTERLACED_ME) ||
705                                 s->alternate_scan);
706
707     /* init */
708     if (ff_MPV_common_init(s) < 0)
709         return -1;
710
711     if (ARCH_X86)
712         ff_MPV_encode_init_x86(s);
713
714     ff_h263dsp_init(&s->h263dsp);
715     if (!s->dct_quantize)
716         s->dct_quantize = ff_dct_quantize_c;
717     if (!s->denoise_dct)
718         s->denoise_dct  = denoise_dct_c;
719     s->fast_dct_quantize = s->dct_quantize;
720     if (avctx->trellis)
721         s->dct_quantize  = dct_quantize_trellis_c;
722
723     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
724         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
725
726     s->quant_precision = 5;
727
728     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
729     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
730
731     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
732         ff_h261_encode_init(s);
733     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
734         ff_h263_encode_init(s);
735     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
736         ff_msmpeg4_encode_init(s);
737     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
738         && s->out_format == FMT_MPEG1)
739         ff_mpeg1_encode_init(s);
740
741     /* init q matrix */
742     for (i = 0; i < 64; i++) {
743         int j = s->dsp.idct_permutation[i];
744         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
745             s->mpeg_quant) {
746             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
747             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
748         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
749             s->intra_matrix[j] =
750             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
751         } else {
752             /* mpeg1/2 */
753             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
754             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
755         }
756         if (s->avctx->intra_matrix)
757             s->intra_matrix[j] = s->avctx->intra_matrix[i];
758         if (s->avctx->inter_matrix)
759             s->inter_matrix[j] = s->avctx->inter_matrix[i];
760     }
761
762     /* precompute matrix */
763     /* for mjpeg, we do include qscale in the matrix */
764     if (s->out_format != FMT_MJPEG) {
765         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
766                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
767                           31, 1);
768         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
769                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
770                           31, 0);
771     }
772
773     if (ff_rate_control_init(s) < 0)
774         return -1;
775
776 #if FF_API_ERROR_RATE
777     FF_DISABLE_DEPRECATION_WARNINGS
778     if (avctx->error_rate)
779         s->error_rate = avctx->error_rate;
780     FF_ENABLE_DEPRECATION_WARNINGS;
781 #endif
782
783     return 0;
784 }
785
786 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
787 {
788     MpegEncContext *s = avctx->priv_data;
789
790     ff_rate_control_uninit(s);
791
792     ff_MPV_common_end(s);
793     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
794         s->out_format == FMT_MJPEG)
795         ff_mjpeg_encode_close(s);
796
797     av_freep(&avctx->extradata);
798
799     return 0;
800 }
801
/**
 * Sum of absolute differences between a 16x16 block and a constant value.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared against
 * @param stride distance in bytes between the starts of consecutive rows
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
815
816 static int get_intra_count(MpegEncContext *s, uint8_t *src,
817                            uint8_t *ref, int stride)
818 {
819     int x, y, w, h;
820     int acc = 0;
821
822     w = s->width  & ~15;
823     h = s->height & ~15;
824
825     for (y = 0; y < h; y += 16) {
826         for (x = 0; x < w; x += 16) {
827             int offset = x + y * stride;
828             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
829                                      16);
830             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
831             int sae  = get_sae(src + offset, mean, stride);
832
833             acc += sae + 500 < sad;
834         }
835     }
836     return acc;
837 }
838
839
840 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
841 {
842     Picture *pic = NULL;
843     int64_t pts;
844     int i, display_picture_number = 0, ret;
845     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
846                                                  (s->low_delay ? 0 : 1);
847     int direct = 1;
848
849     if (pic_arg) {
850         pts = pic_arg->pts;
851         display_picture_number = s->input_picture_number++;
852
853         if (pts != AV_NOPTS_VALUE) {
854             if (s->user_specified_pts != AV_NOPTS_VALUE) {
855                 int64_t time = pts;
856                 int64_t last = s->user_specified_pts;
857
858                 if (time <= last) {
859                     av_log(s->avctx, AV_LOG_ERROR,
860                            "Error, Invalid timestamp=%"PRId64", "
861                            "last=%"PRId64"\n", pts, s->user_specified_pts);
862                     return -1;
863                 }
864
865                 if (!s->low_delay && display_picture_number == 1)
866                     s->dts_delta = time - last;
867             }
868             s->user_specified_pts = pts;
869         } else {
870             if (s->user_specified_pts != AV_NOPTS_VALUE) {
871                 s->user_specified_pts =
872                 pts = s->user_specified_pts + 1;
873                 av_log(s->avctx, AV_LOG_INFO,
874                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
875                        pts);
876             } else {
877                 pts = display_picture_number;
878             }
879         }
880     }
881
882     if (pic_arg) {
883         if (!pic_arg->buf[0]);
884             direct = 0;
885         if (pic_arg->linesize[0] != s->linesize)
886             direct = 0;
887         if (pic_arg->linesize[1] != s->uvlinesize)
888             direct = 0;
889         if (pic_arg->linesize[2] != s->uvlinesize)
890             direct = 0;
891
892         av_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
893                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
894
895         if (direct) {
896             i = ff_find_unused_picture(s, 1);
897             if (i < 0)
898                 return i;
899
900             pic = &s->picture[i];
901             pic->reference = 3;
902
903             if ((ret = av_frame_ref(&pic->f, pic_arg)) < 0)
904                 return ret;
905             if (ff_alloc_picture(s, pic, 1) < 0) {
906                 return -1;
907             }
908         } else {
909             i = ff_find_unused_picture(s, 0);
910             if (i < 0)
911                 return i;
912
913             pic = &s->picture[i];
914             pic->reference = 3;
915
916             if (ff_alloc_picture(s, pic, 0) < 0) {
917                 return -1;
918             }
919
920             if (pic->f.data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
921                 pic->f.data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
922                 pic->f.data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
923                 // empty
924             } else {
925                 int h_chroma_shift, v_chroma_shift;
926                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
927                                                  &h_chroma_shift,
928                                                  &v_chroma_shift);
929
930                 for (i = 0; i < 3; i++) {
931                     int src_stride = pic_arg->linesize[i];
932                     int dst_stride = i ? s->uvlinesize : s->linesize;
933                     int h_shift = i ? h_chroma_shift : 0;
934                     int v_shift = i ? v_chroma_shift : 0;
935                     int w = s->width  >> h_shift;
936                     int h = s->height >> v_shift;
937                     uint8_t *src = pic_arg->data[i];
938                     uint8_t *dst = pic->f.data[i];
939
940                     if (!s->avctx->rc_buffer_size)
941                         dst += INPLACE_OFFSET;
942
943                     if (src_stride == dst_stride)
944                         memcpy(dst, src, src_stride * h);
945                     else {
946                         while (h--) {
947                             memcpy(dst, src, w);
948                             dst += dst_stride;
949                             src += src_stride;
950                         }
951                     }
952                 }
953             }
954         }
955         ret = av_frame_copy_props(&pic->f, pic_arg);
956         if (ret < 0)
957             return ret;
958
959         pic->f.display_picture_number = display_picture_number;
960         pic->f.pts = pts; // we set this here to avoid modifiying pic_arg
961     }
962
963     /* shift buffer entries */
964     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
965         s->input_picture[i - 1] = s->input_picture[i];
966
967     s->input_picture[encoding_delay] = (Picture*) pic;
968
969     return 0;
970 }
971
972 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
973 {
974     int x, y, plane;
975     int score = 0;
976     int64_t score64 = 0;
977
978     for (plane = 0; plane < 3; plane++) {
979         const int stride = p->f.linesize[plane];
980         const int bw = plane ? 1 : 2;
981         for (y = 0; y < s->mb_height * bw; y++) {
982             for (x = 0; x < s->mb_width * bw; x++) {
983                 int off = p->shared ? 0 : 16;
984                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
985                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
986                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
987
988                 switch (s->avctx->frame_skip_exp) {
989                 case 0: score    =  FFMAX(score, v);          break;
990                 case 1: score   += FFABS(v);                  break;
991                 case 2: score   += v * v;                     break;
992                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
993                 case 4: score64 += v * v * (int64_t)(v * v);  break;
994                 }
995             }
996         }
997     }
998
999     if (score)
1000         score64 = score;
1001
1002     if (score64 < s->avctx->frame_skip_threshold)
1003         return 1;
1004     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1005         return 1;
1006     return 0;
1007 }
1008
1009 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1010 {
1011     AVPacket pkt = { 0 };
1012     int ret, got_output;
1013
1014     av_init_packet(&pkt);
1015     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1016     if (ret < 0)
1017         return ret;
1018
1019     ret = pkt.size;
1020     av_free_packet(&pkt);
1021     return ret;
1022 }
1023
1024 static int estimate_best_b_count(MpegEncContext *s)
1025 {
1026     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1027     AVCodecContext *c = avcodec_alloc_context3(NULL);
1028     AVFrame input[MAX_B_FRAMES + 2];
1029     const int scale = s->avctx->brd_scale;
1030     int i, j, out_size, p_lambda, b_lambda, lambda2;
1031     int64_t best_rd  = INT64_MAX;
1032     int best_b_count = -1;
1033
1034     assert(scale >= 0 && scale <= 3);
1035
1036     //emms_c();
1037     //s->next_picture_ptr->quality;
1038     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1039     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1040     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1041     if (!b_lambda) // FIXME we should do this somewhere else
1042         b_lambda = p_lambda;
1043     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1044                FF_LAMBDA_SHIFT;
1045
1046     c->width        = s->width  >> scale;
1047     c->height       = s->height >> scale;
1048     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1049                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1050     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1051     c->mb_decision  = s->avctx->mb_decision;
1052     c->me_cmp       = s->avctx->me_cmp;
1053     c->mb_cmp       = s->avctx->mb_cmp;
1054     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1055     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1056     c->time_base    = s->avctx->time_base;
1057     c->max_b_frames = s->max_b_frames;
1058
1059     if (avcodec_open2(c, codec, NULL) < 0)
1060         return -1;
1061
1062     for (i = 0; i < s->max_b_frames + 2; i++) {
1063         int ysize = c->width * c->height;
1064         int csize = (c->width / 2) * (c->height / 2);
1065         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1066                                                 s->next_picture_ptr;
1067
1068         avcodec_get_frame_defaults(&input[i]);
1069         input[i].data[0]     = av_malloc(ysize + 2 * csize);
1070         input[i].data[1]     = input[i].data[0] + ysize;
1071         input[i].data[2]     = input[i].data[1] + csize;
1072         input[i].linesize[0] = c->width;
1073         input[i].linesize[1] =
1074         input[i].linesize[2] = c->width / 2;
1075
1076         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1077             pre_input = *pre_input_ptr;
1078
1079             if (!pre_input.shared && i) {
1080                 pre_input.f.data[0] += INPLACE_OFFSET;
1081                 pre_input.f.data[1] += INPLACE_OFFSET;
1082                 pre_input.f.data[2] += INPLACE_OFFSET;
1083             }
1084
1085             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
1086                                  pre_input.f.data[0], pre_input.f.linesize[0],
1087                                  c->width,      c->height);
1088             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
1089                                  pre_input.f.data[1], pre_input.f.linesize[1],
1090                                  c->width >> 1, c->height >> 1);
1091             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
1092                                  pre_input.f.data[2], pre_input.f.linesize[2],
1093                                  c->width >> 1, c->height >> 1);
1094         }
1095     }
1096
1097     for (j = 0; j < s->max_b_frames + 1; j++) {
1098         int64_t rd = 0;
1099
1100         if (!s->input_picture[j])
1101             break;
1102
1103         c->error[0] = c->error[1] = c->error[2] = 0;
1104
1105         input[0].pict_type = AV_PICTURE_TYPE_I;
1106         input[0].quality   = 1 * FF_QP2LAMBDA;
1107
1108         out_size = encode_frame(c, &input[0]);
1109
1110         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1111
1112         for (i = 0; i < s->max_b_frames + 1; i++) {
1113             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1114
1115             input[i + 1].pict_type = is_p ?
1116                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1117             input[i + 1].quality   = is_p ? p_lambda : b_lambda;
1118
1119             out_size = encode_frame(c, &input[i + 1]);
1120
1121             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1122         }
1123
1124         /* get the delayed frames */
1125         while (out_size) {
1126             out_size = encode_frame(c, NULL);
1127             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1128         }
1129
1130         rd += c->error[0] + c->error[1] + c->error[2];
1131
1132         if (rd < best_rd) {
1133             best_rd = rd;
1134             best_b_count = j;
1135         }
1136     }
1137
1138     avcodec_close(c);
1139     av_freep(&c);
1140
1141     for (i = 0; i < s->max_b_frames + 2; i++) {
1142         av_freep(&input[i].data[0]);
1143     }
1144
1145     return best_b_count;
1146 }
1147
1148 static int select_input_picture(MpegEncContext *s)
1149 {
1150     int i, ret;
1151
1152     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1153         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1154     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1155
1156     /* set next picture type & ordering */
1157     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1158         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1159             s->next_picture_ptr == NULL || s->intra_only) {
1160             s->reordered_input_picture[0] = s->input_picture[0];
1161             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1162             s->reordered_input_picture[0]->f.coded_picture_number =
1163                 s->coded_picture_number++;
1164         } else {
1165             int b_frames;
1166
1167             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1168                 if (s->picture_in_gop_number < s->gop_size &&
1169                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1170                     // FIXME check that te gop check above is +-1 correct
1171                     av_frame_unref(&s->input_picture[0]->f);
1172
1173                     emms_c();
1174                     ff_vbv_update(s, 0);
1175
1176                     goto no_output_pic;
1177                 }
1178             }
1179
1180             if (s->flags & CODEC_FLAG_PASS2) {
1181                 for (i = 0; i < s->max_b_frames + 1; i++) {
1182                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1183
1184                     if (pict_num >= s->rc_context.num_entries)
1185                         break;
1186                     if (!s->input_picture[i]) {
1187                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1188                         break;
1189                     }
1190
1191                     s->input_picture[i]->f.pict_type =
1192                         s->rc_context.entry[pict_num].new_pict_type;
1193                 }
1194             }
1195
1196             if (s->avctx->b_frame_strategy == 0) {
1197                 b_frames = s->max_b_frames;
1198                 while (b_frames && !s->input_picture[b_frames])
1199                     b_frames--;
1200             } else if (s->avctx->b_frame_strategy == 1) {
1201                 for (i = 1; i < s->max_b_frames + 1; i++) {
1202                     if (s->input_picture[i] &&
1203                         s->input_picture[i]->b_frame_score == 0) {
1204                         s->input_picture[i]->b_frame_score =
1205                             get_intra_count(s,
1206                                             s->input_picture[i    ]->f.data[0],
1207                                             s->input_picture[i - 1]->f.data[0],
1208                                             s->linesize) + 1;
1209                     }
1210                 }
1211                 for (i = 0; i < s->max_b_frames + 1; i++) {
1212                     if (s->input_picture[i] == NULL ||
1213                         s->input_picture[i]->b_frame_score - 1 >
1214                             s->mb_num / s->avctx->b_sensitivity)
1215                         break;
1216                 }
1217
1218                 b_frames = FFMAX(0, i - 1);
1219
1220                 /* reset scores */
1221                 for (i = 0; i < b_frames + 1; i++) {
1222                     s->input_picture[i]->b_frame_score = 0;
1223                 }
1224             } else if (s->avctx->b_frame_strategy == 2) {
1225                 b_frames = estimate_best_b_count(s);
1226             } else {
1227                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1228                 b_frames = 0;
1229             }
1230
1231             emms_c();
1232
1233             for (i = b_frames - 1; i >= 0; i--) {
1234                 int type = s->input_picture[i]->f.pict_type;
1235                 if (type && type != AV_PICTURE_TYPE_B)
1236                     b_frames = i;
1237             }
1238             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1239                 b_frames == s->max_b_frames) {
1240                 av_log(s->avctx, AV_LOG_ERROR,
1241                        "warning, too many b frames in a row\n");
1242             }
1243
1244             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1245                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1246                     s->gop_size > s->picture_in_gop_number) {
1247                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1248                 } else {
1249                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1250                         b_frames = 0;
1251                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1252                 }
1253             }
1254
1255             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1256                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1257                 b_frames--;
1258
1259             s->reordered_input_picture[0] = s->input_picture[b_frames];
1260             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1261                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1262             s->reordered_input_picture[0]->f.coded_picture_number =
1263                 s->coded_picture_number++;
1264             for (i = 0; i < b_frames; i++) {
1265                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1266                 s->reordered_input_picture[i + 1]->f.pict_type =
1267                     AV_PICTURE_TYPE_B;
1268                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1269                     s->coded_picture_number++;
1270             }
1271         }
1272     }
1273 no_output_pic:
1274     if (s->reordered_input_picture[0]) {
1275         s->reordered_input_picture[0]->reference =
1276            s->reordered_input_picture[0]->f.pict_type !=
1277                AV_PICTURE_TYPE_B ? 3 : 0;
1278
1279         ff_mpeg_unref_picture(s, &s->new_picture);
1280         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1281             return ret;
1282
1283         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1284             // input is a shared pix, so we can't modifiy it -> alloc a new
1285             // one & ensure that the shared one is reuseable
1286
1287             Picture *pic;
1288             int i = ff_find_unused_picture(s, 0);
1289             if (i < 0)
1290                 return i;
1291             pic = &s->picture[i];
1292
1293             pic->reference = s->reordered_input_picture[0]->reference;
1294             if (ff_alloc_picture(s, pic, 0) < 0) {
1295                 return -1;
1296             }
1297
1298             ret = av_frame_copy_props(&pic->f, &s->reordered_input_picture[0]->f);
1299             if (ret < 0)
1300                 return ret;
1301
1302             /* mark us unused / free shared pic */
1303             av_frame_unref(&s->reordered_input_picture[0]->f);
1304             s->reordered_input_picture[0]->shared = 0;
1305
1306             s->current_picture_ptr = pic;
1307         } else {
1308             // input is not a shared pix -> reuse buffer for current_pix
1309             s->current_picture_ptr = s->reordered_input_picture[0];
1310             for (i = 0; i < 4; i++) {
1311                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1312             }
1313         }
1314         ff_mpeg_unref_picture(s, &s->current_picture);
1315         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1316                                        s->current_picture_ptr)) < 0)
1317             return ret;
1318
1319         s->picture_number = s->new_picture.f.display_picture_number;
1320     } else {
1321         ff_mpeg_unref_picture(s, &s->new_picture);
1322     }
1323     return 0;
1324 }
1325
1326 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1327                           const AVFrame *pic_arg, int *got_packet)
1328 {
1329     MpegEncContext *s = avctx->priv_data;
1330     int i, stuffing_count, ret;
1331     int context_count = s->slice_context_count;
1332
1333     s->picture_in_gop_number++;
1334
1335     if (load_input_picture(s, pic_arg) < 0)
1336         return -1;
1337
1338     if (select_input_picture(s) < 0) {
1339         return -1;
1340     }
1341
1342     /* output? */
1343     if (s->new_picture.f.data[0]) {
1344         if (!pkt->data &&
1345             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1346             return ret;
1347         if (s->mb_info) {
1348             s->mb_info_ptr = av_packet_new_side_data(pkt,
1349                                  AV_PKT_DATA_H263_MB_INFO,
1350                                  s->mb_width*s->mb_height*12);
1351             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1352         }
1353
1354         for (i = 0; i < context_count; i++) {
1355             int start_y = s->thread_context[i]->start_mb_y;
1356             int   end_y = s->thread_context[i]->  end_mb_y;
1357             int h       = s->mb_height;
1358             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1359             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1360
1361             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1362         }
1363
1364         s->pict_type = s->new_picture.f.pict_type;
1365         //emms_c();
1366         ff_MPV_frame_start(s, avctx);
1367 vbv_retry:
1368         if (encode_picture(s, s->picture_number) < 0)
1369             return -1;
1370
1371         avctx->header_bits = s->header_bits;
1372         avctx->mv_bits     = s->mv_bits;
1373         avctx->misc_bits   = s->misc_bits;
1374         avctx->i_tex_bits  = s->i_tex_bits;
1375         avctx->p_tex_bits  = s->p_tex_bits;
1376         avctx->i_count     = s->i_count;
1377         // FIXME f/b_count in avctx
1378         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1379         avctx->skip_count  = s->skip_count;
1380
1381         ff_MPV_frame_end(s);
1382
1383         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1384             ff_mjpeg_encode_picture_trailer(s);
1385
1386         if (avctx->rc_buffer_size) {
1387             RateControlContext *rcc = &s->rc_context;
1388             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1389
1390             if (put_bits_count(&s->pb) > max_size &&
1391                 s->lambda < s->avctx->lmax) {
1392                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1393                                        (s->qscale + 1) / s->qscale);
1394                 if (s->adaptive_quant) {
1395                     int i;
1396                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1397                         s->lambda_table[i] =
1398                             FFMAX(s->lambda_table[i] + 1,
1399                                   s->lambda_table[i] * (s->qscale + 1) /
1400                                   s->qscale);
1401                 }
1402                 s->mb_skipped = 0;        // done in MPV_frame_start()
1403                 // done in encode_picture() so we must undo it
1404                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1405                     if (s->flipflop_rounding          ||
1406                         s->codec_id == AV_CODEC_ID_H263P ||
1407                         s->codec_id == AV_CODEC_ID_MPEG4)
1408                         s->no_rounding ^= 1;
1409                 }
1410                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1411                     s->time_base       = s->last_time_base;
1412                     s->last_non_b_time = s->time - s->pp_time;
1413                 }
1414                 for (i = 0; i < context_count; i++) {
1415                     PutBitContext *pb = &s->thread_context[i]->pb;
1416                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1417                 }
1418                 goto vbv_retry;
1419             }
1420
1421             assert(s->avctx->rc_max_rate);
1422         }
1423
1424         if (s->flags & CODEC_FLAG_PASS1)
1425             ff_write_pass1_stats(s);
1426
1427         for (i = 0; i < 4; i++) {
1428             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1429             avctx->error[i] += s->current_picture_ptr->f.error[i];
1430         }
1431
1432         if (s->flags & CODEC_FLAG_PASS1)
1433             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1434                    avctx->i_tex_bits + avctx->p_tex_bits ==
1435                        put_bits_count(&s->pb));
1436         flush_put_bits(&s->pb);
1437         s->frame_bits  = put_bits_count(&s->pb);
1438
1439         stuffing_count = ff_vbv_update(s, s->frame_bits);
1440         if (stuffing_count) {
1441             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1442                     stuffing_count + 50) {
1443                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1444                 return -1;
1445             }
1446
1447             switch (s->codec_id) {
1448             case AV_CODEC_ID_MPEG1VIDEO:
1449             case AV_CODEC_ID_MPEG2VIDEO:
1450                 while (stuffing_count--) {
1451                     put_bits(&s->pb, 8, 0);
1452                 }
1453             break;
1454             case AV_CODEC_ID_MPEG4:
1455                 put_bits(&s->pb, 16, 0);
1456                 put_bits(&s->pb, 16, 0x1C3);
1457                 stuffing_count -= 4;
1458                 while (stuffing_count--) {
1459                     put_bits(&s->pb, 8, 0xFF);
1460                 }
1461             break;
1462             default:
1463                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1464             }
1465             flush_put_bits(&s->pb);
1466             s->frame_bits  = put_bits_count(&s->pb);
1467         }
1468
1469         /* update mpeg1/2 vbv_delay for CBR */
1470         if (s->avctx->rc_max_rate                          &&
1471             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1472             s->out_format == FMT_MPEG1                     &&
1473             90000LL * (avctx->rc_buffer_size - 1) <=
1474                 s->avctx->rc_max_rate * 0xFFFFLL) {
1475             int vbv_delay, min_delay;
1476             double inbits  = s->avctx->rc_max_rate *
1477                              av_q2d(s->avctx->time_base);
1478             int    minbits = s->frame_bits - 8 *
1479                              (s->vbv_delay_ptr - s->pb.buf - 1);
1480             double bits    = s->rc_context.buffer_index + minbits - inbits;
1481
1482             if (bits < 0)
1483                 av_log(s->avctx, AV_LOG_ERROR,
1484                        "Internal error, negative bits\n");
1485
1486             assert(s->repeat_first_field == 0);
1487
1488             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1489             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1490                         s->avctx->rc_max_rate;
1491
1492             vbv_delay = FFMAX(vbv_delay, min_delay);
1493
1494             assert(vbv_delay < 0xFFFF);
1495
1496             s->vbv_delay_ptr[0] &= 0xF8;
1497             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1498             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1499             s->vbv_delay_ptr[2] &= 0x07;
1500             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1501             avctx->vbv_delay     = vbv_delay * 300;
1502         }
1503         s->total_bits     += s->frame_bits;
1504         avctx->frame_bits  = s->frame_bits;
1505
1506         pkt->pts = s->current_picture.f.pts;
1507         if (!s->low_delay) {
1508             if (!s->current_picture.f.coded_picture_number)
1509                 pkt->dts = pkt->pts - s->dts_delta;
1510             else
1511                 pkt->dts = s->reordered_pts;
1512             s->reordered_pts = s->input_picture[0]->f.pts;
1513         } else
1514             pkt->dts = pkt->pts;
1515         if (s->current_picture.f.key_frame)
1516             pkt->flags |= AV_PKT_FLAG_KEY;
1517         if (s->mb_info)
1518             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1519     } else {
1520         s->frame_bits = 0;
1521     }
1522     assert((s->frame_bits & 7) == 0);
1523
1524     pkt->size = s->frame_bits / 8;
1525     *got_packet = !!pkt->size;
1526     return 0;
1527 }
1528
1529 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1530                                                 int n, int threshold)
1531 {
1532     static const char tab[64] = {
1533         3, 2, 2, 1, 1, 1, 1, 1,
1534         1, 1, 1, 1, 1, 1, 1, 1,
1535         1, 1, 1, 1, 1, 1, 1, 1,
1536         0, 0, 0, 0, 0, 0, 0, 0,
1537         0, 0, 0, 0, 0, 0, 0, 0,
1538         0, 0, 0, 0, 0, 0, 0, 0,
1539         0, 0, 0, 0, 0, 0, 0, 0,
1540         0, 0, 0, 0, 0, 0, 0, 0
1541     };
1542     int score = 0;
1543     int run = 0;
1544     int i;
1545     int16_t *block = s->block[n];
1546     const int last_index = s->block_last_index[n];
1547     int skip_dc;
1548
1549     if (threshold < 0) {
1550         skip_dc = 0;
1551         threshold = -threshold;
1552     } else
1553         skip_dc = 1;
1554
1555     /* Are all we could set to zero already zero? */
1556     if (last_index <= skip_dc - 1)
1557         return;
1558
1559     for (i = 0; i <= last_index; i++) {
1560         const int j = s->intra_scantable.permutated[i];
1561         const int level = FFABS(block[j]);
1562         if (level == 1) {
1563             if (skip_dc && i == 0)
1564                 continue;
1565             score += tab[run];
1566             run = 0;
1567         } else if (level > 1) {
1568             return;
1569         } else {
1570             run++;
1571         }
1572     }
1573     if (score >= threshold)
1574         return;
1575     for (i = skip_dc; i <= last_index; i++) {
1576         const int j = s->intra_scantable.permutated[i];
1577         block[j] = 0;
1578     }
1579     if (block[0])
1580         s->block_last_index[n] = 0;
1581     else
1582         s->block_last_index[n] = -1;
1583 }
1584
1585 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1586                                int last_index)
1587 {
1588     int i;
1589     const int maxlevel = s->max_qcoeff;
1590     const int minlevel = s->min_qcoeff;
1591     int overflow = 0;
1592
1593     if (s->mb_intra) {
1594         i = 1; // skip clipping of intra dc
1595     } else
1596         i = 0;
1597
1598     for (; i <= last_index; i++) {
1599         const int j = s->intra_scantable.permutated[i];
1600         int level = block[j];
1601
1602         if (level > maxlevel) {
1603             level = maxlevel;
1604             overflow++;
1605         } else if (level < minlevel) {
1606             level = minlevel;
1607             overflow++;
1608         }
1609
1610         block[j] = level;
1611     }
1612
1613     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1614         av_log(s->avctx, AV_LOG_INFO,
1615                "warning, clipping %d dct coefficients to %d..%d\n",
1616                overflow, minlevel, maxlevel);
1617 }
1618
/**
 * Compute a per-pixel weight for an 8x8 block from local pixel statistics.
 *
 * For every pixel the neighbourhood of up to 3x3 pixels (cut off at the
 * block border) is examined and the weight is set to
 * 36 * sqrt(count * sum(v^2) - sum(v)^2) / count.
 *
 * @param weight output array of 64 weights, stored row by row
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance in bytes between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            int sum   = 0;
            int sqr   = 0;
            int count = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                const uint8_t *line = ptr + ny * stride;

                for (nx = left; nx < right; nx++) {
                    const int v = line[nx];

                    sum += v;
                    sqr += v * v;
                    count++;
                }
            }
            weight[col + 8 * row] = (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
1642
1643 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1644                                                 int motion_x, int motion_y,
1645                                                 int mb_block_height,
1646                                                 int mb_block_count)
1647 {
1648     int16_t weight[8][64];
1649     int16_t orig[8][64];
1650     const int mb_x = s->mb_x;
1651     const int mb_y = s->mb_y;
1652     int i;
1653     int skip_dct[8];
1654     int dct_offset = s->linesize * 8; // default for progressive frames
1655     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1656     ptrdiff_t wrap_y, wrap_c;
1657
1658     for (i = 0; i < mb_block_count; i++)
1659         skip_dct[i] = s->skipdct;
1660
1661     if (s->adaptive_quant) {
1662         const int last_qp = s->qscale;
1663         const int mb_xy = mb_x + mb_y * s->mb_stride;
1664
1665         s->lambda = s->lambda_table[mb_xy];
1666         update_qscale(s);
1667
1668         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1669             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1670             s->dquant = s->qscale - last_qp;
1671
1672             if (s->out_format == FMT_H263) {
1673                 s->dquant = av_clip(s->dquant, -2, 2);
1674
1675                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1676                     if (!s->mb_intra) {
1677                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1678                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1679                                 s->dquant = 0;
1680                         }
1681                         if (s->mv_type == MV_TYPE_8X8)
1682                             s->dquant = 0;
1683                     }
1684                 }
1685             }
1686         }
1687         ff_set_qscale(s, last_qp + s->dquant);
1688     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1689         ff_set_qscale(s, s->qscale + s->dquant);
1690
1691     wrap_y = s->linesize;
1692     wrap_c = s->uvlinesize;
1693     ptr_y  = s->new_picture.f.data[0] +
1694              (mb_y * 16 * wrap_y)              + mb_x * 16;
1695     ptr_cb = s->new_picture.f.data[1] +
1696              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1697     ptr_cr = s->new_picture.f.data[2] +
1698              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1699
1700     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1701         uint8_t *ebuf = s->edge_emu_buffer + 32;
1702         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
1703                                  wrap_y, wrap_y,
1704                                  16, 16, mb_x * 16, mb_y * 16,
1705                                  s->width, s->height);
1706         ptr_y = ebuf;
1707         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
1708                                  wrap_c, wrap_c,
1709                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1710                                  s->width >> 1, s->height >> 1);
1711         ptr_cb = ebuf + 18 * wrap_y;
1712         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
1713                                  wrap_c, wrap_c,
1714                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1715                                  s->width >> 1, s->height >> 1);
1716         ptr_cr = ebuf + 18 * wrap_y + 8;
1717     }
1718
1719     if (s->mb_intra) {
1720         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1721             int progressive_score, interlaced_score;
1722
1723             s->interlaced_dct = 0;
1724             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1725                                                     NULL, wrap_y, 8) +
1726                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1727                                                     NULL, wrap_y, 8) - 400;
1728
1729             if (progressive_score > 0) {
1730                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1731                                                        NULL, wrap_y * 2, 8) +
1732                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1733                                                        NULL, wrap_y * 2, 8);
1734                 if (progressive_score > interlaced_score) {
1735                     s->interlaced_dct = 1;
1736
1737                     dct_offset = wrap_y;
1738                     wrap_y <<= 1;
1739                     if (s->chroma_format == CHROMA_422)
1740                         wrap_c <<= 1;
1741                 }
1742             }
1743         }
1744
1745         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1746         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1747         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1748         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1749
1750         if (s->flags & CODEC_FLAG_GRAY) {
1751             skip_dct[4] = 1;
1752             skip_dct[5] = 1;
1753         } else {
1754             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1755             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1756             if (!s->chroma_y_shift) { /* 422 */
1757                 s->dsp.get_pixels(s->block[6],
1758                                   ptr_cb + (dct_offset >> 1), wrap_c);
1759                 s->dsp.get_pixels(s->block[7],
1760                                   ptr_cr + (dct_offset >> 1), wrap_c);
1761             }
1762         }
1763     } else {
1764         op_pixels_func (*op_pix)[4];
1765         qpel_mc_func (*op_qpix)[16];
1766         uint8_t *dest_y, *dest_cb, *dest_cr;
1767
1768         dest_y  = s->dest[0];
1769         dest_cb = s->dest[1];
1770         dest_cr = s->dest[2];
1771
1772         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1773             op_pix  = s->hdsp.put_pixels_tab;
1774             op_qpix = s->dsp.put_qpel_pixels_tab;
1775         } else {
1776             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
1777             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1778         }
1779
1780         if (s->mv_dir & MV_DIR_FORWARD) {
1781             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
1782                           s->last_picture.f.data,
1783                           op_pix, op_qpix);
1784             op_pix  = s->hdsp.avg_pixels_tab;
1785             op_qpix = s->dsp.avg_qpel_pixels_tab;
1786         }
1787         if (s->mv_dir & MV_DIR_BACKWARD) {
1788             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
1789                           s->next_picture.f.data,
1790                           op_pix, op_qpix);
1791         }
1792
1793         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1794             int progressive_score, interlaced_score;
1795
1796             s->interlaced_dct = 0;
1797             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1798                                                     ptr_y,              wrap_y,
1799                                                     8) +
1800                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1801                                                     ptr_y + wrap_y * 8, wrap_y,
1802                                                     8) - 400;
1803
1804             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1805                 progressive_score -= 400;
1806
1807             if (progressive_score > 0) {
1808                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1809                                                        ptr_y,
1810                                                        wrap_y * 2, 8) +
1811                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1812                                                        ptr_y + wrap_y,
1813                                                        wrap_y * 2, 8);
1814
1815                 if (progressive_score > interlaced_score) {
1816                     s->interlaced_dct = 1;
1817
1818                     dct_offset = wrap_y;
1819                     wrap_y <<= 1;
1820                     if (s->chroma_format == CHROMA_422)
1821                         wrap_c <<= 1;
1822                 }
1823             }
1824         }
1825
1826         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1827         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1828         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1829                            dest_y + dct_offset, wrap_y);
1830         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1831                            dest_y + dct_offset + 8, wrap_y);
1832
1833         if (s->flags & CODEC_FLAG_GRAY) {
1834             skip_dct[4] = 1;
1835             skip_dct[5] = 1;
1836         } else {
1837             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1838             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1839             if (!s->chroma_y_shift) { /* 422 */
1840                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1841                                    dest_cb + (dct_offset >> 1), wrap_c);
1842                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
1843                                    dest_cr + (dct_offset >> 1), wrap_c);
1844             }
1845         }
1846         /* pre quantization */
1847         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1848                 2 * s->qscale * s->qscale) {
1849             // FIXME optimize
1850             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1851                               wrap_y, 8) < 20 * s->qscale)
1852                 skip_dct[0] = 1;
1853             if (s->dsp.sad[1](NULL, ptr_y + 8,
1854                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1855                 skip_dct[1] = 1;
1856             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1857                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1858                 skip_dct[2] = 1;
1859             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1860                               dest_y + dct_offset + 8,
1861                               wrap_y, 8) < 20 * s->qscale)
1862                 skip_dct[3] = 1;
1863             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
1864                               wrap_c, 8) < 20 * s->qscale)
1865                 skip_dct[4] = 1;
1866             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
1867                               wrap_c, 8) < 20 * s->qscale)
1868                 skip_dct[5] = 1;
1869             if (!s->chroma_y_shift) { /* 422 */
1870                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
1871                                   dest_cb + (dct_offset >> 1),
1872                                   wrap_c, 8) < 20 * s->qscale)
1873                     skip_dct[6] = 1;
1874                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
1875                                   dest_cr + (dct_offset >> 1),
1876                                   wrap_c, 8) < 20 * s->qscale)
1877                     skip_dct[7] = 1;
1878             }
1879         }
1880     }
1881
1882     if (s->quantizer_noise_shaping) {
1883         if (!skip_dct[0])
1884             get_visual_weight(weight[0], ptr_y                 , wrap_y);
1885         if (!skip_dct[1])
1886             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
1887         if (!skip_dct[2])
1888             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
1889         if (!skip_dct[3])
1890             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
1891         if (!skip_dct[4])
1892             get_visual_weight(weight[4], ptr_cb                , wrap_c);
1893         if (!skip_dct[5])
1894             get_visual_weight(weight[5], ptr_cr                , wrap_c);
1895         if (!s->chroma_y_shift) { /* 422 */
1896             if (!skip_dct[6])
1897                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
1898                                   wrap_c);
1899             if (!skip_dct[7])
1900                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
1901                                   wrap_c);
1902         }
1903         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
1904     }
1905
1906     /* DCT & quantize */
1907     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
1908     {
1909         for (i = 0; i < mb_block_count; i++) {
1910             if (!skip_dct[i]) {
1911                 int overflow;
1912                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
1913                 // FIXME we could decide to change to quantizer instead of
1914                 // clipping
1915                 // JS: I don't think that would be a good idea it could lower
1916                 //     quality instead of improve it. Just INTRADC clipping
1917                 //     deserves changes in quantizer
1918                 if (overflow)
1919                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
1920             } else
1921                 s->block_last_index[i] = -1;
1922         }
1923         if (s->quantizer_noise_shaping) {
1924             for (i = 0; i < mb_block_count; i++) {
1925                 if (!skip_dct[i]) {
1926                     s->block_last_index[i] =
1927                         dct_quantize_refine(s, s->block[i], weight[i],
1928                                             orig[i], i, s->qscale);
1929                 }
1930             }
1931         }
1932
1933         if (s->luma_elim_threshold && !s->mb_intra)
1934             for (i = 0; i < 4; i++)
1935                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
1936         if (s->chroma_elim_threshold && !s->mb_intra)
1937             for (i = 4; i < mb_block_count; i++)
1938                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
1939
1940         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
1941             for (i = 0; i < mb_block_count; i++) {
1942                 if (s->block_last_index[i] == -1)
1943                     s->coded_score[i] = INT_MAX / 256;
1944             }
1945         }
1946     }
1947
1948     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
1949         s->block_last_index[4] =
1950         s->block_last_index[5] = 0;
1951         s->block[4][0] =
1952         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
1953     }
1954
1955     // non c quantize code returns incorrect block_last_index FIXME
1956     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
1957         for (i = 0; i < mb_block_count; i++) {
1958             int j;
1959             if (s->block_last_index[i] > 0) {
1960                 for (j = 63; j > 0; j--) {
1961                     if (s->block[i][s->intra_scantable.permutated[j]])
1962                         break;
1963                 }
1964                 s->block_last_index[i] = j;
1965             }
1966         }
1967     }
1968
1969     /* huffman encode */
1970     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
1971     case AV_CODEC_ID_MPEG1VIDEO:
1972     case AV_CODEC_ID_MPEG2VIDEO:
1973         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
1974             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
1975         break;
1976     case AV_CODEC_ID_MPEG4:
1977         if (CONFIG_MPEG4_ENCODER)
1978             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
1979         break;
1980     case AV_CODEC_ID_MSMPEG4V2:
1981     case AV_CODEC_ID_MSMPEG4V3:
1982     case AV_CODEC_ID_WMV1:
1983         if (CONFIG_MSMPEG4_ENCODER)
1984             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
1985         break;
1986     case AV_CODEC_ID_WMV2:
1987         if (CONFIG_WMV2_ENCODER)
1988             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
1989         break;
1990     case AV_CODEC_ID_H261:
1991         if (CONFIG_H261_ENCODER)
1992             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
1993         break;
1994     case AV_CODEC_ID_H263:
1995     case AV_CODEC_ID_H263P:
1996     case AV_CODEC_ID_FLV1:
1997     case AV_CODEC_ID_RV10:
1998     case AV_CODEC_ID_RV20:
1999         if (CONFIG_H263_ENCODER)
2000             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2001         break;
2002     case AV_CODEC_ID_MJPEG:
2003         if (CONFIG_MJPEG_ENCODER)
2004             ff_mjpeg_encode_mb(s, s->block);
2005         break;
2006     default:
2007         assert(0);
2008     }
2009 }
2010
2011 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2012 {
2013     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2014     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2015 }
2016
2017 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2018     int i;
2019
2020     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2021
2022     /* mpeg1 */
2023     d->mb_skip_run= s->mb_skip_run;
2024     for(i=0; i<3; i++)
2025         d->last_dc[i] = s->last_dc[i];
2026
2027     /* statistics */
2028     d->mv_bits= s->mv_bits;
2029     d->i_tex_bits= s->i_tex_bits;
2030     d->p_tex_bits= s->p_tex_bits;
2031     d->i_count= s->i_count;
2032     d->f_count= s->f_count;
2033     d->b_count= s->b_count;
2034     d->skip_count= s->skip_count;
2035     d->misc_bits= s->misc_bits;
2036     d->last_bits= 0;
2037
2038     d->mb_skipped= 0;
2039     d->qscale= s->qscale;
2040     d->dquant= s->dquant;
2041
2042     d->esc3_level_length= s->esc3_level_length;
2043 }
2044
2045 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2046     int i;
2047
2048     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2049     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2050
2051     /* mpeg1 */
2052     d->mb_skip_run= s->mb_skip_run;
2053     for(i=0; i<3; i++)
2054         d->last_dc[i] = s->last_dc[i];
2055
2056     /* statistics */
2057     d->mv_bits= s->mv_bits;
2058     d->i_tex_bits= s->i_tex_bits;
2059     d->p_tex_bits= s->p_tex_bits;
2060     d->i_count= s->i_count;
2061     d->f_count= s->f_count;
2062     d->b_count= s->b_count;
2063     d->skip_count= s->skip_count;
2064     d->misc_bits= s->misc_bits;
2065
2066     d->mb_intra= s->mb_intra;
2067     d->mb_skipped= s->mb_skipped;
2068     d->mv_type= s->mv_type;
2069     d->mv_dir= s->mv_dir;
2070     d->pb= s->pb;
2071     if(s->data_partitioning){
2072         d->pb2= s->pb2;
2073         d->tex_pb= s->tex_pb;
2074     }
2075     d->block= s->block;
2076     for(i=0; i<8; i++)
2077         d->block_last_index[i]= s->block_last_index[i];
2078     d->interlaced_dct= s->interlaced_dct;
2079     d->qscale= s->qscale;
2080
2081     d->esc3_level_length= s->esc3_level_length;
2082 }
2083
2084 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2085                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2086                            int *dmin, int *next_block, int motion_x, int motion_y)
2087 {
2088     int score;
2089     uint8_t *dest_backup[3];
2090
2091     copy_context_before_encode(s, backup, type);
2092
2093     s->block= s->blocks[*next_block];
2094     s->pb= pb[*next_block];
2095     if(s->data_partitioning){
2096         s->pb2   = pb2   [*next_block];
2097         s->tex_pb= tex_pb[*next_block];
2098     }
2099
2100     if(*next_block){
2101         memcpy(dest_backup, s->dest, sizeof(s->dest));
2102         s->dest[0] = s->rd_scratchpad;
2103         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2104         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2105         assert(s->linesize >= 32); //FIXME
2106     }
2107
2108     encode_mb(s, motion_x, motion_y);
2109
2110     score= put_bits_count(&s->pb);
2111     if(s->data_partitioning){
2112         score+= put_bits_count(&s->pb2);
2113         score+= put_bits_count(&s->tex_pb);
2114     }
2115
2116     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2117         ff_MPV_decode_mb(s, s->block);
2118
2119         score *= s->lambda2;
2120         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2121     }
2122
2123     if(*next_block){
2124         memcpy(s->dest, dest_backup, sizeof(s->dest));
2125     }
2126
2127     if(score<*dmin){
2128         *dmin= score;
2129         *next_block^=1;
2130
2131         copy_context_after_encode(best, s, type);
2132     }
2133 }
2134
2135 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2136     uint32_t *sq = ff_squareTbl + 256;
2137     int acc=0;
2138     int x,y;
2139
2140     if(w==16 && h==16)
2141         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2142     else if(w==8 && h==8)
2143         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2144
2145     for(y=0; y<h; y++){
2146         for(x=0; x<w; x++){
2147             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2148         }
2149     }
2150
2151     assert(acc>=0);
2152
2153     return acc;
2154 }
2155
2156 static int sse_mb(MpegEncContext *s){
2157     int w= 16;
2158     int h= 16;
2159
2160     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2161     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2162
2163     if(w==16 && h==16)
2164       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2165         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2166                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2167                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2168       }else{
2169         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2170                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2171                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2172       }
2173     else
2174         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2175                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2176                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2177 }
2178
2179 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2180     MpegEncContext *s= *(void**)arg;
2181
2182
2183     s->me.pre_pass=1;
2184     s->me.dia_size= s->avctx->pre_dia_size;
2185     s->first_slice_line=1;
2186     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2187         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2188             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2189         }
2190         s->first_slice_line=0;
2191     }
2192
2193     s->me.pre_pass=0;
2194
2195     return 0;
2196 }
2197
2198 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2199     MpegEncContext *s= *(void**)arg;
2200
2201     ff_check_alignment();
2202
2203     s->me.dia_size= s->avctx->dia_size;
2204     s->first_slice_line=1;
2205     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2206         s->mb_x=0; //for block init below
2207         ff_init_block_index(s);
2208         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2209             s->block_index[0]+=2;
2210             s->block_index[1]+=2;
2211             s->block_index[2]+=2;
2212             s->block_index[3]+=2;
2213
2214             /* compute motion vector & mb_type and store in context */
2215             if(s->pict_type==AV_PICTURE_TYPE_B)
2216                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2217             else
2218                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2219         }
2220         s->first_slice_line=0;
2221     }
2222     return 0;
2223 }
2224
2225 static int mb_var_thread(AVCodecContext *c, void *arg){
2226     MpegEncContext *s= *(void**)arg;
2227     int mb_x, mb_y;
2228
2229     ff_check_alignment();
2230
2231     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2232         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2233             int xx = mb_x * 16;
2234             int yy = mb_y * 16;
2235             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2236             int varc;
2237             int sum = s->dsp.pix_sum(pix, s->linesize);
2238
2239             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2240
2241             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2242             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2243             s->me.mb_var_sum_temp    += varc;
2244         }
2245     }
2246     return 0;
2247 }
2248
2249 static void write_slice_end(MpegEncContext *s){
2250     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2251         if(s->partitioned_frame){
2252             ff_mpeg4_merge_partitions(s);
2253         }
2254
2255         ff_mpeg4_stuffing(&s->pb);
2256     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2257         ff_mjpeg_encode_stuffing(&s->pb);
2258     }
2259
2260     avpriv_align_put_bits(&s->pb);
2261     flush_put_bits(&s->pb);
2262
2263     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2264         s->misc_bits+= get_bits_diff(s);
2265 }
2266
2267 static void write_mb_info(MpegEncContext *s)
2268 {
2269     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2270     int offset = put_bits_count(&s->pb);
2271     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2272     int gobn = s->mb_y / s->gob_index;
2273     int pred_x, pred_y;
2274     if (CONFIG_H263_ENCODER)
2275         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2276     bytestream_put_le32(&ptr, offset);
2277     bytestream_put_byte(&ptr, s->qscale);
2278     bytestream_put_byte(&ptr, gobn);
2279     bytestream_put_le16(&ptr, mba);
2280     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2281     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2282     /* 4MV not implemented */
2283     bytestream_put_byte(&ptr, 0); /* hmv2 */
2284     bytestream_put_byte(&ptr, 0); /* vmv2 */
2285 }
2286
2287 static void update_mb_info(MpegEncContext *s, int startcode)
2288 {
2289     if (!s->mb_info)
2290         return;
2291     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2292         s->mb_info_size += 12;
2293         s->prev_mb_info = s->last_mb_info;
2294     }
2295     if (startcode) {
2296         s->prev_mb_info = put_bits_count(&s->pb)/8;
2297         /* This might have incremented mb_info_size above, and we return without
2298          * actually writing any info into that slot yet. But in that case,
2299          * this will be called again at the start of the after writing the
2300          * start code, actually writing the mb info. */
2301         return;
2302     }
2303
2304     s->last_mb_info = put_bits_count(&s->pb)/8;
2305     if (!s->mb_info_size)
2306         s->mb_info_size += 12;
2307     write_mb_info(s);
2308 }
2309
2310 static int encode_thread(AVCodecContext *c, void *arg){
2311     MpegEncContext *s= *(void**)arg;
2312     int mb_x, mb_y, pdif = 0;
2313     int chr_h= 16>>s->chroma_y_shift;
2314     int i, j;
2315     MpegEncContext best_s, backup_s;
2316     uint8_t bit_buf[2][MAX_MB_BYTES];
2317     uint8_t bit_buf2[2][MAX_MB_BYTES];
2318     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2319     PutBitContext pb[2], pb2[2], tex_pb[2];
2320
2321     ff_check_alignment();
2322
2323     for(i=0; i<2; i++){
2324         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2325         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2326         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2327     }
2328
2329     s->last_bits= put_bits_count(&s->pb);
2330     s->mv_bits=0;
2331     s->misc_bits=0;
2332     s->i_tex_bits=0;
2333     s->p_tex_bits=0;
2334     s->i_count=0;
2335     s->f_count=0;
2336     s->b_count=0;
2337     s->skip_count=0;
2338
2339     for(i=0; i<3; i++){
2340         /* init last dc values */
2341         /* note: quant matrix value (8) is implied here */
2342         s->last_dc[i] = 128 << s->intra_dc_precision;
2343
2344         s->current_picture.f.error[i] = 0;
2345     }
2346     s->mb_skip_run = 0;
2347     memset(s->last_mv, 0, sizeof(s->last_mv));
2348
2349     s->last_mv_dir = 0;
2350
2351     switch(s->codec_id){
2352     case AV_CODEC_ID_H263:
2353     case AV_CODEC_ID_H263P:
2354     case AV_CODEC_ID_FLV1:
2355         if (CONFIG_H263_ENCODER)
2356             s->gob_index = ff_h263_get_gob_height(s);
2357         break;
2358     case AV_CODEC_ID_MPEG4:
2359         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2360             ff_mpeg4_init_partitions(s);
2361         break;
2362     }
2363
2364     s->resync_mb_x=0;
2365     s->resync_mb_y=0;
2366     s->first_slice_line = 1;
2367     s->ptr_lastgob = s->pb.buf;
2368     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2369         s->mb_x=0;
2370         s->mb_y= mb_y;
2371
2372         ff_set_qscale(s, s->qscale);
2373         ff_init_block_index(s);
2374
2375         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2376             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2377             int mb_type= s->mb_type[xy];
2378 //            int d;
2379             int dmin= INT_MAX;
2380             int dir;
2381
2382             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2383                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2384                 return -1;
2385             }
2386             if(s->data_partitioning){
2387                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2388                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2389                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2390                     return -1;
2391                 }
2392             }
2393
2394             s->mb_x = mb_x;
2395             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2396             ff_update_block_index(s);
2397
2398             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2399                 ff_h261_reorder_mb_index(s);
2400                 xy= s->mb_y*s->mb_stride + s->mb_x;
2401                 mb_type= s->mb_type[xy];
2402             }
2403
2404             /* write gob / video packet header  */
2405             if(s->rtp_mode){
2406                 int current_packet_size, is_gob_start;
2407
2408                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2409
2410                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2411
2412                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2413
2414                 switch(s->codec_id){
2415                 case AV_CODEC_ID_H263:
2416                 case AV_CODEC_ID_H263P:
2417                     if(!s->h263_slice_structured)
2418                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2419                     break;
2420                 case AV_CODEC_ID_MPEG2VIDEO:
2421                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2422                 case AV_CODEC_ID_MPEG1VIDEO:
2423                     if(s->mb_skip_run) is_gob_start=0;
2424                     break;
2425                 }
2426
2427                 if(is_gob_start){
2428                     if(s->start_mb_y != mb_y || mb_x!=0){
2429                         write_slice_end(s);
2430
2431                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2432                             ff_mpeg4_init_partitions(s);
2433                         }
2434                     }
2435
2436                     assert((put_bits_count(&s->pb)&7) == 0);
2437                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2438
2439                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2440                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2441                         int d = 100 / s->error_rate;
2442                         if(r % d == 0){
2443                             current_packet_size=0;
2444                             s->pb.buf_ptr= s->ptr_lastgob;
2445                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2446                         }
2447                     }
2448
2449                     if (s->avctx->rtp_callback){
2450                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2451                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2452                     }
2453                     update_mb_info(s, 1);
2454
2455                     switch(s->codec_id){
2456                     case AV_CODEC_ID_MPEG4:
2457                         if (CONFIG_MPEG4_ENCODER) {
2458                             ff_mpeg4_encode_video_packet_header(s);
2459                             ff_mpeg4_clean_buffers(s);
2460                         }
2461                     break;
2462                     case AV_CODEC_ID_MPEG1VIDEO:
2463                     case AV_CODEC_ID_MPEG2VIDEO:
2464                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2465                             ff_mpeg1_encode_slice_header(s);
2466                             ff_mpeg1_clean_buffers(s);
2467                         }
2468                     break;
2469                     case AV_CODEC_ID_H263:
2470                     case AV_CODEC_ID_H263P:
2471                         if (CONFIG_H263_ENCODER)
2472                             ff_h263_encode_gob_header(s, mb_y);
2473                     break;
2474                     }
2475
2476                     if(s->flags&CODEC_FLAG_PASS1){
2477                         int bits= put_bits_count(&s->pb);
2478                         s->misc_bits+= bits - s->last_bits;
2479                         s->last_bits= bits;
2480                     }
2481
2482                     s->ptr_lastgob += current_packet_size;
2483                     s->first_slice_line=1;
2484                     s->resync_mb_x=mb_x;
2485                     s->resync_mb_y=mb_y;
2486                 }
2487             }
2488
2489             if(  (s->resync_mb_x   == s->mb_x)
2490                && s->resync_mb_y+1 == s->mb_y){
2491                 s->first_slice_line=0;
2492             }
2493
2494             s->mb_skipped=0;
2495             s->dquant=0; //only for QP_RD
2496
2497             update_mb_info(s, 0);
2498
2499             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2500                 int next_block=0;
2501                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2502
2503                 copy_context_before_encode(&backup_s, s, -1);
2504                 backup_s.pb= s->pb;
2505                 best_s.data_partitioning= s->data_partitioning;
2506                 best_s.partitioned_frame= s->partitioned_frame;
2507                 if(s->data_partitioning){
2508                     backup_s.pb2= s->pb2;
2509                     backup_s.tex_pb= s->tex_pb;
2510                 }
2511
2512                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2513                     s->mv_dir = MV_DIR_FORWARD;
2514                     s->mv_type = MV_TYPE_16X16;
2515                     s->mb_intra= 0;
2516                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2517                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2518                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2519                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2520                 }
2521                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2522                     s->mv_dir = MV_DIR_FORWARD;
2523                     s->mv_type = MV_TYPE_FIELD;
2524                     s->mb_intra= 0;
2525                     for(i=0; i<2; i++){
2526                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2527                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2528                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2529                     }
2530                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2531                                  &dmin, &next_block, 0, 0);
2532                 }
2533                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2534                     s->mv_dir = MV_DIR_FORWARD;
2535                     s->mv_type = MV_TYPE_16X16;
2536                     s->mb_intra= 0;
2537                     s->mv[0][0][0] = 0;
2538                     s->mv[0][0][1] = 0;
2539                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2540                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2541                 }
2542                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2543                     s->mv_dir = MV_DIR_FORWARD;
2544                     s->mv_type = MV_TYPE_8X8;
2545                     s->mb_intra= 0;
2546                     for(i=0; i<4; i++){
2547                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2548                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2549                     }
2550                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2551                                  &dmin, &next_block, 0, 0);
2552                 }
2553                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2554                     s->mv_dir = MV_DIR_FORWARD;
2555                     s->mv_type = MV_TYPE_16X16;
2556                     s->mb_intra= 0;
2557                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2558                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2559                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2560                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2561                 }
2562                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2563                     s->mv_dir = MV_DIR_BACKWARD;
2564                     s->mv_type = MV_TYPE_16X16;
2565                     s->mb_intra= 0;
2566                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2567                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2568                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2569                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2570                 }
2571                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2572                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2573                     s->mv_type = MV_TYPE_16X16;
2574                     s->mb_intra= 0;
2575                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2576                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2577                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2578                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2579                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2580                                  &dmin, &next_block, 0, 0);
2581                 }
2582                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2583                     s->mv_dir = MV_DIR_FORWARD;
2584                     s->mv_type = MV_TYPE_FIELD;
2585                     s->mb_intra= 0;
2586                     for(i=0; i<2; i++){
2587                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2588                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2589                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2590                     }
2591                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2592                                  &dmin, &next_block, 0, 0);
2593                 }
2594                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2595                     s->mv_dir = MV_DIR_BACKWARD;
2596                     s->mv_type = MV_TYPE_FIELD;
2597                     s->mb_intra= 0;
2598                     for(i=0; i<2; i++){
2599                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2600                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2601                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2602                     }
2603                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2604                                  &dmin, &next_block, 0, 0);
2605                 }
2606                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2607                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2608                     s->mv_type = MV_TYPE_FIELD;
2609                     s->mb_intra= 0;
2610                     for(dir=0; dir<2; dir++){
2611                         for(i=0; i<2; i++){
2612                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2613                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2614                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2615                         }
2616                     }
2617                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2618                                  &dmin, &next_block, 0, 0);
2619                 }
2620                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2621                     s->mv_dir = 0;
2622                     s->mv_type = MV_TYPE_16X16;
2623                     s->mb_intra= 1;
2624                     s->mv[0][0][0] = 0;
2625                     s->mv[0][0][1] = 0;
2626                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2627                                  &dmin, &next_block, 0, 0);
2628                     if(s->h263_pred || s->h263_aic){
2629                         if(best_s.mb_intra)
2630                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2631                         else
2632                             ff_clean_intra_table_entries(s); //old mode?
2633                     }
2634                 }
2635
2636                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2637                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2638                         const int last_qp= backup_s.qscale;
2639                         int qpi, qp, dc[6];
2640                         int16_t ac[6][16];
2641                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2642                         static const int dquant_tab[4]={-1,1,-2,2};
2643
2644                         assert(backup_s.dquant == 0);
2645
2646                         //FIXME intra
2647                         s->mv_dir= best_s.mv_dir;
2648                         s->mv_type = MV_TYPE_16X16;
2649                         s->mb_intra= best_s.mb_intra;
2650                         s->mv[0][0][0] = best_s.mv[0][0][0];
2651                         s->mv[0][0][1] = best_s.mv[0][0][1];
2652                         s->mv[1][0][0] = best_s.mv[1][0][0];
2653                         s->mv[1][0][1] = best_s.mv[1][0][1];
2654
2655                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2656                         for(; qpi<4; qpi++){
2657                             int dquant= dquant_tab[qpi];
2658                             qp= last_qp + dquant;
2659                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2660                                 continue;
2661                             backup_s.dquant= dquant;
2662                             if(s->mb_intra && s->dc_val[0]){
2663                                 for(i=0; i<6; i++){
2664                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2665                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2666                                 }
2667                             }
2668
2669                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2670                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2671                             if(best_s.qscale != qp){
2672                                 if(s->mb_intra && s->dc_val[0]){
2673                                     for(i=0; i<6; i++){
2674                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2675                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2676                                     }
2677                                 }
2678                             }
2679                         }
2680                     }
2681                 }
2682                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2683                     int mx= s->b_direct_mv_table[xy][0];
2684                     int my= s->b_direct_mv_table[xy][1];
2685
2686                     backup_s.dquant = 0;
2687                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2688                     s->mb_intra= 0;
2689                     ff_mpeg4_set_direct_mv(s, mx, my);
2690                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2691                                  &dmin, &next_block, mx, my);
2692                 }
2693                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2694                     backup_s.dquant = 0;
2695                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2696                     s->mb_intra= 0;
2697                     ff_mpeg4_set_direct_mv(s, 0, 0);
2698                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2699                                  &dmin, &next_block, 0, 0);
2700                 }
2701                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2702                     int coded=0;
2703                     for(i=0; i<6; i++)
2704                         coded |= s->block_last_index[i];
2705                     if(coded){
2706                         int mx,my;
2707                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2708                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2709                             mx=my=0; //FIXME find the one we actually used
2710                             ff_mpeg4_set_direct_mv(s, mx, my);
2711                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2712                             mx= s->mv[1][0][0];
2713                             my= s->mv[1][0][1];
2714                         }else{
2715                             mx= s->mv[0][0][0];
2716                             my= s->mv[0][0][1];
2717                         }
2718
2719                         s->mv_dir= best_s.mv_dir;
2720                         s->mv_type = best_s.mv_type;
2721                         s->mb_intra= 0;
2722 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2723                         s->mv[0][0][1] = best_s.mv[0][0][1];
2724                         s->mv[1][0][0] = best_s.mv[1][0][0];
2725                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2726                         backup_s.dquant= 0;
2727                         s->skipdct=1;
2728                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2729                                         &dmin, &next_block, mx, my);
2730                         s->skipdct=0;
2731                     }
2732                 }
2733
2734                 s->current_picture.qscale_table[xy] = best_s.qscale;
2735
2736                 copy_context_after_encode(s, &best_s, -1);
2737
2738                 pb_bits_count= put_bits_count(&s->pb);
2739                 flush_put_bits(&s->pb);
2740                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2741                 s->pb= backup_s.pb;
2742
2743                 if(s->data_partitioning){
2744                     pb2_bits_count= put_bits_count(&s->pb2);
2745                     flush_put_bits(&s->pb2);
2746                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2747                     s->pb2= backup_s.pb2;
2748
2749                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2750                     flush_put_bits(&s->tex_pb);
2751                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2752                     s->tex_pb= backup_s.tex_pb;
2753                 }
2754                 s->last_bits= put_bits_count(&s->pb);
2755
2756                 if (CONFIG_H263_ENCODER &&
2757                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2758                     ff_h263_update_motion_val(s);
2759
2760                 if(next_block==0){ //FIXME 16 vs linesize16
2761                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2762                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2763                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2764                 }
2765
2766                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2767                     ff_MPV_decode_mb(s, s->block);
2768             } else {
2769                 int motion_x = 0, motion_y = 0;
2770                 s->mv_type=MV_TYPE_16X16;
2771                 // only one MB-Type possible
2772
2773                 switch(mb_type){
2774                 case CANDIDATE_MB_TYPE_INTRA:
2775                     s->mv_dir = 0;
2776                     s->mb_intra= 1;
2777                     motion_x= s->mv[0][0][0] = 0;
2778                     motion_y= s->mv[0][0][1] = 0;
2779                     break;
2780                 case CANDIDATE_MB_TYPE_INTER:
2781                     s->mv_dir = MV_DIR_FORWARD;
2782                     s->mb_intra= 0;
2783                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2784                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2785                     break;
2786                 case CANDIDATE_MB_TYPE_INTER_I:
2787                     s->mv_dir = MV_DIR_FORWARD;
2788                     s->mv_type = MV_TYPE_FIELD;
2789                     s->mb_intra= 0;
2790                     for(i=0; i<2; i++){
2791                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2792                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2793                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2794                     }
2795                     break;
2796                 case CANDIDATE_MB_TYPE_INTER4V:
2797                     s->mv_dir = MV_DIR_FORWARD;
2798                     s->mv_type = MV_TYPE_8X8;
2799                     s->mb_intra= 0;
2800                     for(i=0; i<4; i++){
2801                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2802                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2803                     }
2804                     break;
2805                 case CANDIDATE_MB_TYPE_DIRECT:
2806                     if (CONFIG_MPEG4_ENCODER) {
2807                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2808                         s->mb_intra= 0;
2809                         motion_x=s->b_direct_mv_table[xy][0];
2810                         motion_y=s->b_direct_mv_table[xy][1];
2811                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2812                     }
2813                     break;
2814                 case CANDIDATE_MB_TYPE_DIRECT0:
2815                     if (CONFIG_MPEG4_ENCODER) {
2816                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2817                         s->mb_intra= 0;
2818                         ff_mpeg4_set_direct_mv(s, 0, 0);
2819                     }
2820                     break;
2821                 case CANDIDATE_MB_TYPE_BIDIR:
2822                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2823                     s->mb_intra= 0;
2824                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2825                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2826                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2827                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2828                     break;
2829                 case CANDIDATE_MB_TYPE_BACKWARD:
2830                     s->mv_dir = MV_DIR_BACKWARD;
2831                     s->mb_intra= 0;
2832                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2833                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2834                     break;
2835                 case CANDIDATE_MB_TYPE_FORWARD:
2836                     s->mv_dir = MV_DIR_FORWARD;
2837                     s->mb_intra= 0;
2838                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2839                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2840                     break;
2841                 case CANDIDATE_MB_TYPE_FORWARD_I:
2842                     s->mv_dir = MV_DIR_FORWARD;
2843                     s->mv_type = MV_TYPE_FIELD;
2844                     s->mb_intra= 0;
2845                     for(i=0; i<2; i++){
2846                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2847                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2848                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2849                     }
2850                     break;
2851                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2852                     s->mv_dir = MV_DIR_BACKWARD;
2853                     s->mv_type = MV_TYPE_FIELD;
2854                     s->mb_intra= 0;
2855                     for(i=0; i<2; i++){
2856                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2857                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2858                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2859                     }
2860                     break;
2861                 case CANDIDATE_MB_TYPE_BIDIR_I:
2862                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2863                     s->mv_type = MV_TYPE_FIELD;
2864                     s->mb_intra= 0;
2865                     for(dir=0; dir<2; dir++){
2866                         for(i=0; i<2; i++){
2867                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2868                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2869                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2870                         }
2871                     }
2872                     break;
2873                 default:
2874                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
2875                 }
2876
2877                 encode_mb(s, motion_x, motion_y);
2878
2879                 // RAL: Update last macroblock type
2880                 s->last_mv_dir = s->mv_dir;
2881
2882                 if (CONFIG_H263_ENCODER &&
2883                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2884                     ff_h263_update_motion_val(s);
2885
2886                 ff_MPV_decode_mb(s, s->block);
2887             }
2888
2889             /* clean the MV table in IPS frames for direct mode in B frames */
2890             if(s->mb_intra /* && I,P,S_TYPE */){
2891                 s->p_mv_table[xy][0]=0;
2892                 s->p_mv_table[xy][1]=0;
2893             }
2894
2895             if(s->flags&CODEC_FLAG_PSNR){
2896                 int w= 16;
2897                 int h= 16;
2898
2899                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2900                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2901
2902                 s->current_picture.f.error[0] += sse(
2903                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
2904                     s->dest[0], w, h, s->linesize);
2905                 s->current_picture.f.error[1] += sse(
2906                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2907                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2908                 s->current_picture.f.error[2] += sse(
2909                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2910                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2911             }
2912             if(s->loop_filter){
2913                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
2914                     ff_h263_loop_filter(s);
2915             }
2916             av_dlog(s->avctx, "MB %d %d bits\n",
2917                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
2918         }
2919     }
2920
2921     //not beautiful here but we must write it before flushing so it has to be here
2922     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
2923         ff_msmpeg4_encode_ext_header(s);
2924
2925     write_slice_end(s);
2926
2927     /* Send the last GOB if RTP */
2928     if (s->avctx->rtp_callback) {
2929         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
2930         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
2931         /* Call the RTP callback to send the last GOB */
2932         emms_c();
2933         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
2934     }
2935
2936     return 0;
2937 }
2938
/**
 * Move one accumulated field from src to dst.
 *
 * Adds src->field to dst->field and then resets src->field to 0. The
 * expansion refers to variables literally named `dst` and `src`, which must
 * be in scope where the macro is used.
 *
 * The two statements are wrapped in do { } while (0) so they behave as a
 * single statement: used as the body of an unbraced if/else or loop, both
 * the addition and the reset stay under the same condition.
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
2940 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
2941     MERGE(me.scene_change_score);
2942     MERGE(me.mc_mb_var_sum_temp);
2943     MERGE(me.mb_var_sum_temp);
2944 }
2945
2946 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
2947     int i;
2948
2949     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
2950     MERGE(dct_count[1]);
2951     MERGE(mv_bits);
2952     MERGE(i_tex_bits);
2953     MERGE(p_tex_bits);
2954     MERGE(i_count);
2955     MERGE(f_count);
2956     MERGE(b_count);
2957     MERGE(skip_count);
2958     MERGE(misc_bits);
2959     MERGE(er.error_count);
2960     MERGE(padding_bug_score);
2961     MERGE(current_picture.f.error[0]);
2962     MERGE(current_picture.f.error[1]);
2963     MERGE(current_picture.f.error[2]);
2964
2965     if(dst->avctx->noise_reduction){
2966         for(i=0; i<64; i++){
2967             MERGE(dct_error_sum[0][i]);
2968             MERGE(dct_error_sum[1][i]);
2969         }
2970     }
2971
2972     assert(put_bits_count(&src->pb) % 8 ==0);
2973     assert(put_bits_count(&dst->pb) % 8 ==0);
2974     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
2975     flush_put_bits(&dst->pb);
2976 }
2977
2978 static int estimate_qp(MpegEncContext *s, int dry_run){
2979     if (s->next_lambda){
2980         s->current_picture_ptr->f.quality =
2981         s->current_picture.f.quality = s->next_lambda;
2982         if(!dry_run) s->next_lambda= 0;
2983     } else if (!s->fixed_qscale) {
2984         s->current_picture_ptr->f.quality =
2985         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
2986         if (s->current_picture.f.quality < 0)
2987             return -1;
2988     }
2989
2990     if(s->adaptive_quant){
2991         switch(s->codec_id){
2992         case AV_CODEC_ID_MPEG4:
2993             if (CONFIG_MPEG4_ENCODER)
2994                 ff_clean_mpeg4_qscales(s);
2995             break;
2996         case AV_CODEC_ID_H263:
2997         case AV_CODEC_ID_H263P:
2998         case AV_CODEC_ID_FLV1:
2999             if (CONFIG_H263_ENCODER)
3000                 ff_clean_h263_qscales(s);
3001             break;
3002         default:
3003             ff_init_qscale_tab(s);
3004         }
3005
3006         s->lambda= s->lambda_table[0];
3007         //FIXME broken
3008     }else
3009         s->lambda = s->current_picture.f.quality;
3010     update_qscale(s);
3011     return 0;
3012 }
3013
3014 /* must be called before writing the header */
3015 static void set_frame_distances(MpegEncContext * s){
3016     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3017     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3018
3019     if(s->pict_type==AV_PICTURE_TYPE_B){
3020         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3021         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3022     }else{
3023         s->pp_time= s->time - s->last_non_b_time;
3024         s->last_non_b_time= s->time;
3025         assert(s->picture_number==0 || s->pp_time > 0);
3026     }
3027 }
3028
/**
 * Encode the picture currently set up in the context.
 *
 * Steps, in order: initialise timing and rounding state, pick a provisional
 * lambda, synchronise the slice contexts, run motion estimation (or the
 * intra variance pass), optionally promote a P picture to I on scene change,
 * choose f_code/b_code and clamp over-long vectors, run the final quantizer
 * estimate, write the format-specific picture header and finally run
 * encode_thread over all slice contexts.
 *
 * @param s              encoder context
 * @param picture_number stored into s->picture_number and forwarded to the
 *                       picture header writers
 * @return 0 on success; -1 if estimate_qp() or ff_init_me() fails, or the
 *         negative value returned by ff_update_duplicate_context()
 */
3029 static int encode_picture(MpegEncContext *s, int picture_number)
3030 {
3031     int i, ret;
3032     int bits;
3033     int context_count = s->slice_context_count;
3034
3035     s->picture_number = picture_number;
3036
3037     /* Reset the average MB variance */
3038     s->me.mb_var_sum_temp    =
3039     s->me.mc_mb_var_sum_temp = 0;
3040
3041     /* we need to initialize some time vars before we can encode b-frames */
3042     // RAL: Condition added for MPEG1VIDEO
3043     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3044         set_frame_distances(s);
3045     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3046         ff_set_mpeg4_time(s);
3047
3048     s->me.scene_change_score=0;
3049
3050 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3051
         /* I pictures reset no_rounding (to 1 for msmpeg4_version >= 3, else 0);
          * other non-B pictures toggle it for the codecs listed below */
3052     if(s->pict_type==AV_PICTURE_TYPE_I){
3053         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3054         else                        s->no_rounding=0;
3055     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3056         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3057             s->no_rounding ^= 1;
3058     }
3059
         /* provisional lambda for motion estimation: in pass 2 from a dry-run
          * estimate_qp(), otherwise (unless a fixed qscale is requested) from the
          * lambda last recorded for the relevant picture type */
3060     if(s->flags & CODEC_FLAG_PASS2){
3061         if (estimate_qp(s,1) < 0)
3062             return -1;
3063         ff_get_2pass_fcode(s);
3064     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3065         if(s->pict_type==AV_PICTURE_TYPE_B)
3066             s->lambda= s->last_lambda_for[s->pict_type];
3067         else
3068             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3069         update_qscale(s);
3070     }
3071
3072     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* context 0 is skipped here: only thread_context[1..] are updated from s */
3073     for(i=1; i<context_count; i++){
3074         ret = ff_update_duplicate_context(s->thread_context[i], s);
3075         if (ret < 0)
3076             return ret;
3077     }
3078
3079     if(ff_init_me(s)<0)
3080         return -1;
3081
3082     /* Estimate motion for every MB */
3083     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda/lambda2 by me_penalty_compensation/256, rounded */
3084         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3085         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3086         if (s->pict_type != AV_PICTURE_TYPE_B) {
3087             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3088                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3089             }
3090         }
3091
3092         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3093     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3094         /* I-Frame */
3095         for(i=0; i<s->mb_stride*s->mb_height; i++)
3096             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3097
3098         if(!s->fixed_qscale){
3099             /* finding spatial complexity for I-frame rate control */
3100             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3101         }
3102     }
         /* fold the results of the additional slice contexts back into s */
3103     for(i=1; i<context_count; i++){
3104         merge_context_after_me(s, s->thread_context[i]);
3105     }
3106     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3107     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3108     emms_c();
3109
         /* scene change: re-type a P picture as I and mark every MB as intra */
3110     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3111         s->pict_type= AV_PICTURE_TYPE_I;
3112         for(i=0; i<s->mb_stride*s->mb_height; i++)
3113             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3114         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3115                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3116     }
3117
         /* f_code/b_code selection and vector fixing; skipped when umvplus is set */
3118     if(!s->umvplus){
3119         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3120             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3121
3122             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3123                 int a,b;
3124                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3125                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3126                 s->f_code= FFMAX3(s->f_code, a, b);
3127             }
3128
3129             ff_fix_long_p_mvs(s);
3130             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3131             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3132                 int j;
3133                 for(i=0; i<2; i++){
3134                     for(j=0; j<2; j++)
3135                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3136                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3137                 }
3138             }
3139         }
3140
3141         if(s->pict_type==AV_PICTURE_TYPE_B){
3142             int a, b;
3143
                 /* f_code covers the forward tables, b_code the backward tables */
3144             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3145             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3146             s->f_code = FFMAX(a, b);
3147
3148             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3149             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3150             s->b_code = FFMAX(a, b);
3151
3152             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3153             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3154             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3155             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3156             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3157                 int dir, j;
3158                 for(dir=0; dir<2; dir++){
3159                     for(i=0; i<2; i++){
3160                         for(j=0; j<2; j++){
3161                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3162                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3163                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3164                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3165                         }
3166                     }
3167                 }
3168             }
3169         }
3170     }
3171
         /* final (non dry-run) quantizer estimate for this picture */
3172     if (estimate_qp(s, 0) < 0)
3173         return -1;
3174
3175     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3176         s->qscale= 3; //reduce clipping problems
3177
3178     if (s->out_format == FMT_MJPEG) {
3179         /* for mjpeg, we do include qscale in the matrix */
3180         for(i=1;i<64;i++){
3181             int j= s->dsp.idct_permutation[i];
3182
3183             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3184         }
3185         s->y_dc_scale_table=
3186         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3187         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3188         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3189                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
             /* qscale is now baked into intra_matrix, so pin it to the only
              * value (8) for which the q matrices were just built */
3190         s->qscale= 8;
3191     }
3192
3193     //FIXME var duplication
3194     s->current_picture_ptr->f.key_frame =
3195     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3196     s->current_picture_ptr->f.pict_type =
3197     s->current_picture.f.pict_type = s->pict_type;
3198
3199     if (s->current_picture.f.key_frame)
3200         s->picture_in_gop_number=0;
3201
         /* write the picture header; last_bits marks the bit position before it */
3202     s->last_bits= put_bits_count(&s->pb);
3203     switch(s->out_format) {
3204     case FMT_MJPEG:
3205         if (CONFIG_MJPEG_ENCODER)
3206             ff_mjpeg_encode_picture_header(s);
3207         break;
3208     case FMT_H261:
3209         if (CONFIG_H261_ENCODER)
3210             ff_h261_encode_picture_header(s, picture_number);
3211         break;
3212     case FMT_H263:
3213         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3214             ff_wmv2_encode_picture_header(s, picture_number);
3215         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3216             ff_msmpeg4_encode_picture_header(s, picture_number);
3217         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3218             ff_mpeg4_encode_picture_header(s, picture_number);
3219         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3220             ff_rv10_encode_picture_header(s, picture_number);
3221         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3222             ff_rv20_encode_picture_header(s, picture_number);
3223         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3224             ff_flv_encode_picture_header(s, picture_number);
3225         else if (CONFIG_H263_ENCODER)
3226             ff_h263_encode_picture_header(s, picture_number);
3227         break;
3228     case FMT_MPEG1:
3229         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3230             ff_mpeg1_encode_picture_header(s, picture_number);
3231         break;
3232     default:
3233         assert(0);
3234     }
3235     bits= put_bits_count(&s->pb);
3236     s->header_bits= bits - s->last_bits;
3237
         /* propagate post-ME state to the other slice contexts, encode all
          * slices, then merge their results back into s */
3238     for(i=1; i<context_count; i++){
3239         update_duplicate_context_after_me(s->thread_context[i], s);
3240     }
3241     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3242     for(i=1; i<context_count; i++){
3243         merge_context_after_encode(s, s->thread_context[i]);
3244     }
3245     emms_c();
3246     return 0;
3247 }
3248
3249 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3250     const int intra= s->mb_intra;
3251     int i;
3252
3253     s->dct_count[intra]++;
3254
3255     for(i=0; i<64; i++){
3256         int level= block[i];
3257
3258         if(level){
3259             if(level>0){
3260                 s->dct_error_sum[intra][i] += level;
3261                 level -= s->dct_offset[intra][i];
3262                 if(level<0) level=0;
3263             }else{
3264                 s->dct_error_sum[intra][i] -= level;
3265                 level += s->dct_offset[intra][i];
3266                 if(level>0) level=0;
3267             }
3268             block[i]= level;
3269         }
3270     }
3271 }
3272
/**
 * Forward-transform and quantize one block, choosing the coded run/level
 * sequence with a rate-distortion (trellis) search.
 *
 * For each coefficient up to the last significant one, up to two candidate
 * quantized levels are considered; a dynamic program over "survivor" start
 * positions picks the sequence that minimises distortion + lambda * bits.
 *
 * @param s        encoder context
 * @param block    64 coefficients; transformed in place by s->dsp.fdct and
 *                 overwritten with the chosen quantized levels
 * @param n        block index; n < 4 selects y_dc_scale, otherwise c_dc_scale,
 *                 and it indexes s->coded_score[]
 * @param qscale   quantizer, used to index q_intra_matrix / q_inter_matrix
 * @param overflow set to nonzero when a candidate level may exceed
 *                 s->max_qcoeff
 * @return scan-order index of the last nonzero coefficient; a value below the
 *         first coded index (e.g. -1 for non-intra blocks) when nothing is coded
 */
3273 static int dct_quantize_trellis_c(MpegEncContext *s,
3274                                   int16_t *block, int n,
3275                                   int qscale, int *overflow){
3276     const int *qmat;
3277     const uint8_t *scantable= s->intra_scantable.scantable;
3278     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3279     int max=0;
3280     unsigned int threshold1, threshold2;
3281     int bias=0;
         /* run_tab/level_tab/score_tab are indexed by "position after coefficient
          * i" (i+1), hence 65 entries */
3282     int run_tab[65];
3283     int level_tab[65];
3284     int score_tab[65];
3285     int survivor[65];
3286     int survivor_count;
3287     int last_run=0;
3288     int last_level=0;
3289     int last_score= 0;
3290     int last_i;
         /* coeff[k][i]: k-th candidate level for scan position i;
          * coeff_count[i]: number of valid candidates (1 or 2) */
3291     int coeff[2][64];
3292     int coeff_count[64];
3293     int qmul, qadd, start_i, last_non_zero, i, dc;
3294     const int esc_length= s->ac_esc_length;
3295     uint8_t * length;
3296     uint8_t * last_length;
3297     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3298
3299     s->dsp.fdct (block);
3300
3301     if(s->dct_error_sum)
3302         s->denoise_dct(s, block);
         /* reconstruction constants used below when out_format is FMT_H263 */
3303     qmul= qscale*16;
3304     qadd= ((qscale-1)|1)*8;
3305
3306     if (s->mb_intra) {
3307         int q;
3308         if (!s->h263_aic) {
3309             if (n < 4)
3310                 q = s->y_dc_scale;
3311             else
3312                 q = s->c_dc_scale;
3313             q = q << 3;
3314         } else{
3315             /* For AIC we skip quant/dequant of INTRADC */
3316             q = 1 << 3;
3317             qadd=0;
3318         }
3319
3320         /* note: block[0] is assumed to be positive */
3321         block[0] = (block[0] + (q >> 1)) / q;
             /* DC is quantized separately above; the search starts at index 1 */
3322         start_i = 1;
3323         last_non_zero = 0;
3324         qmat = s->q_intra_matrix[qscale];
3325         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3326             bias= 1<<(QMAT_SHIFT-1);
3327         length     = s->intra_ac_vlc_length;
3328         last_length= s->intra_ac_vlc_last_length;
3329     } else {
3330         start_i = 0;
3331         last_non_zero = -1;
3332         qmat = s->q_inter_matrix[qscale];
3333         length     = s->inter_ac_vlc_length;
3334         last_length= s->inter_ac_vlc_last_length;
3335     }
3336     last_i= start_i;
3337
         /* (unsigned)(level+threshold1) > threshold2 is a single-compare test for
          * level lying outside [-threshold1, threshold1] */
3338     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3339     threshold2= (threshold1<<1);
3340
         /* scan backwards for the last coefficient that passes the threshold */
3341     for(i=63; i>=start_i; i--) {
3342         const int j = scantable[i];
3343         int level = block[j] * qmat[j];
3344
3345         if(((unsigned)(level+threshold1))>threshold2){
3346             last_non_zero = i;
3347             break;
3348         }
3349     }
3350
         /* build the candidate levels for every position up to last_non_zero */
3351     for(i=start_i; i<=last_non_zero; i++) {
3352         const int j = scantable[i];
3353         int level = block[j] * qmat[j];
3354
3355 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3356 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3357         if(((unsigned)(level+threshold1))>threshold2){
3358             if(level>0){
3359                 level= (bias + level)>>QMAT_SHIFT;
3360                 coeff[0][i]= level;
3361                 coeff[1][i]= level-1;
3362 //                coeff[2][k]= level-2;
3363             }else{
3364                 level= (bias - level)>>QMAT_SHIFT;
3365                 coeff[0][i]= -level;
3366                 coeff[1][i]= -level+1;
3367 //                coeff[2][k]= -level+2;
3368             }
                 /* when |level| is 1 the second candidate would be 0, so only one is kept */
3369             coeff_count[i]= FFMIN(level, 2);
3370             assert(coeff_count[i]);
3371             max |=level;
3372         }else{
                 /* below threshold: single candidate of +1 or -1 matching the sign */
3373             coeff[0][i]= (level>>31)|1;
3374             coeff_count[i]= 1;
3375         }
3376     }
3377
3378     *overflow= s->max_qcoeff < max; //overflow might have happened
3379
3380     if(last_non_zero < start_i){
3381         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3382         return last_non_zero;
3383     }
3384
3385     score_tab[start_i]= 0;
3386     survivor[0]= start_i;
3387     survivor_count= 1;
3388
         /* main search: for each position try every candidate level against every
          * surviving predecessor position (which determines the zero run) */
3389     for(i=start_i; i<=last_non_zero; i++){
3390         int level_index, j, zero_distortion;
3391         int dct_coeff= FFABS(block[ scantable[i] ]);
3392         int best_score=256*256*256*120;
3393
3394         if (s->dsp.fdct == ff_fdct_ifast)
3395             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3396         zero_distortion= dct_coeff*dct_coeff;
3397
3398         for(level_index=0; level_index < coeff_count[i]; level_index++){
3399             int distortion;
3400             int level= coeff[level_index][i];
3401             const int alevel= FFABS(level);
3402             int unquant_coeff;
3403
3404             assert(level);
3405
3406             if(s->out_format == FMT_H263){
3407                 unquant_coeff= alevel*qmul + qadd;
3408             }else{ //MPEG1
3409                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3410                 if(s->mb_intra){
3411                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3412                         unquant_coeff =   (unquant_coeff - 1) | 1;
3413                 }else{
3414                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3415                         unquant_coeff =   (unquant_coeff - 1) | 1;
3416                 }
3417                 unquant_coeff<<= 3;
3418             }
3419
                 /* distortion relative to coding this coefficient as zero */
3420             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* bias level by 64; values outside 0..127 take the escape-length path */
3421             level+=64;
3422             if((level&(~127)) == 0){
3423                 for(j=survivor_count-1; j>=0; j--){
3424                     int run= i - survivor[j];
3425                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3426                     score += score_tab[i-run];
3427
3428                     if(score < best_score){
3429                         best_score= score;
3430                         run_tab[i+1]= run;
3431                         level_tab[i+1]= level-64;
3432                     }
3433                 }
3434
                     /* for FMT_H263 the "last" table is used, so the best end
                      * position is tracked during the search itself */
3435                 if(s->out_format == FMT_H263){
3436                     for(j=survivor_count-1; j>=0; j--){
3437                         int run= i - survivor[j];
3438                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3439                         score += score_tab[i-run];
3440                         if(score < last_score){
3441                             last_score= score;
3442                             last_run= run;
3443                             last_level= level-64;
3444                             last_i= i+1;
3445                         }
3446                     }
3447                 }
3448             }else{
3449                 distortion += esc_length*lambda;
3450                 for(j=survivor_count-1; j>=0; j--){
3451                     int run= i - survivor[j];
3452                     int score= distortion + score_tab[i-run];
3453
3454                     if(score < best_score){
3455                         best_score= score;
3456                         run_tab[i+1]= run;
3457                         level_tab[i+1]= level-64;
3458                     }
3459                 }
3460
3461                 if(s->out_format == FMT_H263){
3462                   for(j=survivor_count-1; j>=0; j--){
3463                         int run= i - survivor[j];
3464                         int score= distortion + score_tab[i-run];
3465                         if(score < last_score){
3466                             last_score= score;
3467                             last_run= run;
3468                             last_level= level-64;
3469                             last_i= i+1;
3470                         }
3471                     }
3472                 }
3473             }
3474         }
3475
3476         score_tab[i+1]= best_score;
3477
3478         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
             /* prune survivors whose score is worse than the new best (with a
              * lambda tolerance when last_non_zero > 27) */
3479         if(last_non_zero <= 27){
3480             for(; survivor_count; survivor_count--){
3481                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3482                     break;
3483             }
3484         }else{
3485             for(; survivor_count; survivor_count--){
3486                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3487                     break;
3488             }
3489         }
3490
3491         survivor[ survivor_count++ ]= i+1;
3492     }
3493
         /* formats other than FMT_H263: choose the end position after the search */
3494     if(s->out_format != FMT_H263){
3495         last_score= 256*256*256*120;
3496         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3497             int score= score_tab[i];
3498             if(i) score += lambda*2; //FIXME exacter?
3499
3500             if(score < last_score){
3501                 last_score= score;
3502                 last_i= i;
3503                 last_level= level_tab[i];
3504                 last_run= run_tab[i];
3505             }
3506         }
3507     }
3508
3509     s->coded_score[n] = last_score;
3510
         /* keep |block[0]| before the coefficients are cleared; it is needed by
          * the single-coefficient case below */
3511     dc= FFABS(block[0]);
3512     last_non_zero= last_i - 1;
3513     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3514
3515     if(last_non_zero < start_i)
3516         return last_non_zero;
3517
         /* special case (start_i == 0 only): the sole coded coefficient is at
          * index 0; re-evaluate its candidates against the saved value in dc */
3518     if(last_non_zero == 0 && start_i == 0){
3519         int best_level= 0;
3520         int best_score= dc * dc;
3521
3522         for(i=0; i<coeff_count[0]; i++){
3523             int level= coeff[i][0];
3524             int alevel= FFABS(level);
3525             int unquant_coeff, score, distortion;
3526
3527             if(s->out_format == FMT_H263){
3528                     unquant_coeff= (alevel*qmul + qadd)>>3;
3529             }else{ //MPEG1
3530                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3531                     unquant_coeff =   (unquant_coeff - 1) | 1;
3532             }
3533             unquant_coeff = (unquant_coeff + 4) >> 3;
3534             unquant_coeff<<= 3 + 3;
3535
3536             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3537             level+=64;
3538             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3539             else                    score= distortion + esc_length*lambda;
3540
3541             if(score < best_score){
3542                 best_score= score;
3543                 best_level= level - 64;
3544             }
3545         }
3546         block[0]= best_level;
3547         s->coded_score[n] = best_score - dc*dc;
3548         if(best_level == 0) return -1;
3549         else                return last_non_zero;
3550     }
3551
         /* backtrack: write the last level, then follow run_tab towards start_i */
3552     i= last_i;
3553     assert(last_level);
3554
3555     block[ perm_scantable[last_non_zero] ]= last_level;
3556     i -= last_run + 1;
3557
3558     for(; i>start_i; i -= run_tab[i] + 1){
3559         block[ perm_scantable[i-1] ]= level_tab[i];
3560     }
3561
3562     return last_non_zero;
3563 }
3564
3565 //#define REFINE_STATS 1
3566 static int16_t basis[64][64];
3567
3568 static void build_basis(uint8_t *perm){
3569     int i, j, x, y;
3570     emms_c();
3571     for(i=0; i<8; i++){
3572         for(j=0; j<8; j++){
3573             for(y=0; y<8; y++){
3574                 for(x=0; x<8; x++){
3575                     double s= 0.25*(1<<BASIS_SHIFT);
3576                     int index= 8*i + j;
3577                     int perm_index= perm[index];
3578                     if(i==0) s*= sqrt(0.5);
3579                     if(j==0) s*= sqrt(0.5);
3580                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3581                 }
3582             }
3583         }
3584     }
3585 }
3586
3587 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3588                         int16_t *block, int16_t *weight, int16_t *orig,
3589                         int n, int qscale){
3590     int16_t rem[64];
3591     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3592     const uint8_t *scantable= s->intra_scantable.scantable;
3593     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3594 //    unsigned int threshold1, threshold2;
3595 //    int bias=0;
3596     int run_tab[65];
3597     int prev_run=0;
3598     int prev_level=0;
3599     int qmul, qadd, start_i, last_non_zero, i, dc;
3600     uint8_t * length;
3601     uint8_t * last_length;
3602     int lambda;
3603     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3604 #ifdef REFINE_STATS
3605 static int count=0;
3606 static int after_last=0;
3607 static int to_zero=0;
3608 static int from_zero=0;
3609 static int raise=0;
3610 static int lower=0;
3611 static int messed_sign=0;
3612 #endif
3613
3614     if(basis[0][0] == 0)
3615         build_basis(s->dsp.idct_permutation);
3616
3617     qmul= qscale*2;
3618     qadd= (qscale-1)|1;
3619     if (s->mb_intra) {
3620         if (!s->h263_aic) {
3621             if (n < 4)
3622                 q = s->y_dc_scale;
3623             else
3624                 q = s->c_dc_scale;
3625         } else{
3626             /* For AIC we skip quant/dequant of INTRADC */
3627             q = 1;
3628             qadd=0;
3629         }
3630         q <<= RECON_SHIFT-3;
3631         /* note: block[0] is assumed to be positive */
3632         dc= block[0]*q;
3633 //        block[0] = (block[0] + (q >> 1)) / q;
3634         start_i = 1;
3635 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3636 //            bias= 1<<(QMAT_SHIFT-1);
3637         length     = s->intra_ac_vlc_length;
3638         last_length= s->intra_ac_vlc_last_length;
3639     } else {
3640         dc= 0;
3641         start_i = 0;
3642         length     = s->inter_ac_vlc_length;
3643         last_length= s->inter_ac_vlc_last_length;
3644     }
3645     last_non_zero = s->block_last_index[n];
3646
3647 #ifdef REFINE_STATS
3648 {START_TIMER
3649 #endif
3650     dc += (1<<(RECON_SHIFT-1));
3651     for(i=0; i<64; i++){
3652         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3653     }
3654 #ifdef REFINE_STATS
3655 STOP_TIMER("memset rem[]")}
3656 #endif
3657     sum=0;
3658     for(i=0; i<64; i++){
3659         int one= 36;
3660         int qns=4;
3661         int w;
3662
3663         w= FFABS(weight[i]) + qns*one;
3664         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3665
3666         weight[i] = w;
3667 //        w=weight[i] = (63*qns + (w/2)) / w;
3668
3669         assert(w>0);
3670         assert(w<(1<<6));
3671         sum += w*w;
3672     }
3673     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3674 #ifdef REFINE_STATS
3675 {START_TIMER
3676 #endif
3677     run=0;
3678     rle_index=0;
3679     for(i=start_i; i<=last_non_zero; i++){
3680         int j= perm_scantable[i];
3681         const int level= block[j];
3682         int coeff;
3683
3684         if(level){
3685             if(level<0) coeff= qmul*level - qadd;
3686             else        coeff= qmul*level + qadd;
3687             run_tab[rle_index++]=run;
3688             run=0;
3689
3690             s->dsp.add_8x8basis(rem, basis[j], coeff);
3691         }else{
3692             run++;
3693         }
3694     }
3695 #ifdef REFINE_STATS
3696 if(last_non_zero>0){
3697 STOP_TIMER("init rem[]")
3698 }
3699 }
3700
3701 {START_TIMER
3702 #endif
3703     for(;;){
3704         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3705         int best_coeff=0;
3706         int best_change=0;
3707         int run2, best_unquant_change=0, analyze_gradient;
3708 #ifdef REFINE_STATS
3709 {START_TIMER
3710 #endif
3711         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3712
3713         if(analyze_gradient){
3714 #ifdef REFINE_STATS
3715 {START_TIMER
3716 #endif
3717             for(i=0; i<64; i++){
3718                 int w= weight[i];
3719
3720                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3721             }
3722 #ifdef REFINE_STATS
3723 STOP_TIMER("rem*w*w")}
3724 {START_TIMER
3725 #endif
3726             s->dsp.fdct(d1);
3727 #ifdef REFINE_STATS
3728 STOP_TIMER("dct")}
3729 #endif
3730         }
3731
3732         if(start_i){
3733             const int level= block[0];
3734             int change, old_coeff;
3735
3736             assert(s->mb_intra);
3737
3738             old_coeff= q*level;
3739
3740             for(change=-1; change<=1; change+=2){
3741                 int new_level= level + change;
3742                 int score, new_coeff;
3743
3744                 new_coeff= q*new_level;
3745                 if(new_coeff >= 2048 || new_coeff < 0)
3746                     continue;
3747
3748                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3749                 if(score<best_score){
3750                     best_score= score;
3751                     best_coeff= 0;
3752                     best_change= change;
3753                     best_unquant_change= new_coeff - old_coeff;
3754                 }
3755             }
3756         }
3757
3758         run=0;
3759         rle_index=0;
3760         run2= run_tab[rle_index++];
3761         prev_level=0;
3762         prev_run=0;
3763
3764         for(i=start_i; i<64; i++){
3765             int j= perm_scantable[i];
3766             const int level= block[j];
3767             int change, old_coeff;
3768
3769             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3770                 break;
3771
3772             if(level){
3773                 if(level<0) old_coeff= qmul*level - qadd;
3774                 else        old_coeff= qmul*level + qadd;
3775                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3776             }else{
3777                 old_coeff=0;
3778                 run2--;
3779                 assert(run2>=0 || i >= last_non_zero );
3780             }
3781
3782             for(change=-1; change<=1; change+=2){
3783                 int new_level= level + change;
3784                 int score, new_coeff, unquant_change;
3785
3786                 score=0;
3787                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3788                    continue;
3789
3790                 if(new_level){
3791                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3792                     else            new_coeff= qmul*new_level + qadd;
3793                     if(new_coeff >= 2048 || new_coeff <= -2048)
3794                         continue;
3795                     //FIXME check for overflow
3796
3797                     if(level){
3798                         if(level < 63 && level > -63){
3799                             if(i < last_non_zero)
3800                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3801                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3802                             else
3803                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3804                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3805                         }
3806                     }else{
3807                         assert(FFABS(new_level)==1);
3808
3809                         if(analyze_gradient){
3810                             int g= d1[ scantable[i] ];
3811                             if(g && (g^new_level) >= 0)
3812                                 continue;
3813                         }
3814
3815                         if(i < last_non_zero){
3816                             int next_i= i + run2 + 1;
3817                             int next_level= block[ perm_scantable[next_i] ] + 64;
3818
3819                             if(next_level&(~127))
3820                                 next_level= 0;
3821
3822                             if(next_i < last_non_zero)
3823                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3824                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3825                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3826                             else
3827                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3828                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3829                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3830                         }else{
3831                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3832                             if(prev_level){
3833                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3834                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3835                             }
3836                         }
3837                     }
3838                 }else{
3839                     new_coeff=0;
3840                     assert(FFABS(level)==1);
3841
3842                     if(i < last_non_zero){
3843                         int next_i= i + run2 + 1;
3844                         int next_level= block[ perm_scantable[next_i] ] + 64;
3845
3846                         if(next_level&(~127))
3847                             next_level= 0;
3848
3849                         if(next_i < last_non_zero)
3850                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3851                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
3852                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3853                         else
3854                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3855                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3856                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3857                     }else{
3858                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
3859                         if(prev_level){
3860                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3861                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3862                         }
3863                     }
3864                 }
3865
3866                 score *= lambda;
3867
3868                 unquant_change= new_coeff - old_coeff;
3869                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
3870
3871                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
3872                 if(score<best_score){
3873                     best_score= score;
3874                     best_coeff= i;
3875                     best_change= change;
3876                     best_unquant_change= unquant_change;
3877                 }
3878             }
3879             if(level){
3880                 prev_level= level + 64;
3881                 if(prev_level&(~127))
3882                     prev_level= 0;
3883                 prev_run= run;
3884                 run=0;
3885             }else{
3886                 run++;
3887             }
3888         }
3889 #ifdef REFINE_STATS
3890 STOP_TIMER("iterative step")}
3891 #endif
3892
3893         if(best_change){
3894             int j= perm_scantable[ best_coeff ];
3895
3896             block[j] += best_change;
3897
3898             if(best_coeff > last_non_zero){
3899                 last_non_zero= best_coeff;
3900                 assert(block[j]);
3901 #ifdef REFINE_STATS
3902 after_last++;
3903 #endif
3904             }else{
3905 #ifdef REFINE_STATS
3906 if(block[j]){
3907     if(block[j] - best_change){
3908         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
3909             raise++;
3910         }else{
3911             lower++;
3912         }
3913     }else{
3914         from_zero++;
3915     }
3916 }else{
3917     to_zero++;
3918 }
3919 #endif
3920                 for(; last_non_zero>=start_i; last_non_zero--){
3921                     if(block[perm_scantable[last_non_zero]])
3922                         break;
3923                 }
3924             }
3925 #ifdef REFINE_STATS
3926 count++;
3927 if(256*256*256*64 % count == 0){
3928     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
3929 }
3930 #endif
3931             run=0;
3932             rle_index=0;
3933             for(i=start_i; i<=last_non_zero; i++){
3934                 int j= perm_scantable[i];
3935                 const int level= block[j];
3936
3937                  if(level){
3938                      run_tab[rle_index++]=run;
3939                      run=0;
3940                  }else{
3941                      run++;
3942                  }
3943             }
3944
3945             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
3946         }else{
3947             break;
3948         }
3949     }
3950 #ifdef REFINE_STATS
3951 if(last_non_zero>0){
3952 STOP_TIMER("iterative search")
3953 }
3954 }
3955 #endif
3956
3957     return last_non_zero;
3958 }
3959
3960 int ff_dct_quantize_c(MpegEncContext *s,
3961                         int16_t *block, int n,
3962                         int qscale, int *overflow)
3963 {
3964     int i, j, level, last_non_zero, q, start_i;
3965     const int *qmat;
3966     const uint8_t *scantable= s->intra_scantable.scantable;
3967     int bias;
3968     int max=0;
3969     unsigned int threshold1, threshold2;
3970
3971     s->dsp.fdct (block);
3972
3973     if(s->dct_error_sum)
3974         s->denoise_dct(s, block);
3975
3976     if (s->mb_intra) {
3977         if (!s->h263_aic) {
3978             if (n < 4)
3979                 q = s->y_dc_scale;
3980             else
3981                 q = s->c_dc_scale;
3982             q = q << 3;
3983         } else
3984             /* For AIC we skip quant/dequant of INTRADC */
3985             q = 1 << 3;
3986
3987         /* note: block[0] is assumed to be positive */
3988         block[0] = (block[0] + (q >> 1)) / q;
3989         start_i = 1;
3990         last_non_zero = 0;
3991         qmat = s->q_intra_matrix[qscale];
3992         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
3993     } else {
3994         start_i = 0;
3995         last_non_zero = -1;
3996         qmat = s->q_inter_matrix[qscale];
3997         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
3998     }
3999     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4000     threshold2= (threshold1<<1);
4001     for(i=63;i>=start_i;i--) {
4002         j = scantable[i];
4003         level = block[j] * qmat[j];
4004
4005         if(((unsigned)(level+threshold1))>threshold2){
4006             last_non_zero = i;
4007             break;
4008         }else{
4009             block[j]=0;
4010         }
4011     }
4012     for(i=start_i; i<=last_non_zero; i++) {
4013         j = scantable[i];
4014         level = block[j] * qmat[j];
4015
4016 //        if(   bias+level >= (1<<QMAT_SHIFT)
4017 //           || bias-level >= (1<<QMAT_SHIFT)){
4018         if(((unsigned)(level+threshold1))>threshold2){
4019             if(level>0){
4020                 level= (bias + level)>>QMAT_SHIFT;
4021                 block[j]= level;
4022             }else{
4023                 level= (bias - level)>>QMAT_SHIFT;
4024                 block[j]= -level;
4025             }
4026             max |=level;
4027         }else{
4028             block[j]=0;
4029         }
4030     }
4031     *overflow= s->max_qcoeff < max; //overflow might have happened
4032
4033     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4034     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4035         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4036
4037     return last_non_zero;
4038 }
4039
/* Byte offset of a field inside MpegEncContext, used by the option tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Flags shared by the option entries in this file. The expansion is
 * parenthesized so it stays a single operand next to any other operator. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4042 static const AVOption h263_options[] = {
4043     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4044     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4045     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4046     FF_MPV_COMMON_OPTS
4047     { NULL },
4048 };
4049
4050 static const AVClass h263_class = {
4051     .class_name = "H.263 encoder",
4052     .item_name  = av_default_item_name,
4053     .option     = h263_options,
4054     .version    = LIBAVUTIL_VERSION_INT,
4055 };
4056
4057 AVCodec ff_h263_encoder = {
4058     .name           = "h263",
4059     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4060     .type           = AVMEDIA_TYPE_VIDEO,
4061     .id             = AV_CODEC_ID_H263,
4062     .priv_data_size = sizeof(MpegEncContext),
4063     .init           = ff_MPV_encode_init,
4064     .encode2        = ff_MPV_encode_picture,
4065     .close          = ff_MPV_encode_end,
4066     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4067     .priv_class     = &h263_class,
4068 };
4069
4070 static const AVOption h263p_options[] = {
4071     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4072     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4073     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4074     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4075     FF_MPV_COMMON_OPTS
4076     { NULL },
4077 };
4078 static const AVClass h263p_class = {
4079     .class_name = "H.263p encoder",
4080     .item_name  = av_default_item_name,
4081     .option     = h263p_options,
4082     .version    = LIBAVUTIL_VERSION_INT,
4083 };
4084
4085 AVCodec ff_h263p_encoder = {
4086     .name           = "h263p",
4087     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4088     .type           = AVMEDIA_TYPE_VIDEO,
4089     .id             = AV_CODEC_ID_H263P,
4090     .priv_data_size = sizeof(MpegEncContext),
4091     .init           = ff_MPV_encode_init,
4092     .encode2        = ff_MPV_encode_picture,
4093     .close          = ff_MPV_encode_end,
4094     .capabilities   = CODEC_CAP_SLICE_THREADS,
4095     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4096     .priv_class     = &h263p_class,
4097 };
4098
4099 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4100
4101 AVCodec ff_msmpeg4v2_encoder = {
4102     .name           = "msmpeg4v2",
4103     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4104     .type           = AVMEDIA_TYPE_VIDEO,
4105     .id             = AV_CODEC_ID_MSMPEG4V2,
4106     .priv_data_size = sizeof(MpegEncContext),
4107     .init           = ff_MPV_encode_init,
4108     .encode2        = ff_MPV_encode_picture,
4109     .close          = ff_MPV_encode_end,
4110     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4111     .priv_class     = &msmpeg4v2_class,
4112 };
4113
4114 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4115
4116 AVCodec ff_msmpeg4v3_encoder = {
4117     .name           = "msmpeg4",
4118     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4119     .type           = AVMEDIA_TYPE_VIDEO,
4120     .id             = AV_CODEC_ID_MSMPEG4V3,
4121     .priv_data_size = sizeof(MpegEncContext),
4122     .init           = ff_MPV_encode_init,
4123     .encode2        = ff_MPV_encode_picture,
4124     .close          = ff_MPV_encode_end,
4125     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4126     .priv_class     = &msmpeg4v3_class,
4127 };
4128
4129 FF_MPV_GENERIC_CLASS(wmv1)
4130
4131 AVCodec ff_wmv1_encoder = {
4132     .name           = "wmv1",
4133     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4134     .type           = AVMEDIA_TYPE_VIDEO,
4135     .id             = AV_CODEC_ID_WMV1,
4136     .priv_data_size = sizeof(MpegEncContext),
4137     .init           = ff_MPV_encode_init,
4138     .encode2        = ff_MPV_encode_picture,
4139     .close          = ff_MPV_encode_end,
4140     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4141     .priv_class     = &wmv1_class,
4142 };