]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
Merge commit '17e9d52c8c93f47721ff481b8867922f4b4bd663'
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "idctdsp.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "h261.h"
44 #include "h263.h"
45 #include "mjpegenc_common.h"
46 #include "mathops.h"
47 #include "mpegutils.h"
48 #include "mjpegenc.h"
49 #include "msmpeg4.h"
50 #include "pixblockdsp.h"
51 #include "qpeldsp.h"
52 #include "faandct.h"
53 #include "thread.h"
54 #include "aandcttab.h"
55 #include "flv.h"
56 #include "mpeg4video.h"
57 #include "internal.h"
58 #include "bytestream.h"
59 #include <limits.h>
60 #include "sp5x.h"
61
62 static int encode_picture(MpegEncContext *s, int picture_number);
63 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
64 static int sse_mb(MpegEncContext *s);
65 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
66 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
67
68 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
69 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
70
71 const AVOption ff_mpv_generic_options[] = {
72     FF_MPV_COMMON_OPTS
73     { NULL },
74 };
75
76 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
77                        uint16_t (*qmat16)[2][64],
78                        const uint16_t *quant_matrix,
79                        int bias, int qmin, int qmax, int intra)
80 {
81     FDCTDSPContext *fdsp = &s->fdsp;
82     int qscale;
83     int shift = 0;
84
85     for (qscale = qmin; qscale <= qmax; qscale++) {
86         int i;
87         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
88             fdsp->fdct == ff_jpeg_fdct_islow_10 ||
89             fdsp->fdct == ff_faandct) {
90             for (i = 0; i < 64; i++) {
91                 const int j = s->idsp.idct_permutation[i];
92                 /* 16 <= qscale * quant_matrix[i] <= 7905
93                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
94                  *             19952 <=              x  <= 249205026
95                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
96                  *           3444240 >= (1 << 36) / (x) >= 275 */
97
98                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
99                                         (qscale * quant_matrix[j]));
100             }
101         } else if (fdsp->fdct == ff_fdct_ifast) {
102             for (i = 0; i < 64; i++) {
103                 const int j = s->idsp.idct_permutation[i];
104                 /* 16 <= qscale * quant_matrix[i] <= 7905
105                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
106                  *             19952 <=              x  <= 249205026
107                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
108                  *           3444240 >= (1 << 36) / (x) >= 275 */
109
110                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
111                                         (ff_aanscales[i] * (int64_t)qscale * quant_matrix[j]));
112             }
113         } else {
114             for (i = 0; i < 64; i++) {
115                 const int j = s->idsp.idct_permutation[i];
116                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
117                  * Assume x = qscale * quant_matrix[i]
118                  * So             16 <=              x  <= 7905
119                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
120                  * so          32768 >= (1 << 19) / (x) >= 67 */
121                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
122                                         (qscale * quant_matrix[j]));
123                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
124                 //                    (qscale * quant_matrix[i]);
125                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
126                                        (qscale * quant_matrix[j]);
127
128                 if (qmat16[qscale][0][i] == 0 ||
129                     qmat16[qscale][0][i] == 128 * 256)
130                     qmat16[qscale][0][i] = 128 * 256 - 1;
131                 qmat16[qscale][1][i] =
132                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
133                                 qmat16[qscale][0][i]);
134             }
135         }
136
137         for (i = intra; i < 64; i++) {
138             int64_t max = 8191;
139             if (fdsp->fdct == ff_fdct_ifast) {
140                 max = (8191LL * ff_aanscales[i]) >> 14;
141             }
142             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
143                 shift++;
144             }
145         }
146     }
147     if (shift) {
148         av_log(NULL, AV_LOG_INFO,
149                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
150                QMAT_SHIFT - shift);
151     }
152 }
153
154 static inline void update_qscale(MpegEncContext *s)
155 {
156     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
157                 (FF_LAMBDA_SHIFT + 7);
158     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
159
160     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
161                  FF_LAMBDA_SHIFT;
162 }
163
164 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
165 {
166     int i;
167
168     if (matrix) {
169         put_bits(pb, 1, 1);
170         for (i = 0; i < 64; i++) {
171             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
172         }
173     } else
174         put_bits(pb, 1, 0);
175 }
176
177 /**
178  * init s->current_picture.qscale_table from s->lambda_table
179  */
180 void ff_init_qscale_tab(MpegEncContext *s)
181 {
182     int8_t * const qscale_table = s->current_picture.qscale_table;
183     int i;
184
185     for (i = 0; i < s->mb_num; i++) {
186         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
187         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
188         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
189                                                   s->avctx->qmax);
190     }
191 }
192
193 static void update_duplicate_context_after_me(MpegEncContext *dst,
194                                               MpegEncContext *src)
195 {
196 #define COPY(a) dst->a= src->a
197     COPY(pict_type);
198     COPY(current_picture);
199     COPY(f_code);
200     COPY(b_code);
201     COPY(qscale);
202     COPY(lambda);
203     COPY(lambda2);
204     COPY(picture_in_gop_number);
205     COPY(gop_picture_number);
206     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
207     COPY(progressive_frame);    // FIXME don't set in encode_header
208     COPY(partitioned_frame);    // FIXME don't set in encode_header
209 #undef COPY
210 }
211
212 /**
213  * Set the given MpegEncContext to defaults for encoding.
214  * the changed fields will not depend upon the prior state of the MpegEncContext.
215  */
216 static void MPV_encode_defaults(MpegEncContext *s)
217 {
218     int i;
219     ff_MPV_common_defaults(s);
220
221     for (i = -16; i < 16; i++) {
222         default_fcode_tab[i + MAX_MV] = 1;
223     }
224     s->me.mv_penalty = default_mv_penalty;
225     s->fcode_tab     = default_fcode_tab;
226
227     s->input_picture_number  = 0;
228     s->picture_in_gop_number = 0;
229 }
230
231 av_cold int ff_dct_encode_init(MpegEncContext *s) {
232     if (ARCH_X86)
233         ff_dct_encode_init_x86(s);
234
235     if (CONFIG_H263_ENCODER)
236         ff_h263dsp_init(&s->h263dsp);
237     if (!s->dct_quantize)
238         s->dct_quantize = ff_dct_quantize_c;
239     if (!s->denoise_dct)
240         s->denoise_dct  = denoise_dct_c;
241     s->fast_dct_quantize = s->dct_quantize;
242     if (s->avctx->trellis)
243         s->dct_quantize  = dct_quantize_trellis_c;
244
245     return 0;
246 }
247
248 /* init video encoder */
249 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
250 {
251     MpegEncContext *s = avctx->priv_data;
252     int i, ret, format_supported;
253
254     MPV_encode_defaults(s);
255
256     switch (avctx->codec_id) {
257     case AV_CODEC_ID_MPEG2VIDEO:
258         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
259             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
260             av_log(avctx, AV_LOG_ERROR,
261                    "only YUV420 and YUV422 are supported\n");
262             return -1;
263         }
264         break;
265     case AV_CODEC_ID_MJPEG:
266     case AV_CODEC_ID_AMV:
267         format_supported = 0;
268         /* JPEG color space */
269         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
270             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
271             avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
272             (avctx->color_range == AVCOL_RANGE_JPEG &&
273              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
274               avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
275               avctx->pix_fmt == AV_PIX_FMT_YUV444P)))
276             format_supported = 1;
277         /* MPEG color space */
278         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
279                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
280                   avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
281                   avctx->pix_fmt == AV_PIX_FMT_YUV444P))
282             format_supported = 1;
283
284         if (!format_supported) {
285             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
286             return -1;
287         }
288         break;
289     default:
290         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
291             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
292             return -1;
293         }
294     }
295
296     switch (avctx->pix_fmt) {
297     case AV_PIX_FMT_YUVJ444P:
298     case AV_PIX_FMT_YUV444P:
299         s->chroma_format = CHROMA_444;
300         break;
301     case AV_PIX_FMT_YUVJ422P:
302     case AV_PIX_FMT_YUV422P:
303         s->chroma_format = CHROMA_422;
304         break;
305     case AV_PIX_FMT_YUVJ420P:
306     case AV_PIX_FMT_YUV420P:
307     default:
308         s->chroma_format = CHROMA_420;
309         break;
310     }
311
312     s->bit_rate = avctx->bit_rate;
313     s->width    = avctx->width;
314     s->height   = avctx->height;
315     if (avctx->gop_size > 600 &&
316         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
317         av_log(avctx, AV_LOG_WARNING,
318                "keyframe interval too large!, reducing it from %d to %d\n",
319                avctx->gop_size, 600);
320         avctx->gop_size = 600;
321     }
322     s->gop_size     = avctx->gop_size;
323     s->avctx        = avctx;
324     s->flags        = avctx->flags;
325     s->flags2       = avctx->flags2;
326     if (avctx->max_b_frames > MAX_B_FRAMES) {
327         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
328                "is %d.\n", MAX_B_FRAMES);
329         avctx->max_b_frames = MAX_B_FRAMES;
330     }
331     s->max_b_frames = avctx->max_b_frames;
332     s->codec_id     = avctx->codec->id;
333     s->strict_std_compliance = avctx->strict_std_compliance;
334     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
335     s->mpeg_quant         = avctx->mpeg_quant;
336     s->rtp_mode           = !!avctx->rtp_payload_size;
337     s->intra_dc_precision = avctx->intra_dc_precision;
338     s->user_specified_pts = AV_NOPTS_VALUE;
339
340     if (s->gop_size <= 1) {
341         s->intra_only = 1;
342         s->gop_size   = 12;
343     } else {
344         s->intra_only = 0;
345     }
346
347     s->me_method = avctx->me_method;
348
349     /* Fixed QSCALE */
350     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
351
352     s->adaptive_quant = (s->avctx->lumi_masking ||
353                          s->avctx->dark_masking ||
354                          s->avctx->temporal_cplx_masking ||
355                          s->avctx->spatial_cplx_masking  ||
356                          s->avctx->p_masking      ||
357                          s->avctx->border_masking ||
358                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
359                         !s->fixed_qscale;
360
361     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
362
363     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
364         switch(avctx->codec_id) {
365         case AV_CODEC_ID_MPEG1VIDEO:
366         case AV_CODEC_ID_MPEG2VIDEO:
367             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112L / 15000000 * 16384;
368             break;
369         case AV_CODEC_ID_MPEG4:
370         case AV_CODEC_ID_MSMPEG4V1:
371         case AV_CODEC_ID_MSMPEG4V2:
372         case AV_CODEC_ID_MSMPEG4V3:
373             if       (avctx->rc_max_rate >= 15000000) {
374                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000L) * (760-320) / (38400000 - 15000000);
375             } else if(avctx->rc_max_rate >=  2000000) {
376                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000L) * (320- 80) / (15000000 -  2000000);
377             } else if(avctx->rc_max_rate >=   384000) {
378                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000L) * ( 80- 40) / ( 2000000 -   384000);
379             } else
380                 avctx->rc_buffer_size = 40;
381             avctx->rc_buffer_size *= 16384;
382             break;
383         }
384         if (avctx->rc_buffer_size) {
385             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
386         }
387     }
388
389     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
390         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
391         if (avctx->rc_max_rate && !avctx->rc_buffer_size)
392             return -1;
393     }
394
395     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
396         av_log(avctx, AV_LOG_INFO,
397                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
398     }
399
400     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
401         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
402         return -1;
403     }
404
405     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
406         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
407         return -1;
408     }
409
410     if (avctx->rc_max_rate &&
411         avctx->rc_max_rate == avctx->bit_rate &&
412         avctx->rc_max_rate != avctx->rc_min_rate) {
413         av_log(avctx, AV_LOG_INFO,
414                "impossible bitrate constraints, this will fail\n");
415     }
416
417     if (avctx->rc_buffer_size &&
418         avctx->bit_rate * (int64_t)avctx->time_base.num >
419             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
420         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
421         return -1;
422     }
423
424     if (!s->fixed_qscale &&
425         avctx->bit_rate * av_q2d(avctx->time_base) >
426             avctx->bit_rate_tolerance) {
427         av_log(avctx, AV_LOG_WARNING,
428                "bitrate tolerance %d too small for bitrate %d, overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
429         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
430     }
431
432     if (s->avctx->rc_max_rate &&
433         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
434         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
435          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
436         90000LL * (avctx->rc_buffer_size - 1) >
437             s->avctx->rc_max_rate * 0xFFFFLL) {
438         av_log(avctx, AV_LOG_INFO,
439                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
440                "specified vbv buffer is too large for the given bitrate!\n");
441     }
442
443     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
444         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
445         s->codec_id != AV_CODEC_ID_FLV1) {
446         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
447         return -1;
448     }
449
450     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
451         av_log(avctx, AV_LOG_ERROR,
452                "OBMC is only supported with simple mb decision\n");
453         return -1;
454     }
455
456     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
457         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
458         return -1;
459     }
460
461     if (s->max_b_frames                    &&
462         s->codec_id != AV_CODEC_ID_MPEG4      &&
463         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
464         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
465         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
466         return -1;
467     }
468     if (s->max_b_frames < 0) {
469         av_log(avctx, AV_LOG_ERROR,
470                "max b frames must be 0 or positive for mpegvideo based encoders\n");
471         return -1;
472     }
473
474     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
475          s->codec_id == AV_CODEC_ID_H263  ||
476          s->codec_id == AV_CODEC_ID_H263P) &&
477         (avctx->sample_aspect_ratio.num > 255 ||
478          avctx->sample_aspect_ratio.den > 255)) {
479         av_log(avctx, AV_LOG_WARNING,
480                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
481                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
482         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
483                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
484     }
485
486     if ((s->codec_id == AV_CODEC_ID_H263  ||
487          s->codec_id == AV_CODEC_ID_H263P) &&
488         (avctx->width  > 2048 ||
489          avctx->height > 1152 )) {
490         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
491         return -1;
492     }
493     if ((s->codec_id == AV_CODEC_ID_H263  ||
494          s->codec_id == AV_CODEC_ID_H263P) &&
495         ((avctx->width &3) ||
496          (avctx->height&3) )) {
497         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
498         return -1;
499     }
500
501     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
502         (avctx->width  > 4095 ||
503          avctx->height > 4095 )) {
504         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
505         return -1;
506     }
507
508     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
509         (avctx->width  > 16383 ||
510          avctx->height > 16383 )) {
511         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
512         return -1;
513     }
514
515     if (s->codec_id == AV_CODEC_ID_RV10 &&
516         (avctx->width &15 ||
517          avctx->height&15 )) {
518         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
519         return AVERROR(EINVAL);
520     }
521
522     if (s->codec_id == AV_CODEC_ID_RV20 &&
523         (avctx->width &3 ||
524          avctx->height&3 )) {
525         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
526         return AVERROR(EINVAL);
527     }
528
529     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
530          s->codec_id == AV_CODEC_ID_WMV2) &&
531          avctx->width & 1) {
532          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
533          return -1;
534     }
535
536     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
537         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
538         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
539         return -1;
540     }
541
542     // FIXME mpeg2 uses that too
543     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
544                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
545         av_log(avctx, AV_LOG_ERROR,
546                "mpeg2 style quantization not supported by codec\n");
547         return -1;
548     }
549
550     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
551         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
552         return -1;
553     }
554
555     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
556         s->avctx->mb_decision != FF_MB_DECISION_RD) {
557         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
558         return -1;
559     }
560
561     if (s->avctx->scenechange_threshold < 1000000000 &&
562         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
563         av_log(avctx, AV_LOG_ERROR,
564                "closed gop with scene change detection are not supported yet, "
565                "set threshold to 1000000000\n");
566         return -1;
567     }
568
569     if (s->flags & CODEC_FLAG_LOW_DELAY) {
570         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
571             av_log(avctx, AV_LOG_ERROR,
572                   "low delay forcing is only available for mpeg2\n");
573             return -1;
574         }
575         if (s->max_b_frames != 0) {
576             av_log(avctx, AV_LOG_ERROR,
577                    "b frames cannot be used with low delay\n");
578             return -1;
579         }
580     }
581
582     if (s->q_scale_type == 1) {
583         if (avctx->qmax > 12) {
584             av_log(avctx, AV_LOG_ERROR,
585                    "non linear quant only supports qmax <= 12 currently\n");
586             return -1;
587         }
588     }
589
590     if (s->avctx->thread_count > 1         &&
591         s->codec_id != AV_CODEC_ID_MPEG4      &&
592         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
593         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
594         s->codec_id != AV_CODEC_ID_MJPEG      &&
595         (s->codec_id != AV_CODEC_ID_H263P)) {
596         av_log(avctx, AV_LOG_ERROR,
597                "multi threaded encoding not supported by codec\n");
598         return -1;
599     }
600
601     if (s->avctx->thread_count < 1) {
602         av_log(avctx, AV_LOG_ERROR,
603                "automatic thread number detection not supported by codec, "
604                "patch welcome\n");
605         return -1;
606     }
607
608     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
609         s->rtp_mode = 1;
610
611     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
612         s->h263_slice_structured = 1;
613
614     if (!avctx->time_base.den || !avctx->time_base.num) {
615         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
616         return -1;
617     }
618
619     i = (INT_MAX / 2 + 128) >> 8;
620     if (avctx->mb_threshold >= i) {
621         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
622                i - 1);
623         return -1;
624     }
625
626     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
627         av_log(avctx, AV_LOG_INFO,
628                "notice: b_frame_strategy only affects the first pass\n");
629         avctx->b_frame_strategy = 0;
630     }
631
632     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
633     if (i > 1) {
634         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
635         avctx->time_base.den /= i;
636         avctx->time_base.num /= i;
637         //return -1;
638     }
639
640     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
641         // (a + x * 3 / 8) / x
642         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
643         s->inter_quant_bias = 0;
644     } else {
645         s->intra_quant_bias = 0;
646         // (a - x / 4) / x
647         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
648     }
649
650     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
651         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
652         return AVERROR(EINVAL);
653     }
654
655     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
656         s->intra_quant_bias = avctx->intra_quant_bias;
657     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
658         s->inter_quant_bias = avctx->inter_quant_bias;
659
660     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
661
662     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
663         s->avctx->time_base.den > (1 << 16) - 1) {
664         av_log(avctx, AV_LOG_ERROR,
665                "timebase %d/%d not supported by MPEG 4 standard, "
666                "the maximum admitted value for the timebase denominator "
667                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
668                (1 << 16) - 1);
669         return -1;
670     }
671     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
672
673     switch (avctx->codec->id) {
674     case AV_CODEC_ID_MPEG1VIDEO:
675         s->out_format = FMT_MPEG1;
676         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
677         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
678         break;
679     case AV_CODEC_ID_MPEG2VIDEO:
680         s->out_format = FMT_MPEG1;
681         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
682         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
683         s->rtp_mode   = 1;
684         break;
685     case AV_CODEC_ID_MJPEG:
686     case AV_CODEC_ID_AMV:
687         s->out_format = FMT_MJPEG;
688         s->intra_only = 1; /* force intra only for jpeg */
689         if (!CONFIG_MJPEG_ENCODER ||
690             ff_mjpeg_encode_init(s) < 0)
691             return -1;
692         avctx->delay = 0;
693         s->low_delay = 1;
694         break;
695     case AV_CODEC_ID_H261:
696         if (!CONFIG_H261_ENCODER)
697             return -1;
698         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
699             av_log(avctx, AV_LOG_ERROR,
700                    "The specified picture size of %dx%d is not valid for the "
701                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
702                     s->width, s->height);
703             return -1;
704         }
705         s->out_format = FMT_H261;
706         avctx->delay  = 0;
707         s->low_delay  = 1;
708         break;
709     case AV_CODEC_ID_H263:
710         if (!CONFIG_H263_ENCODER)
711             return -1;
712         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
713                              s->width, s->height) == 8) {
714             av_log(avctx, AV_LOG_ERROR,
715                    "The specified picture size of %dx%d is not valid for "
716                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
717                    "352x288, 704x576, and 1408x1152. "
718                    "Try H.263+.\n", s->width, s->height);
719             return -1;
720         }
721         s->out_format = FMT_H263;
722         avctx->delay  = 0;
723         s->low_delay  = 1;
724         break;
725     case AV_CODEC_ID_H263P:
726         s->out_format = FMT_H263;
727         s->h263_plus  = 1;
728         /* Fx */
729         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
730         s->modified_quant  = s->h263_aic;
731         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
732         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
733
734         /* /Fx */
735         /* These are just to be sure */
736         avctx->delay = 0;
737         s->low_delay = 1;
738         break;
739     case AV_CODEC_ID_FLV1:
740         s->out_format      = FMT_H263;
741         s->h263_flv        = 2; /* format = 1; 11-bit codes */
742         s->unrestricted_mv = 1;
743         s->rtp_mode  = 0; /* don't allow GOB */
744         avctx->delay = 0;
745         s->low_delay = 1;
746         break;
747     case AV_CODEC_ID_RV10:
748         s->out_format = FMT_H263;
749         avctx->delay  = 0;
750         s->low_delay  = 1;
751         break;
752     case AV_CODEC_ID_RV20:
753         s->out_format      = FMT_H263;
754         avctx->delay       = 0;
755         s->low_delay       = 1;
756         s->modified_quant  = 1;
757         s->h263_aic        = 1;
758         s->h263_plus       = 1;
759         s->loop_filter     = 1;
760         s->unrestricted_mv = 0;
761         break;
762     case AV_CODEC_ID_MPEG4:
763         s->out_format      = FMT_H263;
764         s->h263_pred       = 1;
765         s->unrestricted_mv = 1;
766         s->low_delay       = s->max_b_frames ? 0 : 1;
767         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
768         break;
769     case AV_CODEC_ID_MSMPEG4V2:
770         s->out_format      = FMT_H263;
771         s->h263_pred       = 1;
772         s->unrestricted_mv = 1;
773         s->msmpeg4_version = 2;
774         avctx->delay       = 0;
775         s->low_delay       = 1;
776         break;
777     case AV_CODEC_ID_MSMPEG4V3:
778         s->out_format        = FMT_H263;
779         s->h263_pred         = 1;
780         s->unrestricted_mv   = 1;
781         s->msmpeg4_version   = 3;
782         s->flipflop_rounding = 1;
783         avctx->delay         = 0;
784         s->low_delay         = 1;
785         break;
786     case AV_CODEC_ID_WMV1:
787         s->out_format        = FMT_H263;
788         s->h263_pred         = 1;
789         s->unrestricted_mv   = 1;
790         s->msmpeg4_version   = 4;
791         s->flipflop_rounding = 1;
792         avctx->delay         = 0;
793         s->low_delay         = 1;
794         break;
795     case AV_CODEC_ID_WMV2:
796         s->out_format        = FMT_H263;
797         s->h263_pred         = 1;
798         s->unrestricted_mv   = 1;
799         s->msmpeg4_version   = 5;
800         s->flipflop_rounding = 1;
801         avctx->delay         = 0;
802         s->low_delay         = 1;
803         break;
804     default:
805         return -1;
806     }
807
808     avctx->has_b_frames = !s->low_delay;
809
810     s->encoding = 1;
811
812     s->progressive_frame    =
813     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
814                                                 CODEC_FLAG_INTERLACED_ME) ||
815                                 s->alternate_scan);
816
817     /* init */
818     if (ff_MPV_common_init(s) < 0)
819         return -1;
820
821     ff_fdctdsp_init(&s->fdsp, avctx);
822     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
823     ff_pixblockdsp_init(&s->pdsp, avctx);
824     ff_qpeldsp_init(&s->qdsp);
825
826     s->avctx->coded_frame = s->current_picture.f;
827
828     if (s->msmpeg4_version) {
829         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
830                           2 * 2 * (MAX_LEVEL + 1) *
831                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
832     }
833     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
834
835     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
836     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
837     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
838     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
839     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
840     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
841     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
842                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
843     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
844                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
845
846     if (s->avctx->noise_reduction) {
847         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
848                           2 * 64 * sizeof(uint16_t), fail);
849     }
850
851     ff_dct_encode_init(s);
852
853     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
854         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
855
856     s->quant_precision = 5;
857
858     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
859     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
860
861     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
862         ff_h261_encode_init(s);
863     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
864         ff_h263_encode_init(s);
865     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
866         ff_msmpeg4_encode_init(s);
867     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
868         && s->out_format == FMT_MPEG1)
869         ff_mpeg1_encode_init(s);
870
871     /* init q matrix */
872     for (i = 0; i < 64; i++) {
873         int j = s->idsp.idct_permutation[i];
874         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
875             s->mpeg_quant) {
876             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
877             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
878         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
879             s->intra_matrix[j] =
880             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
881         } else {
882             /* mpeg1/2 */
883             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
884             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
885         }
886         if (s->avctx->intra_matrix)
887             s->intra_matrix[j] = s->avctx->intra_matrix[i];
888         if (s->avctx->inter_matrix)
889             s->inter_matrix[j] = s->avctx->inter_matrix[i];
890     }
891
892     /* precompute matrix */
893     /* for mjpeg, we do include qscale in the matrix */
894     if (s->out_format != FMT_MJPEG) {
895         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
896                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
897                           31, 1);
898         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
899                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
900                           31, 0);
901     }
902
903     if (ff_rate_control_init(s) < 0)
904         return -1;
905
906 #if FF_API_ERROR_RATE
907     FF_DISABLE_DEPRECATION_WARNINGS
908     if (avctx->error_rate)
909         s->error_rate = avctx->error_rate;
910     FF_ENABLE_DEPRECATION_WARNINGS;
911 #endif
912
913 #if FF_API_NORMALIZE_AQP
914     FF_DISABLE_DEPRECATION_WARNINGS
915     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
916         s->mpv_flags |= FF_MPV_FLAG_NAQ;
917     FF_ENABLE_DEPRECATION_WARNINGS;
918 #endif
919
920 #if FF_API_MV0
921     FF_DISABLE_DEPRECATION_WARNINGS
922     if (avctx->flags & CODEC_FLAG_MV0)
923         s->mpv_flags |= FF_MPV_FLAG_MV0;
924     FF_ENABLE_DEPRECATION_WARNINGS
925 #endif
926
927     if (avctx->b_frame_strategy == 2) {
928         for (i = 0; i < s->max_b_frames + 2; i++) {
929             s->tmp_frames[i] = av_frame_alloc();
930             if (!s->tmp_frames[i])
931                 return AVERROR(ENOMEM);
932
933             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
934             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
935             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
936
937             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
938             if (ret < 0)
939                 return ret;
940         }
941     }
942
943     return 0;
944 fail:
945     ff_MPV_encode_end(avctx);
946     return AVERROR_UNKNOWN;
947 }
948
949 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
950 {
951     MpegEncContext *s = avctx->priv_data;
952     int i;
953
954     ff_rate_control_uninit(s);
955
956     ff_MPV_common_end(s);
957     if (CONFIG_MJPEG_ENCODER &&
958         s->out_format == FMT_MJPEG)
959         ff_mjpeg_encode_close(s);
960
961     av_freep(&avctx->extradata);
962
963     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
964         av_frame_free(&s->tmp_frames[i]);
965
966     ff_free_picture_tables(&s->new_picture);
967     ff_mpeg_unref_picture(s, &s->new_picture);
968
969     av_freep(&s->avctx->stats_out);
970     av_freep(&s->ac_stats);
971
972     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
973     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
974     s->q_chroma_intra_matrix=   NULL;
975     s->q_chroma_intra_matrix16= NULL;
976     av_freep(&s->q_intra_matrix);
977     av_freep(&s->q_inter_matrix);
978     av_freep(&s->q_intra_matrix16);
979     av_freep(&s->q_inter_matrix16);
980     av_freep(&s->input_picture);
981     av_freep(&s->reordered_input_picture);
982     av_freep(&s->dct_offset);
983
984     return 0;
985 }
986
/**
 * Sum of absolute errors of a 16x16 pixel block against a constant value.
 *
 * @param src    top-left pixel of the block
 * @param ref    value every pixel is compared with
 * @param stride distance in bytes between two consecutive rows of src
 * @return sum over the 256 pixels of |pixel - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int row, col;
    int total = 0;

    for (row = 0; row < 16; row++) {
        uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++)
            total += FFABS(line[col] - ref);
    }

    return total;
}
1000
1001 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1002                            uint8_t *ref, int stride)
1003 {
1004     int x, y, w, h;
1005     int acc = 0;
1006
1007     w = s->width  & ~15;
1008     h = s->height & ~15;
1009
1010     for (y = 0; y < h; y += 16) {
1011         for (x = 0; x < w; x += 16) {
1012             int offset = x + y * stride;
1013             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
1014                                      16);
1015             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1016             int sae  = get_sae(src + offset, mean, stride);
1017
1018             acc += sae + 500 < sad;
1019         }
1020     }
1021     return acc;
1022 }
1023
1024
1025 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1026 {
1027     Picture *pic = NULL;
1028     int64_t pts;
1029     int i, display_picture_number = 0, ret;
1030     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
1031                                                  (s->low_delay ? 0 : 1);
1032     int direct = 1;
1033
1034     if (pic_arg) {
1035         pts = pic_arg->pts;
1036         display_picture_number = s->input_picture_number++;
1037
1038         if (pts != AV_NOPTS_VALUE) {
1039             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1040                 int64_t last = s->user_specified_pts;
1041
1042                 if (pts <= last) {
1043                     av_log(s->avctx, AV_LOG_ERROR,
1044                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1045                            pts, last);
1046                     return AVERROR(EINVAL);
1047                 }
1048
1049                 if (!s->low_delay && display_picture_number == 1)
1050                     s->dts_delta = pts - last;
1051             }
1052             s->user_specified_pts = pts;
1053         } else {
1054             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1055                 s->user_specified_pts =
1056                 pts = s->user_specified_pts + 1;
1057                 av_log(s->avctx, AV_LOG_INFO,
1058                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1059                        pts);
1060             } else {
1061                 pts = display_picture_number;
1062             }
1063         }
1064     }
1065
1066     if (pic_arg) {
1067         if (!pic_arg->buf[0])
1068             direct = 0;
1069         if (pic_arg->linesize[0] != s->linesize)
1070             direct = 0;
1071         if (pic_arg->linesize[1] != s->uvlinesize)
1072             direct = 0;
1073         if (pic_arg->linesize[2] != s->uvlinesize)
1074             direct = 0;
1075         if ((s->width & 15) || (s->height & 15))
1076             direct = 0;
1077         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1078             direct = 0;
1079         if (s->linesize & (STRIDE_ALIGN-1))
1080             direct = 0;
1081
1082         av_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1083                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1084
1085         if (direct) {
1086             i = ff_find_unused_picture(s, 1);
1087             if (i < 0)
1088                 return i;
1089
1090             pic = &s->picture[i];
1091             pic->reference = 3;
1092
1093             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1094                 return ret;
1095             if (ff_alloc_picture(s, pic, 1) < 0) {
1096                 return -1;
1097             }
1098         } else {
1099             i = ff_find_unused_picture(s, 0);
1100             if (i < 0)
1101                 return i;
1102
1103             pic = &s->picture[i];
1104             pic->reference = 3;
1105
1106             if (ff_alloc_picture(s, pic, 0) < 0) {
1107                 return -1;
1108             }
1109
1110             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1111                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1112                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1113                 // empty
1114             } else {
1115                 int h_chroma_shift, v_chroma_shift;
1116                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1117                                                  &h_chroma_shift,
1118                                                  &v_chroma_shift);
1119
1120                 for (i = 0; i < 3; i++) {
1121                     int src_stride = pic_arg->linesize[i];
1122                     int dst_stride = i ? s->uvlinesize : s->linesize;
1123                     int h_shift = i ? h_chroma_shift : 0;
1124                     int v_shift = i ? v_chroma_shift : 0;
1125                     int w = s->width  >> h_shift;
1126                     int h = s->height >> v_shift;
1127                     uint8_t *src = pic_arg->data[i];
1128                     uint8_t *dst = pic->f->data[i];
1129                     int vpad = 16;
1130
1131                     if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1132                         && !s->progressive_sequence)
1133                         vpad = 32;
1134
1135                     if (!s->avctx->rc_buffer_size)
1136                         dst += INPLACE_OFFSET;
1137
1138                     if (src_stride == dst_stride)
1139                         memcpy(dst, src, src_stride * h);
1140                     else {
1141                         int h2 = h;
1142                         uint8_t *dst2 = dst;
1143                         while (h2--) {
1144                             memcpy(dst2, src, w);
1145                             dst2 += dst_stride;
1146                             src += src_stride;
1147                         }
1148                     }
1149                     if ((s->width & 15) || (s->height & (vpad-1))) {
1150                         s->mpvencdsp.draw_edges(dst, dst_stride,
1151                                                 w, h,
1152                                                 16>>h_shift,
1153                                                 vpad>>v_shift,
1154                                                 EDGE_BOTTOM);
1155                     }
1156                 }
1157             }
1158         }
1159         ret = av_frame_copy_props(pic->f, pic_arg);
1160         if (ret < 0)
1161             return ret;
1162
1163         pic->f->display_picture_number = display_picture_number;
1164         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1165     }
1166
1167     /* shift buffer entries */
1168     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1169         s->input_picture[i - 1] = s->input_picture[i];
1170
1171     s->input_picture[encoding_delay] = (Picture*) pic;
1172
1173     return 0;
1174 }
1175
1176 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1177 {
1178     int x, y, plane;
1179     int score = 0;
1180     int64_t score64 = 0;
1181
1182     for (plane = 0; plane < 3; plane++) {
1183         const int stride = p->f->linesize[plane];
1184         const int bw = plane ? 1 : 2;
1185         for (y = 0; y < s->mb_height * bw; y++) {
1186             for (x = 0; x < s->mb_width * bw; x++) {
1187                 int off = p->shared ? 0 : 16;
1188                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1189                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1190                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1191
1192                 switch (FFABS(s->avctx->frame_skip_exp)) {
1193                 case 0: score    =  FFMAX(score, v);          break;
1194                 case 1: score   += FFABS(v);                  break;
1195                 case 2: score64 += v * (int64_t)v;                       break;
1196                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1197                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1198                 }
1199             }
1200         }
1201     }
1202     emms_c();
1203
1204     if (score)
1205         score64 = score;
1206     if (s->avctx->frame_skip_exp < 0)
1207         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1208                       -1.0/s->avctx->frame_skip_exp);
1209
1210     if (score64 < s->avctx->frame_skip_threshold)
1211         return 1;
1212     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1213         return 1;
1214     return 0;
1215 }
1216
1217 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1218 {
1219     AVPacket pkt = { 0 };
1220     int ret, got_output;
1221
1222     av_init_packet(&pkt);
1223     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1224     if (ret < 0)
1225         return ret;
1226
1227     ret = pkt.size;
1228     av_free_packet(&pkt);
1229     return ret;
1230 }
1231
1232 static int estimate_best_b_count(MpegEncContext *s)
1233 {
1234     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1235     AVCodecContext *c = avcodec_alloc_context3(NULL);
1236     const int scale = s->avctx->brd_scale;
1237     int i, j, out_size, p_lambda, b_lambda, lambda2;
1238     int64_t best_rd  = INT64_MAX;
1239     int best_b_count = -1;
1240
1241     av_assert0(scale >= 0 && scale <= 3);
1242
1243     //emms_c();
1244     //s->next_picture_ptr->quality;
1245     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1246     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1247     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1248     if (!b_lambda) // FIXME we should do this somewhere else
1249         b_lambda = p_lambda;
1250     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1251                FF_LAMBDA_SHIFT;
1252
1253     c->width        = s->width  >> scale;
1254     c->height       = s->height >> scale;
1255     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1256     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1257     c->mb_decision  = s->avctx->mb_decision;
1258     c->me_cmp       = s->avctx->me_cmp;
1259     c->mb_cmp       = s->avctx->mb_cmp;
1260     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1261     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1262     c->time_base    = s->avctx->time_base;
1263     c->max_b_frames = s->max_b_frames;
1264
1265     if (avcodec_open2(c, codec, NULL) < 0)
1266         return -1;
1267
1268     for (i = 0; i < s->max_b_frames + 2; i++) {
1269         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1270                                                 s->next_picture_ptr;
1271         uint8_t *data[4];
1272
1273         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1274             pre_input = *pre_input_ptr;
1275             memcpy(data, pre_input_ptr->f->data, sizeof(data));
1276
1277             if (!pre_input.shared && i) {
1278                 data[0] += INPLACE_OFFSET;
1279                 data[1] += INPLACE_OFFSET;
1280                 data[2] += INPLACE_OFFSET;
1281             }
1282
1283             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1284                                        s->tmp_frames[i]->linesize[0],
1285                                        data[0],
1286                                        pre_input.f->linesize[0],
1287                                        c->width, c->height);
1288             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1289                                        s->tmp_frames[i]->linesize[1],
1290                                        data[1],
1291                                        pre_input.f->linesize[1],
1292                                        c->width >> 1, c->height >> 1);
1293             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1294                                        s->tmp_frames[i]->linesize[2],
1295                                        data[2],
1296                                        pre_input.f->linesize[2],
1297                                        c->width >> 1, c->height >> 1);
1298         }
1299     }
1300
1301     for (j = 0; j < s->max_b_frames + 1; j++) {
1302         int64_t rd = 0;
1303
1304         if (!s->input_picture[j])
1305             break;
1306
1307         c->error[0] = c->error[1] = c->error[2] = 0;
1308
1309         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1310         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1311
1312         out_size = encode_frame(c, s->tmp_frames[0]);
1313
1314         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1315
1316         for (i = 0; i < s->max_b_frames + 1; i++) {
1317             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1318
1319             s->tmp_frames[i + 1]->pict_type = is_p ?
1320                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1321             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1322
1323             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1324
1325             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1326         }
1327
1328         /* get the delayed frames */
1329         while (out_size) {
1330             out_size = encode_frame(c, NULL);
1331             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1332         }
1333
1334         rd += c->error[0] + c->error[1] + c->error[2];
1335
1336         if (rd < best_rd) {
1337             best_rd = rd;
1338             best_b_count = j;
1339         }
1340     }
1341
1342     avcodec_close(c);
1343     av_freep(&c);
1344
1345     return best_b_count;
1346 }
1347
1348 static int select_input_picture(MpegEncContext *s)
1349 {
1350     int i, ret;
1351
1352     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1353         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1354     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1355
1356     /* set next picture type & ordering */
1357     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1358         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1359             if (s->picture_in_gop_number < s->gop_size &&
1360                 s->next_picture_ptr &&
1361                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1362                 // FIXME check that te gop check above is +-1 correct
1363                 av_frame_unref(s->input_picture[0]->f);
1364
1365                 ff_vbv_update(s, 0);
1366
1367                 goto no_output_pic;
1368             }
1369         }
1370
1371         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1372             s->next_picture_ptr == NULL || s->intra_only) {
1373             s->reordered_input_picture[0] = s->input_picture[0];
1374             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1375             s->reordered_input_picture[0]->f->coded_picture_number =
1376                 s->coded_picture_number++;
1377         } else {
1378             int b_frames;
1379
1380             if (s->flags & CODEC_FLAG_PASS2) {
1381                 for (i = 0; i < s->max_b_frames + 1; i++) {
1382                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1383
1384                     if (pict_num >= s->rc_context.num_entries)
1385                         break;
1386                     if (!s->input_picture[i]) {
1387                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1388                         break;
1389                     }
1390
1391                     s->input_picture[i]->f->pict_type =
1392                         s->rc_context.entry[pict_num].new_pict_type;
1393                 }
1394             }
1395
1396             if (s->avctx->b_frame_strategy == 0) {
1397                 b_frames = s->max_b_frames;
1398                 while (b_frames && !s->input_picture[b_frames])
1399                     b_frames--;
1400             } else if (s->avctx->b_frame_strategy == 1) {
1401                 for (i = 1; i < s->max_b_frames + 1; i++) {
1402                     if (s->input_picture[i] &&
1403                         s->input_picture[i]->b_frame_score == 0) {
1404                         s->input_picture[i]->b_frame_score =
1405                             get_intra_count(s,
1406                                             s->input_picture[i    ]->f->data[0],
1407                                             s->input_picture[i - 1]->f->data[0],
1408                                             s->linesize) + 1;
1409                     }
1410                 }
1411                 for (i = 0; i < s->max_b_frames + 1; i++) {
1412                     if (s->input_picture[i] == NULL ||
1413                         s->input_picture[i]->b_frame_score - 1 >
1414                             s->mb_num / s->avctx->b_sensitivity)
1415                         break;
1416                 }
1417
1418                 b_frames = FFMAX(0, i - 1);
1419
1420                 /* reset scores */
1421                 for (i = 0; i < b_frames + 1; i++) {
1422                     s->input_picture[i]->b_frame_score = 0;
1423                 }
1424             } else if (s->avctx->b_frame_strategy == 2) {
1425                 b_frames = estimate_best_b_count(s);
1426             } else {
1427                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1428                 b_frames = 0;
1429             }
1430
1431             emms_c();
1432
1433             for (i = b_frames - 1; i >= 0; i--) {
1434                 int type = s->input_picture[i]->f->pict_type;
1435                 if (type && type != AV_PICTURE_TYPE_B)
1436                     b_frames = i;
1437             }
1438             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1439                 b_frames == s->max_b_frames) {
1440                 av_log(s->avctx, AV_LOG_ERROR,
1441                        "warning, too many b frames in a row\n");
1442             }
1443
1444             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1445                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1446                     s->gop_size > s->picture_in_gop_number) {
1447                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1448                 } else {
1449                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1450                         b_frames = 0;
1451                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1452                 }
1453             }
1454
1455             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1456                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1457                 b_frames--;
1458
1459             s->reordered_input_picture[0] = s->input_picture[b_frames];
1460             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1461                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1462             s->reordered_input_picture[0]->f->coded_picture_number =
1463                 s->coded_picture_number++;
1464             for (i = 0; i < b_frames; i++) {
1465                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1466                 s->reordered_input_picture[i + 1]->f->pict_type =
1467                     AV_PICTURE_TYPE_B;
1468                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1469                     s->coded_picture_number++;
1470             }
1471         }
1472     }
1473 no_output_pic:
1474     if (s->reordered_input_picture[0]) {
1475         s->reordered_input_picture[0]->reference =
1476            s->reordered_input_picture[0]->f->pict_type !=
1477                AV_PICTURE_TYPE_B ? 3 : 0;
1478
1479         ff_mpeg_unref_picture(s, &s->new_picture);
1480         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1481             return ret;
1482
1483         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1484             // input is a shared pix, so we can't modifiy it -> alloc a new
1485             // one & ensure that the shared one is reuseable
1486
1487             Picture *pic;
1488             int i = ff_find_unused_picture(s, 0);
1489             if (i < 0)
1490                 return i;
1491             pic = &s->picture[i];
1492
1493             pic->reference = s->reordered_input_picture[0]->reference;
1494             if (ff_alloc_picture(s, pic, 0) < 0) {
1495                 return -1;
1496             }
1497
1498             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1499             if (ret < 0)
1500                 return ret;
1501
1502             /* mark us unused / free shared pic */
1503             av_frame_unref(s->reordered_input_picture[0]->f);
1504             s->reordered_input_picture[0]->shared = 0;
1505
1506             s->current_picture_ptr = pic;
1507         } else {
1508             // input is not a shared pix -> reuse buffer for current_pix
1509             s->current_picture_ptr = s->reordered_input_picture[0];
1510             for (i = 0; i < 4; i++) {
1511                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1512             }
1513         }
1514         ff_mpeg_unref_picture(s, &s->current_picture);
1515         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1516                                        s->current_picture_ptr)) < 0)
1517             return ret;
1518
1519         s->picture_number = s->new_picture.f->display_picture_number;
1520     } else {
1521         ff_mpeg_unref_picture(s, &s->new_picture);
1522     }
1523     return 0;
1524 }
1525
1526 static void frame_end(MpegEncContext *s)
1527 {
1528     if (s->unrestricted_mv &&
1529         s->current_picture.reference &&
1530         !s->intra_only) {
1531         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1532         int hshift = desc->log2_chroma_w;
1533         int vshift = desc->log2_chroma_h;
1534         s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
1535                                 s->current_picture.f->linesize[0],
1536                                 s->h_edge_pos, s->v_edge_pos,
1537                                 EDGE_WIDTH, EDGE_WIDTH,
1538                                 EDGE_TOP | EDGE_BOTTOM);
1539         s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
1540                                 s->current_picture.f->linesize[1],
1541                                 s->h_edge_pos >> hshift,
1542                                 s->v_edge_pos >> vshift,
1543                                 EDGE_WIDTH >> hshift,
1544                                 EDGE_WIDTH >> vshift,
1545                                 EDGE_TOP | EDGE_BOTTOM);
1546         s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
1547                                 s->current_picture.f->linesize[2],
1548                                 s->h_edge_pos >> hshift,
1549                                 s->v_edge_pos >> vshift,
1550                                 EDGE_WIDTH >> hshift,
1551                                 EDGE_WIDTH >> vshift,
1552                                 EDGE_TOP | EDGE_BOTTOM);
1553     }
1554
1555     emms_c();
1556
1557     s->last_pict_type                 = s->pict_type;
1558     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1559     if (s->pict_type!= AV_PICTURE_TYPE_B)
1560         s->last_non_b_pict_type = s->pict_type;
1561
1562     s->avctx->coded_frame = s->current_picture_ptr->f;
1563
1564 }
1565
1566 static void update_noise_reduction(MpegEncContext *s)
1567 {
1568     int intra, i;
1569
1570     for (intra = 0; intra < 2; intra++) {
1571         if (s->dct_count[intra] > (1 << 16)) {
1572             for (i = 0; i < 64; i++) {
1573                 s->dct_error_sum[intra][i] >>= 1;
1574             }
1575             s->dct_count[intra] >>= 1;
1576         }
1577
1578         for (i = 0; i < 64; i++) {
1579             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1580                                        s->dct_count[intra] +
1581                                        s->dct_error_sum[intra][i] / 2) /
1582                                       (s->dct_error_sum[intra][i] + 1);
1583         }
1584     }
1585 }
1586
1587 static int frame_start(MpegEncContext *s)
1588 {
1589     int ret;
1590
1591     /* mark & release old frames */
1592     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1593         s->last_picture_ptr != s->next_picture_ptr &&
1594         s->last_picture_ptr->f->buf[0]) {
1595         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1596     }
1597
1598     s->current_picture_ptr->f->pict_type = s->pict_type;
1599     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1600
1601     ff_mpeg_unref_picture(s, &s->current_picture);
1602     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1603                                    s->current_picture_ptr)) < 0)
1604         return ret;
1605
1606     if (s->pict_type != AV_PICTURE_TYPE_B) {
1607         s->last_picture_ptr = s->next_picture_ptr;
1608         if (!s->droppable)
1609             s->next_picture_ptr = s->current_picture_ptr;
1610     }
1611
1612     if (s->last_picture_ptr) {
1613         ff_mpeg_unref_picture(s, &s->last_picture);
1614         if (s->last_picture_ptr->f->buf[0] &&
1615             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1616                                        s->last_picture_ptr)) < 0)
1617             return ret;
1618     }
1619     if (s->next_picture_ptr) {
1620         ff_mpeg_unref_picture(s, &s->next_picture);
1621         if (s->next_picture_ptr->f->buf[0] &&
1622             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1623                                        s->next_picture_ptr)) < 0)
1624             return ret;
1625     }
1626
1627     if (s->picture_structure!= PICT_FRAME) {
1628         int i;
1629         for (i = 0; i < 4; i++) {
1630             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1631                 s->current_picture.f->data[i] +=
1632                     s->current_picture.f->linesize[i];
1633             }
1634             s->current_picture.f->linesize[i] *= 2;
1635             s->last_picture.f->linesize[i]    *= 2;
1636             s->next_picture.f->linesize[i]    *= 2;
1637         }
1638     }
1639
1640     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1641         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1642         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1643     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1644         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1645         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1646     } else {
1647         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1648         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1649     }
1650
1651     if (s->dct_error_sum) {
1652         av_assert2(s->avctx->noise_reduction && s->encoding);
1653         update_noise_reduction(s);
1654     }
1655
1656     return 0;
1657 }
1658
1659 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1660                           const AVFrame *pic_arg, int *got_packet)
1661 {
1662     MpegEncContext *s = avctx->priv_data;
1663     int i, stuffing_count, ret;
1664     int context_count = s->slice_context_count;
1665
1666     s->picture_in_gop_number++;
1667
1668     if (load_input_picture(s, pic_arg) < 0)
1669         return -1;
1670
1671     if (select_input_picture(s) < 0) {
1672         return -1;
1673     }
1674
1675     /* output? */
1676     if (s->new_picture.f->data[0]) {
1677         if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1678             return ret;
1679         if (s->mb_info) {
1680             s->mb_info_ptr = av_packet_new_side_data(pkt,
1681                                  AV_PKT_DATA_H263_MB_INFO,
1682                                  s->mb_width*s->mb_height*12);
1683             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1684         }
1685
1686         for (i = 0; i < context_count; i++) {
1687             int start_y = s->thread_context[i]->start_mb_y;
1688             int   end_y = s->thread_context[i]->  end_mb_y;
1689             int h       = s->mb_height;
1690             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1691             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1692
1693             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1694         }
1695
1696         s->pict_type = s->new_picture.f->pict_type;
1697         //emms_c();
1698         ret = frame_start(s);
1699         if (ret < 0)
1700             return ret;
1701 vbv_retry:
1702         if (encode_picture(s, s->picture_number) < 0)
1703             return -1;
1704
1705         avctx->header_bits = s->header_bits;
1706         avctx->mv_bits     = s->mv_bits;
1707         avctx->misc_bits   = s->misc_bits;
1708         avctx->i_tex_bits  = s->i_tex_bits;
1709         avctx->p_tex_bits  = s->p_tex_bits;
1710         avctx->i_count     = s->i_count;
1711         // FIXME f/b_count in avctx
1712         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1713         avctx->skip_count  = s->skip_count;
1714
1715         frame_end(s);
1716
1717         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1718             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1719
1720         if (avctx->rc_buffer_size) {
1721             RateControlContext *rcc = &s->rc_context;
1722             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1723
1724             if (put_bits_count(&s->pb) > max_size &&
1725                 s->lambda < s->avctx->lmax) {
1726                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1727                                        (s->qscale + 1) / s->qscale);
1728                 if (s->adaptive_quant) {
1729                     int i;
1730                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1731                         s->lambda_table[i] =
1732                             FFMAX(s->lambda_table[i] + 1,
1733                                   s->lambda_table[i] * (s->qscale + 1) /
1734                                   s->qscale);
1735                 }
1736                 s->mb_skipped = 0;        // done in frame_start()
1737                 // done in encode_picture() so we must undo it
1738                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1739                     if (s->flipflop_rounding          ||
1740                         s->codec_id == AV_CODEC_ID_H263P ||
1741                         s->codec_id == AV_CODEC_ID_MPEG4)
1742                         s->no_rounding ^= 1;
1743                 }
1744                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1745                     s->time_base       = s->last_time_base;
1746                     s->last_non_b_time = s->time - s->pp_time;
1747                 }
1748                 for (i = 0; i < context_count; i++) {
1749                     PutBitContext *pb = &s->thread_context[i]->pb;
1750                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1751                 }
1752                 goto vbv_retry;
1753             }
1754
1755             av_assert0(s->avctx->rc_max_rate);
1756         }
1757
1758         if (s->flags & CODEC_FLAG_PASS1)
1759             ff_write_pass1_stats(s);
1760
1761         for (i = 0; i < 4; i++) {
1762             s->current_picture_ptr->f->error[i] =
1763             s->current_picture.f->error[i] =
1764                 s->current_picture.error[i];
1765             avctx->error[i] += s->current_picture_ptr->f->error[i];
1766         }
1767
1768         if (s->flags & CODEC_FLAG_PASS1)
1769             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1770                    avctx->i_tex_bits + avctx->p_tex_bits ==
1771                        put_bits_count(&s->pb));
1772         flush_put_bits(&s->pb);
1773         s->frame_bits  = put_bits_count(&s->pb);
1774
1775         stuffing_count = ff_vbv_update(s, s->frame_bits);
1776         s->stuffing_bits = 8*stuffing_count;
1777         if (stuffing_count) {
1778             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1779                     stuffing_count + 50) {
1780                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1781                 return -1;
1782             }
1783
1784             switch (s->codec_id) {
1785             case AV_CODEC_ID_MPEG1VIDEO:
1786             case AV_CODEC_ID_MPEG2VIDEO:
1787                 while (stuffing_count--) {
1788                     put_bits(&s->pb, 8, 0);
1789                 }
1790             break;
1791             case AV_CODEC_ID_MPEG4:
1792                 put_bits(&s->pb, 16, 0);
1793                 put_bits(&s->pb, 16, 0x1C3);
1794                 stuffing_count -= 4;
1795                 while (stuffing_count--) {
1796                     put_bits(&s->pb, 8, 0xFF);
1797                 }
1798             break;
1799             default:
1800                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1801             }
1802             flush_put_bits(&s->pb);
1803             s->frame_bits  = put_bits_count(&s->pb);
1804         }
1805
1806         /* update mpeg1/2 vbv_delay for CBR */
1807         if (s->avctx->rc_max_rate                          &&
1808             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1809             s->out_format == FMT_MPEG1                     &&
1810             90000LL * (avctx->rc_buffer_size - 1) <=
1811                 s->avctx->rc_max_rate * 0xFFFFLL) {
1812             int vbv_delay, min_delay;
1813             double inbits  = s->avctx->rc_max_rate *
1814                              av_q2d(s->avctx->time_base);
1815             int    minbits = s->frame_bits - 8 *
1816                              (s->vbv_delay_ptr - s->pb.buf - 1);
1817             double bits    = s->rc_context.buffer_index + minbits - inbits;
1818
1819             if (bits < 0)
1820                 av_log(s->avctx, AV_LOG_ERROR,
1821                        "Internal error, negative bits\n");
1822
1823             assert(s->repeat_first_field == 0);
1824
1825             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1826             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1827                         s->avctx->rc_max_rate;
1828
1829             vbv_delay = FFMAX(vbv_delay, min_delay);
1830
1831             av_assert0(vbv_delay < 0xFFFF);
1832
1833             s->vbv_delay_ptr[0] &= 0xF8;
1834             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1835             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1836             s->vbv_delay_ptr[2] &= 0x07;
1837             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1838             avctx->vbv_delay     = vbv_delay * 300;
1839         }
1840         s->total_bits     += s->frame_bits;
1841         avctx->frame_bits  = s->frame_bits;
1842
1843         pkt->pts = s->current_picture.f->pts;
1844         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1845             if (!s->current_picture.f->coded_picture_number)
1846                 pkt->dts = pkt->pts - s->dts_delta;
1847             else
1848                 pkt->dts = s->reordered_pts;
1849             s->reordered_pts = pkt->pts;
1850         } else
1851             pkt->dts = pkt->pts;
1852         if (s->current_picture.f->key_frame)
1853             pkt->flags |= AV_PKT_FLAG_KEY;
1854         if (s->mb_info)
1855             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1856     } else {
1857         s->frame_bits = 0;
1858     }
1859
1860     /* release non-reference frames */
1861     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1862         if (!s->picture[i].reference)
1863             ff_mpeg_unref_picture(s, &s->picture[i]);
1864     }
1865
1866     av_assert1((s->frame_bits & 7) == 0);
1867
1868     pkt->size = s->frame_bits / 8;
1869     *got_packet = !!pkt->size;
1870     return 0;
1871 }
1872
1873 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1874                                                 int n, int threshold)
1875 {
1876     static const char tab[64] = {
1877         3, 2, 2, 1, 1, 1, 1, 1,
1878         1, 1, 1, 1, 1, 1, 1, 1,
1879         1, 1, 1, 1, 1, 1, 1, 1,
1880         0, 0, 0, 0, 0, 0, 0, 0,
1881         0, 0, 0, 0, 0, 0, 0, 0,
1882         0, 0, 0, 0, 0, 0, 0, 0,
1883         0, 0, 0, 0, 0, 0, 0, 0,
1884         0, 0, 0, 0, 0, 0, 0, 0
1885     };
1886     int score = 0;
1887     int run = 0;
1888     int i;
1889     int16_t *block = s->block[n];
1890     const int last_index = s->block_last_index[n];
1891     int skip_dc;
1892
1893     if (threshold < 0) {
1894         skip_dc = 0;
1895         threshold = -threshold;
1896     } else
1897         skip_dc = 1;
1898
1899     /* Are all we could set to zero already zero? */
1900     if (last_index <= skip_dc - 1)
1901         return;
1902
1903     for (i = 0; i <= last_index; i++) {
1904         const int j = s->intra_scantable.permutated[i];
1905         const int level = FFABS(block[j]);
1906         if (level == 1) {
1907             if (skip_dc && i == 0)
1908                 continue;
1909             score += tab[run];
1910             run = 0;
1911         } else if (level > 1) {
1912             return;
1913         } else {
1914             run++;
1915         }
1916     }
1917     if (score >= threshold)
1918         return;
1919     for (i = skip_dc; i <= last_index; i++) {
1920         const int j = s->intra_scantable.permutated[i];
1921         block[j] = 0;
1922     }
1923     if (block[0])
1924         s->block_last_index[n] = 0;
1925     else
1926         s->block_last_index[n] = -1;
1927 }
1928
1929 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1930                                int last_index)
1931 {
1932     int i;
1933     const int maxlevel = s->max_qcoeff;
1934     const int minlevel = s->min_qcoeff;
1935     int overflow = 0;
1936
1937     if (s->mb_intra) {
1938         i = 1; // skip clipping of intra dc
1939     } else
1940         i = 0;
1941
1942     for (; i <= last_index; i++) {
1943         const int j = s->intra_scantable.permutated[i];
1944         int level = block[j];
1945
1946         if (level > maxlevel) {
1947             level = maxlevel;
1948             overflow++;
1949         } else if (level < minlevel) {
1950             level = minlevel;
1951             overflow++;
1952         }
1953
1954         block[j] = level;
1955     }
1956
1957     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1958         av_log(s->avctx, AV_LOG_INFO,
1959                "warning, clipping %d dct coefficients to %d..%d\n",
1960                overflow, minlevel, maxlevel);
1961 }
1962
/**
 * Compute a weight for each pixel of an 8x8 block from its neighbourhood.
 *
 * For every pixel the up-to-3x3 window around it (cut off at the block
 * border) is scanned; with n samples, sum S and sum of squares Q the weight
 * is 36 * ff_sqrt(n * Q - S * S) / n.
 *
 * @param weight output, 64 entries in row-major order
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance in bytes between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int x, y;

    // FIXME optimize
    for (y = 0; y < 8; y++) {
        const int row_first = FFMAX(y - 1, 0);
        const int row_end   = FFMIN(8, y + 2);

        for (x = 0; x < 8; x++) {
            const int col_first = FFMAX(x - 1, 0);
            const int col_end   = FFMIN(8, x + 2);
            int sum = 0, sqr = 0, count = 0;
            int nx, ny;

            for (ny = row_first; ny < row_end; ny++) {
                const uint8_t *row = ptr + ny * stride;

                for (nx = col_first; nx < col_end; nx++) {
                    const int v = row[nx];

                    sum += v;
                    sqr += v * v;
                    count++;
                }
            }

            weight[x + 8 * y] =
                (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
1986
/**
 * Encode the macroblock at (s->mb_x, s->mb_y).
 *
 * Updates the quantizer, fetches the source pixels (intra) or the difference
 * to the motion-compensated prediction (inter) into s->block[], runs DCT and
 * quantization on every block that is not skipped, applies the optional
 * coefficient refinement/elimination steps and finally calls the
 * codec-specific macroblock writer selected by s->codec_id.
 *
 * @param s               encoder context
 * @param motion_x        passed through to the codec-specific writer
 * @param motion_y        passed through to the codec-specific writer
 * @param mb_block_height height used for addressing a chroma macroblock
 * @param mb_block_width  width used for addressing a chroma macroblock
 * @param mb_block_count  number of blocks in s->block[] that are processed
 */
1987 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1988                                                 int motion_x, int motion_y,
1989                                                 int mb_block_height,
1990                                                 int mb_block_width,
1991                                                 int mb_block_count)
1992 {
1993     int16_t weight[12][64];
1994     int16_t orig[12][64];
1995     const int mb_x = s->mb_x;
1996     const int mb_y = s->mb_y;
1997     int i;
1998     int skip_dct[12];
1999     int dct_offset = s->linesize * 8; // default for progressive frames
2000     int uv_dct_offset = s->uvlinesize * 8;
2001     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2002     ptrdiff_t wrap_y, wrap_c;
2003
2004     for (i = 0; i < mb_block_count; i++)
2005         skip_dct[i] = s->skipdct;
2006
         /* pick the quantizer for this macroblock */
2007     if (s->adaptive_quant) {
2008         const int last_qp = s->qscale;
2009         const int mb_xy = mb_x + mb_y * s->mb_stride;
2010
2011         s->lambda = s->lambda_table[mb_xy];
2012         update_qscale(s);
2013
2014         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2015             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2016             s->dquant = s->qscale - last_qp;
2017
2018             if (s->out_format == FMT_H263) {
2019                 s->dquant = av_clip(s->dquant, -2, 2);
2020
                     /* for MPEG-4 inter macroblocks dquant is forced to 0 in
                      * the cases below */
2021                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2022                     if (!s->mb_intra) {
2023                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2024                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2025                                 s->dquant = 0;
2026                         }
2027                         if (s->mv_type == MV_TYPE_8X8)
2028                             s->dquant = 0;
2029                     }
2030                 }
2031             }
2032         }
2033         ff_set_qscale(s, last_qp + s->dquant);
2034     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2035         ff_set_qscale(s, s->qscale + s->dquant);
2036
         /* locate the source pixels of this macroblock in the input picture */
2037     wrap_y = s->linesize;
2038     wrap_c = s->uvlinesize;
2039     ptr_y  = s->new_picture.f->data[0] +
2040              (mb_y * 16 * wrap_y)              + mb_x * 16;
2041     ptr_cb = s->new_picture.f->data[1] +
2042              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2043     ptr_cr = s->new_picture.f->data[2] +
2044              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2045
         /* the macroblock reaches beyond the picture width/height (and the
          * codec is not AMV): work on a copy made by emulated_edge_mc() in
          * the edge emulation buffer instead */
2046     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2047         uint8_t *ebuf = s->edge_emu_buffer + 36 * wrap_y;
2048         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2049         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2050         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2051                                  wrap_y, wrap_y,
2052                                  16, 16, mb_x * 16, mb_y * 16,
2053                                  s->width, s->height);
2054         ptr_y = ebuf;
2055         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2056                                  wrap_c, wrap_c,
2057                                  mb_block_width, mb_block_height,
2058                                  mb_x * mb_block_width, mb_y * mb_block_height,
2059                                  cw, ch);
2060         ptr_cb = ebuf + 16 * wrap_y;
2061         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2062                                  wrap_c, wrap_c,
2063                                  mb_block_width, mb_block_height,
2064                                  mb_x * mb_block_width, mb_y * mb_block_height,
2065                                  cw, ch);
2066         ptr_cr = ebuf + 16 * wrap_y + 16;
2067     }
2068
2069     if (s->mb_intra) {
             /* intra: the blocks are filled directly from the source pixels */
2070         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2071             int progressive_score, interlaced_score;
2072
2073             s->interlaced_dct = 0;
2074             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
2075                                                     NULL, wrap_y, 8) +
2076                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2077                                                     NULL, wrap_y, 8) - 400;
2078
2079             if (progressive_score > 0) {
2080                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
2081                                                        NULL, wrap_y * 2, 8) +
2082                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
2083                                                        NULL, wrap_y * 2, 8);
                     /* interlaced DCT wins: the lower blocks start one line
                      * down and every block uses a doubled stride */
2084                 if (progressive_score > interlaced_score) {
2085                     s->interlaced_dct = 1;
2086
2087                     dct_offset = wrap_y;
2088                     uv_dct_offset = wrap_c;
2089                     wrap_y <<= 1;
2090                     if (s->chroma_format == CHROMA_422 ||
2091                         s->chroma_format == CHROMA_444)
2092                         wrap_c <<= 1;
2093                 }
2094             }
2095         }
2096
2097         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2098         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2099         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2100         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2101
2102         if (s->flags & CODEC_FLAG_GRAY) {
2103             skip_dct[4] = 1;
2104             skip_dct[5] = 1;
2105         } else {
2106             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2107             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2108             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2109                 s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2110                 s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2111             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2112                 s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
2113                 s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
2114                 s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
2115                 s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
2116                 s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2117                 s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2118             }
2119         }
2120     } else {
             /* inter: build the prediction in s->dest[] and code the
              * difference between source and prediction */
2121         op_pixels_func (*op_pix)[4];
2122         qpel_mc_func (*op_qpix)[16];
2123         uint8_t *dest_y, *dest_cb, *dest_cr;
2124
2125         dest_y  = s->dest[0];
2126         dest_cb = s->dest[1];
2127         dest_cr = s->dest[2];
2128
2129         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2130             op_pix  = s->hdsp.put_pixels_tab;
2131             op_qpix = s->qdsp.put_qpel_pixels_tab;
2132         } else {
2133             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2134             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2135         }
2136
             /* after a forward prediction the operators are switched to the
              * avg variants, which the backward prediction below then uses */
2137         if (s->mv_dir & MV_DIR_FORWARD) {
2138             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
2139                           s->last_picture.f->data,
2140                           op_pix, op_qpix);
2141             op_pix  = s->hdsp.avg_pixels_tab;
2142             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2143         }
2144         if (s->mv_dir & MV_DIR_BACKWARD) {
2145             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
2146                           s->next_picture.f->data,
2147                           op_pix, op_qpix);
2148         }
2149
2150         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2151             int progressive_score, interlaced_score;
2152
2153             s->interlaced_dct = 0;
2154             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
2155                                                     ptr_y,              wrap_y,
2156                                                     8) +
2157                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
2158                                                     ptr_y + wrap_y * 8, wrap_y,
2159                                                     8) - 400;
2160
2161             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2162                 progressive_score -= 400;
2163
2164             if (progressive_score > 0) {
2165                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
2166                                                        ptr_y,
2167                                                        wrap_y * 2, 8) +
2168                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
2169                                                        ptr_y + wrap_y,
2170                                                        wrap_y * 2, 8);
2171
2172                 if (progressive_score > interlaced_score) {
2173                     s->interlaced_dct = 1;
2174
2175                     dct_offset = wrap_y;
2176                     uv_dct_offset = wrap_c;
2177                     wrap_y <<= 1;
2178                     if (s->chroma_format == CHROMA_422)
2179                         wrap_c <<= 1;
2180                 }
2181             }
2182         }
2183
2184         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2185         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2186         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2187                             dest_y + dct_offset, wrap_y);
2188         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2189                             dest_y + dct_offset + 8, wrap_y);
2190
2191         if (s->flags & CODEC_FLAG_GRAY) {
2192             skip_dct[4] = 1;
2193             skip_dct[5] = 1;
2194         } else {
2195             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2196             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2197             if (!s->chroma_y_shift) { /* 422 */
2198                 s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2199                                     dest_cb + uv_dct_offset, wrap_c);
2200                 s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2201                                     dest_cr + uv_dct_offset, wrap_c);
2202             }
2203         }
2204         /* pre quantization */
             /* when the stored mc_mb_var of this macroblock is below
              * 2 * qscale^2, blocks whose SAD against the prediction is
              * below 20 * qscale are not transformed at all */
2205         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2206                 2 * s->qscale * s->qscale) {
2207             // FIXME optimize
2208             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
2209                               wrap_y, 8) < 20 * s->qscale)
2210                 skip_dct[0] = 1;
2211             if (s->dsp.sad[1](NULL, ptr_y + 8,
2212                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2213                 skip_dct[1] = 1;
2214             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
2215                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
2216                 skip_dct[2] = 1;
2217             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
2218                               dest_y + dct_offset + 8,
2219                               wrap_y, 8) < 20 * s->qscale)
2220                 skip_dct[3] = 1;
2221             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
2222                               wrap_c, 8) < 20 * s->qscale)
2223                 skip_dct[4] = 1;
2224             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
2225                               wrap_c, 8) < 20 * s->qscale)
2226                 skip_dct[5] = 1;
2227             if (!s->chroma_y_shift) { /* 422 */
2228                 if (s->dsp.sad[1](NULL, ptr_cb + uv_dct_offset,
2229                                   dest_cb + uv_dct_offset,
2230                                   wrap_c, 8) < 20 * s->qscale)
2231                     skip_dct[6] = 1;
2232                 if (s->dsp.sad[1](NULL, ptr_cr + uv_dct_offset,
2233                                   dest_cr + uv_dct_offset,
2234                                   wrap_c, 8) < 20 * s->qscale)
2235                     skip_dct[7] = 1;
2236             }
2237         }
2238     }
2239
         /* noise shaping: compute per-pixel weights from the source pixels
          * and keep a copy of the unquantized blocks for
          * dct_quantize_refine() */
2240     if (s->quantizer_noise_shaping) {
2241         if (!skip_dct[0])
2242             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2243         if (!skip_dct[1])
2244             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2245         if (!skip_dct[2])
2246             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2247         if (!skip_dct[3])
2248             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2249         if (!skip_dct[4])
2250             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2251         if (!skip_dct[5])
2252             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2253         if (!s->chroma_y_shift) { /* 422 */
2254             if (!skip_dct[6])
2255                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2256                                   wrap_c);
2257             if (!skip_dct[7])
2258                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2259                                   wrap_c);
2260         }
2261         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2262     }
2263
2264     /* DCT & quantize */
2265     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2266     {
2267         for (i = 0; i < mb_block_count; i++) {
2268             if (!skip_dct[i]) {
2269                 int overflow;
2270                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2271                 // FIXME we could decide to change to quantizer instead of
2272                 // clipping
2273                 // JS: I don't think that would be a good idea it could lower
2274                 //     quality instead of improve it. Just INTRADC clipping
2275                 //     deserves changes in quantizer
2276                 if (overflow)
2277                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2278             } else
2279                 s->block_last_index[i] = -1;
2280         }
2281         if (s->quantizer_noise_shaping) {
2282             for (i = 0; i < mb_block_count; i++) {
2283                 if (!skip_dct[i]) {
2284                     s->block_last_index[i] =
2285                         dct_quantize_refine(s, s->block[i], weight[i],
2286                                             orig[i], i, s->qscale);
2287                 }
2288             }
2289         }
2290
             /* single coefficient elimination, inter macroblocks only:
              * blocks 0..3 use the luma threshold, the rest the chroma one */
2291         if (s->luma_elim_threshold && !s->mb_intra)
2292             for (i = 0; i < 4; i++)
2293                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2294         if (s->chroma_elim_threshold && !s->mb_intra)
2295             for (i = 4; i < mb_block_count; i++)
2296                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2297
2298         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2299             for (i = 0; i < mb_block_count; i++) {
2300                 if (s->block_last_index[i] == -1)
2301                     s->coded_score[i] = INT_MAX / 256;
2302             }
2303         }
2304     }
2305
         /* gray intra macroblocks: the chroma blocks were skipped above, so
          * give them a DC-only content with a fixed value */
2306     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2307         s->block_last_index[4] =
2308         s->block_last_index[5] = 0;
2309         s->block[4][0] =
2310         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2311         if (!s->chroma_y_shift) { /* 422 / 444 */
2312             for (i=6; i<12; i++) {
2313                 s->block_last_index[i] = 0;
2314                 s->block[i][0] = s->block[4][0];
2315             }
2316         }
2317     }
2318
2319     // non c quantize code returns incorrect block_last_index FIXME
         /* so recompute it by searching backwards for the last non-zero
          * coefficient in scan order */
2320     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2321         for (i = 0; i < mb_block_count; i++) {
2322             int j;
2323             if (s->block_last_index[i] > 0) {
2324                 for (j = 63; j > 0; j--) {
2325                     if (s->block[i][s->intra_scantable.permutated[j]])
2326                         break;
2327                 }
2328                 s->block_last_index[i] = j;
2329             }
2330         }
2331     }
2332
2333     /* huffman encode */
2334     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2335     case AV_CODEC_ID_MPEG1VIDEO:
2336     case AV_CODEC_ID_MPEG2VIDEO:
2337         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2338             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2339         break;
2340     case AV_CODEC_ID_MPEG4:
2341         if (CONFIG_MPEG4_ENCODER)
2342             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2343         break;
2344     case AV_CODEC_ID_MSMPEG4V2:
2345     case AV_CODEC_ID_MSMPEG4V3:
2346     case AV_CODEC_ID_WMV1:
2347         if (CONFIG_MSMPEG4_ENCODER)
2348             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2349         break;
2350     case AV_CODEC_ID_WMV2:
2351         if (CONFIG_WMV2_ENCODER)
2352             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2353         break;
2354     case AV_CODEC_ID_H261:
2355         if (CONFIG_H261_ENCODER)
2356             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2357         break;
2358     case AV_CODEC_ID_H263:
2359     case AV_CODEC_ID_H263P:
2360     case AV_CODEC_ID_FLV1:
2361     case AV_CODEC_ID_RV10:
2362     case AV_CODEC_ID_RV20:
2363         if (CONFIG_H263_ENCODER)
2364             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2365         break;
2366     case AV_CODEC_ID_MJPEG:
2367     case AV_CODEC_ID_AMV:
2368         if (CONFIG_MJPEG_ENCODER)
2369             ff_mjpeg_encode_mb(s, s->block);
2370         break;
2371     default:
2372         av_assert1(0);
2373     }
2374 }
2375
2376 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2377 {
2378     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2379     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2380     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2381 }
2382
2383 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2384     int i;
2385
2386     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2387
2388     /* mpeg1 */
2389     d->mb_skip_run= s->mb_skip_run;
2390     for(i=0; i<3; i++)
2391         d->last_dc[i] = s->last_dc[i];
2392
2393     /* statistics */
2394     d->mv_bits= s->mv_bits;
2395     d->i_tex_bits= s->i_tex_bits;
2396     d->p_tex_bits= s->p_tex_bits;
2397     d->i_count= s->i_count;
2398     d->f_count= s->f_count;
2399     d->b_count= s->b_count;
2400     d->skip_count= s->skip_count;
2401     d->misc_bits= s->misc_bits;
2402     d->last_bits= 0;
2403
2404     d->mb_skipped= 0;
2405     d->qscale= s->qscale;
2406     d->dquant= s->dquant;
2407
2408     d->esc3_level_length= s->esc3_level_length;
2409 }
2410
2411 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2412     int i;
2413
2414     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2415     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2416
2417     /* mpeg1 */
2418     d->mb_skip_run= s->mb_skip_run;
2419     for(i=0; i<3; i++)
2420         d->last_dc[i] = s->last_dc[i];
2421
2422     /* statistics */
2423     d->mv_bits= s->mv_bits;
2424     d->i_tex_bits= s->i_tex_bits;
2425     d->p_tex_bits= s->p_tex_bits;
2426     d->i_count= s->i_count;
2427     d->f_count= s->f_count;
2428     d->b_count= s->b_count;
2429     d->skip_count= s->skip_count;
2430     d->misc_bits= s->misc_bits;
2431
2432     d->mb_intra= s->mb_intra;
2433     d->mb_skipped= s->mb_skipped;
2434     d->mv_type= s->mv_type;
2435     d->mv_dir= s->mv_dir;
2436     d->pb= s->pb;
2437     if(s->data_partitioning){
2438         d->pb2= s->pb2;
2439         d->tex_pb= s->tex_pb;
2440     }
2441     d->block= s->block;
2442     for(i=0; i<8; i++)
2443         d->block_last_index[i]= s->block_last_index[i];
2444     d->interlaced_dct= s->interlaced_dct;
2445     d->qscale= s->qscale;
2446
2447     d->esc3_level_length= s->esc3_level_length;
2448 }
2449
2450 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2451                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2452                            int *dmin, int *next_block, int motion_x, int motion_y)
2453 {
2454     int score;
2455     uint8_t *dest_backup[3];
2456
2457     copy_context_before_encode(s, backup, type);
2458
2459     s->block= s->blocks[*next_block];
2460     s->pb= pb[*next_block];
2461     if(s->data_partitioning){
2462         s->pb2   = pb2   [*next_block];
2463         s->tex_pb= tex_pb[*next_block];
2464     }
2465
2466     if(*next_block){
2467         memcpy(dest_backup, s->dest, sizeof(s->dest));
2468         s->dest[0] = s->rd_scratchpad;
2469         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2470         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2471         av_assert0(s->linesize >= 32); //FIXME
2472     }
2473
2474     encode_mb(s, motion_x, motion_y);
2475
2476     score= put_bits_count(&s->pb);
2477     if(s->data_partitioning){
2478         score+= put_bits_count(&s->pb2);
2479         score+= put_bits_count(&s->tex_pb);
2480     }
2481
2482     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2483         ff_MPV_decode_mb(s, s->block);
2484
2485         score *= s->lambda2;
2486         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2487     }
2488
2489     if(*next_block){
2490         memcpy(s->dest, dest_backup, sizeof(s->dest));
2491     }
2492
2493     if(score<*dmin){
2494         *dmin= score;
2495         *next_block^=1;
2496
2497         copy_context_after_encode(best, s, type);
2498     }
2499 }
2500
2501 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2502     uint32_t *sq = ff_square_tab + 256;
2503     int acc=0;
2504     int x,y;
2505
2506     if(w==16 && h==16)
2507         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2508     else if(w==8 && h==8)
2509         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2510
2511     for(y=0; y<h; y++){
2512         for(x=0; x<w; x++){
2513             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2514         }
2515     }
2516
2517     av_assert2(acc>=0);
2518
2519     return acc;
2520 }
2521
2522 static int sse_mb(MpegEncContext *s){
2523     int w= 16;
2524     int h= 16;
2525
2526     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2527     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2528
2529     if(w==16 && h==16)
2530       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2531         return  s->dsp.nsse[0](s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2532                +s->dsp.nsse[1](s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2533                +s->dsp.nsse[1](s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2534       }else{
2535         return  s->dsp.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2536                +s->dsp.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2537                +s->dsp.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2538       }
2539     else
2540         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2541                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2542                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2543 }
2544
2545 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2546     MpegEncContext *s= *(void**)arg;
2547
2548
2549     s->me.pre_pass=1;
2550     s->me.dia_size= s->avctx->pre_dia_size;
2551     s->first_slice_line=1;
2552     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2553         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2554             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2555         }
2556         s->first_slice_line=0;
2557     }
2558
2559     s->me.pre_pass=0;
2560
2561     return 0;
2562 }
2563
2564 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2565     MpegEncContext *s= *(void**)arg;
2566
2567     ff_check_alignment();
2568
2569     s->me.dia_size= s->avctx->dia_size;
2570     s->first_slice_line=1;
2571     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2572         s->mb_x=0; //for block init below
2573         ff_init_block_index(s);
2574         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2575             s->block_index[0]+=2;
2576             s->block_index[1]+=2;
2577             s->block_index[2]+=2;
2578             s->block_index[3]+=2;
2579
2580             /* compute motion vector & mb_type and store in context */
2581             if(s->pict_type==AV_PICTURE_TYPE_B)
2582                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2583             else
2584                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2585         }
2586         s->first_slice_line=0;
2587     }
2588     return 0;
2589 }
2590
2591 static int mb_var_thread(AVCodecContext *c, void *arg){
2592     MpegEncContext *s= *(void**)arg;
2593     int mb_x, mb_y;
2594
2595     ff_check_alignment();
2596
2597     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2598         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2599             int xx = mb_x * 16;
2600             int yy = mb_y * 16;
2601             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2602             int varc;
2603             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2604
2605             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2606                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2607
2608             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2609             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2610             s->me.mb_var_sum_temp    += varc;
2611         }
2612     }
2613     return 0;
2614 }
2615
2616 static void write_slice_end(MpegEncContext *s){
2617     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2618         if(s->partitioned_frame){
2619             ff_mpeg4_merge_partitions(s);
2620         }
2621
2622         ff_mpeg4_stuffing(&s->pb);
2623     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2624         ff_mjpeg_encode_stuffing(s);
2625     }
2626
2627     avpriv_align_put_bits(&s->pb);
2628     flush_put_bits(&s->pb);
2629
2630     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2631         s->misc_bits+= get_bits_diff(s);
2632 }
2633
2634 static void write_mb_info(MpegEncContext *s)
2635 {
2636     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2637     int offset = put_bits_count(&s->pb);
2638     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2639     int gobn = s->mb_y / s->gob_index;
2640     int pred_x, pred_y;
2641     if (CONFIG_H263_ENCODER)
2642         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2643     bytestream_put_le32(&ptr, offset);
2644     bytestream_put_byte(&ptr, s->qscale);
2645     bytestream_put_byte(&ptr, gobn);
2646     bytestream_put_le16(&ptr, mba);
2647     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2648     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2649     /* 4MV not implemented */
2650     bytestream_put_byte(&ptr, 0); /* hmv2 */
2651     bytestream_put_byte(&ptr, 0); /* vmv2 */
2652 }
2653
2654 static void update_mb_info(MpegEncContext *s, int startcode)
2655 {
2656     if (!s->mb_info)
2657         return;
2658     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2659         s->mb_info_size += 12;
2660         s->prev_mb_info = s->last_mb_info;
2661     }
2662     if (startcode) {
2663         s->prev_mb_info = put_bits_count(&s->pb)/8;
2664         /* This might have incremented mb_info_size above, and we return without
2665          * actually writing any info into that slot yet. But in that case,
2666          * this will be called again at the start of the after writing the
2667          * start code, actually writing the mb info. */
2668         return;
2669     }
2670
2671     s->last_mb_info = put_bits_count(&s->pb)/8;
2672     if (!s->mb_info_size)
2673         s->mb_info_size += 12;
2674     write_mb_info(s);
2675 }
2676
2677 static int encode_thread(AVCodecContext *c, void *arg){
2678     MpegEncContext *s= *(void**)arg;
2679     int mb_x, mb_y, pdif = 0;
2680     int chr_h= 16>>s->chroma_y_shift;
2681     int i, j;
2682     MpegEncContext best_s, backup_s;
2683     uint8_t bit_buf[2][MAX_MB_BYTES];
2684     uint8_t bit_buf2[2][MAX_MB_BYTES];
2685     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2686     PutBitContext pb[2], pb2[2], tex_pb[2];
2687
2688     ff_check_alignment();
2689
2690     for(i=0; i<2; i++){
2691         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2692         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2693         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2694     }
2695
2696     s->last_bits= put_bits_count(&s->pb);
2697     s->mv_bits=0;
2698     s->misc_bits=0;
2699     s->i_tex_bits=0;
2700     s->p_tex_bits=0;
2701     s->i_count=0;
2702     s->f_count=0;
2703     s->b_count=0;
2704     s->skip_count=0;
2705
2706     for(i=0; i<3; i++){
2707         /* init last dc values */
2708         /* note: quant matrix value (8) is implied here */
2709         s->last_dc[i] = 128 << s->intra_dc_precision;
2710
2711         s->current_picture.error[i] = 0;
2712     }
2713     if(s->codec_id==AV_CODEC_ID_AMV){
2714         s->last_dc[0] = 128*8/13;
2715         s->last_dc[1] = 128*8/14;
2716         s->last_dc[2] = 128*8/14;
2717     }
2718     s->mb_skip_run = 0;
2719     memset(s->last_mv, 0, sizeof(s->last_mv));
2720
2721     s->last_mv_dir = 0;
2722
2723     switch(s->codec_id){
2724     case AV_CODEC_ID_H263:
2725     case AV_CODEC_ID_H263P:
2726     case AV_CODEC_ID_FLV1:
2727         if (CONFIG_H263_ENCODER)
2728             s->gob_index = ff_h263_get_gob_height(s);
2729         break;
2730     case AV_CODEC_ID_MPEG4:
2731         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2732             ff_mpeg4_init_partitions(s);
2733         break;
2734     }
2735
2736     s->resync_mb_x=0;
2737     s->resync_mb_y=0;
2738     s->first_slice_line = 1;
2739     s->ptr_lastgob = s->pb.buf;
2740     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2741         s->mb_x=0;
2742         s->mb_y= mb_y;
2743
2744         ff_set_qscale(s, s->qscale);
2745         ff_init_block_index(s);
2746
2747         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2748             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2749             int mb_type= s->mb_type[xy];
2750 //            int d;
2751             int dmin= INT_MAX;
2752             int dir;
2753
2754             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2755                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2756                 return -1;
2757             }
2758             if(s->data_partitioning){
2759                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2760                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2761                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2762                     return -1;
2763                 }
2764             }
2765
2766             s->mb_x = mb_x;
2767             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2768             ff_update_block_index(s);
2769
2770             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2771                 ff_h261_reorder_mb_index(s);
2772                 xy= s->mb_y*s->mb_stride + s->mb_x;
2773                 mb_type= s->mb_type[xy];
2774             }
2775
2776             /* write gob / video packet header  */
2777             if(s->rtp_mode){
2778                 int current_packet_size, is_gob_start;
2779
2780                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2781
2782                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2783
2784                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2785
2786                 switch(s->codec_id){
2787                 case AV_CODEC_ID_H263:
2788                 case AV_CODEC_ID_H263P:
2789                     if(!s->h263_slice_structured)
2790                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2791                     break;
2792                 case AV_CODEC_ID_MPEG2VIDEO:
2793                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2794                 case AV_CODEC_ID_MPEG1VIDEO:
2795                     if(s->mb_skip_run) is_gob_start=0;
2796                     break;
2797                 case AV_CODEC_ID_MJPEG:
2798                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2799                     break;
2800                 }
2801
2802                 if(is_gob_start){
2803                     if(s->start_mb_y != mb_y || mb_x!=0){
2804                         write_slice_end(s);
2805
2806                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2807                             ff_mpeg4_init_partitions(s);
2808                         }
2809                     }
2810
2811                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2812                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2813
2814                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2815                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2816                         int d = 100 / s->error_rate;
2817                         if(r % d == 0){
2818                             current_packet_size=0;
2819                             s->pb.buf_ptr= s->ptr_lastgob;
2820                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2821                         }
2822                     }
2823
2824                     if (s->avctx->rtp_callback){
2825                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2826                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2827                     }
2828                     update_mb_info(s, 1);
2829
2830                     switch(s->codec_id){
2831                     case AV_CODEC_ID_MPEG4:
2832                         if (CONFIG_MPEG4_ENCODER) {
2833                             ff_mpeg4_encode_video_packet_header(s);
2834                             ff_mpeg4_clean_buffers(s);
2835                         }
2836                     break;
2837                     case AV_CODEC_ID_MPEG1VIDEO:
2838                     case AV_CODEC_ID_MPEG2VIDEO:
2839                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2840                             ff_mpeg1_encode_slice_header(s);
2841                             ff_mpeg1_clean_buffers(s);
2842                         }
2843                     break;
2844                     case AV_CODEC_ID_H263:
2845                     case AV_CODEC_ID_H263P:
2846                         if (CONFIG_H263_ENCODER)
2847                             ff_h263_encode_gob_header(s, mb_y);
2848                     break;
2849                     }
2850
2851                     if(s->flags&CODEC_FLAG_PASS1){
2852                         int bits= put_bits_count(&s->pb);
2853                         s->misc_bits+= bits - s->last_bits;
2854                         s->last_bits= bits;
2855                     }
2856
2857                     s->ptr_lastgob += current_packet_size;
2858                     s->first_slice_line=1;
2859                     s->resync_mb_x=mb_x;
2860                     s->resync_mb_y=mb_y;
2861                 }
2862             }
2863
2864             if(  (s->resync_mb_x   == s->mb_x)
2865                && s->resync_mb_y+1 == s->mb_y){
2866                 s->first_slice_line=0;
2867             }
2868
2869             s->mb_skipped=0;
2870             s->dquant=0; //only for QP_RD
2871
2872             update_mb_info(s, 0);
2873
2874             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2875                 int next_block=0;
2876                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2877
2878                 copy_context_before_encode(&backup_s, s, -1);
2879                 backup_s.pb= s->pb;
2880                 best_s.data_partitioning= s->data_partitioning;
2881                 best_s.partitioned_frame= s->partitioned_frame;
2882                 if(s->data_partitioning){
2883                     backup_s.pb2= s->pb2;
2884                     backup_s.tex_pb= s->tex_pb;
2885                 }
2886
2887                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2888                     s->mv_dir = MV_DIR_FORWARD;
2889                     s->mv_type = MV_TYPE_16X16;
2890                     s->mb_intra= 0;
2891                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2892                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2893                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2894                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2895                 }
2896                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2897                     s->mv_dir = MV_DIR_FORWARD;
2898                     s->mv_type = MV_TYPE_FIELD;
2899                     s->mb_intra= 0;
2900                     for(i=0; i<2; i++){
2901                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2902                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2903                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2904                     }
2905                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2906                                  &dmin, &next_block, 0, 0);
2907                 }
2908                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2909                     s->mv_dir = MV_DIR_FORWARD;
2910                     s->mv_type = MV_TYPE_16X16;
2911                     s->mb_intra= 0;
2912                     s->mv[0][0][0] = 0;
2913                     s->mv[0][0][1] = 0;
2914                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2915                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2916                 }
2917                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2918                     s->mv_dir = MV_DIR_FORWARD;
2919                     s->mv_type = MV_TYPE_8X8;
2920                     s->mb_intra= 0;
2921                     for(i=0; i<4; i++){
2922                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2923                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2924                     }
2925                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2926                                  &dmin, &next_block, 0, 0);
2927                 }
2928                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2929                     s->mv_dir = MV_DIR_FORWARD;
2930                     s->mv_type = MV_TYPE_16X16;
2931                     s->mb_intra= 0;
2932                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2933                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2934                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2935                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2936                 }
2937                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2938                     s->mv_dir = MV_DIR_BACKWARD;
2939                     s->mv_type = MV_TYPE_16X16;
2940                     s->mb_intra= 0;
2941                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2942                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2943                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2944                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2945                 }
2946                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2947                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2948                     s->mv_type = MV_TYPE_16X16;
2949                     s->mb_intra= 0;
2950                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2951                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2952                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2953                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2954                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2955                                  &dmin, &next_block, 0, 0);
2956                 }
2957                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2958                     s->mv_dir = MV_DIR_FORWARD;
2959                     s->mv_type = MV_TYPE_FIELD;
2960                     s->mb_intra= 0;
2961                     for(i=0; i<2; i++){
2962                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2963                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2964                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2965                     }
2966                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2967                                  &dmin, &next_block, 0, 0);
2968                 }
2969                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2970                     s->mv_dir = MV_DIR_BACKWARD;
2971                     s->mv_type = MV_TYPE_FIELD;
2972                     s->mb_intra= 0;
2973                     for(i=0; i<2; i++){
2974                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2975                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2976                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2977                     }
2978                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2979                                  &dmin, &next_block, 0, 0);
2980                 }
2981                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2982                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2983                     s->mv_type = MV_TYPE_FIELD;
2984                     s->mb_intra= 0;
2985                     for(dir=0; dir<2; dir++){
2986                         for(i=0; i<2; i++){
2987                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2988                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2989                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2990                         }
2991                     }
2992                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2993                                  &dmin, &next_block, 0, 0);
2994                 }
2995                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2996                     s->mv_dir = 0;
2997                     s->mv_type = MV_TYPE_16X16;
2998                     s->mb_intra= 1;
2999                     s->mv[0][0][0] = 0;
3000                     s->mv[0][0][1] = 0;
3001                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3002                                  &dmin, &next_block, 0, 0);
3003                     if(s->h263_pred || s->h263_aic){
3004                         if(best_s.mb_intra)
3005                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3006                         else
3007                             ff_clean_intra_table_entries(s); //old mode?
3008                     }
3009                 }
3010
3011                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3012                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3013                         const int last_qp= backup_s.qscale;
3014                         int qpi, qp, dc[6];
3015                         int16_t ac[6][16];
3016                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3017                         static const int dquant_tab[4]={-1,1,-2,2};
3018                         int storecoefs = s->mb_intra && s->dc_val[0];
3019
3020                         av_assert2(backup_s.dquant == 0);
3021
3022                         //FIXME intra
3023                         s->mv_dir= best_s.mv_dir;
3024                         s->mv_type = MV_TYPE_16X16;
3025                         s->mb_intra= best_s.mb_intra;
3026                         s->mv[0][0][0] = best_s.mv[0][0][0];
3027                         s->mv[0][0][1] = best_s.mv[0][0][1];
3028                         s->mv[1][0][0] = best_s.mv[1][0][0];
3029                         s->mv[1][0][1] = best_s.mv[1][0][1];
3030
3031                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3032                         for(; qpi<4; qpi++){
3033                             int dquant= dquant_tab[qpi];
3034                             qp= last_qp + dquant;
3035                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3036                                 continue;
3037                             backup_s.dquant= dquant;
3038                             if(storecoefs){
3039                                 for(i=0; i<6; i++){
3040                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3041                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3042                                 }
3043                             }
3044
3045                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3046                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3047                             if(best_s.qscale != qp){
3048                                 if(storecoefs){
3049                                     for(i=0; i<6; i++){
3050                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3051                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3052                                     }
3053                                 }
3054                             }
3055                         }
3056                     }
3057                 }
3058                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3059                     int mx= s->b_direct_mv_table[xy][0];
3060                     int my= s->b_direct_mv_table[xy][1];
3061
3062                     backup_s.dquant = 0;
3063                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3064                     s->mb_intra= 0;
3065                     ff_mpeg4_set_direct_mv(s, mx, my);
3066                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3067                                  &dmin, &next_block, mx, my);
3068                 }
3069                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3070                     backup_s.dquant = 0;
3071                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3072                     s->mb_intra= 0;
3073                     ff_mpeg4_set_direct_mv(s, 0, 0);
3074                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3075                                  &dmin, &next_block, 0, 0);
3076                 }
3077                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3078                     int coded=0;
3079                     for(i=0; i<6; i++)
3080                         coded |= s->block_last_index[i];
3081                     if(coded){
3082                         int mx,my;
3083                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3084                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3085                             mx=my=0; //FIXME find the one we actually used
3086                             ff_mpeg4_set_direct_mv(s, mx, my);
3087                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3088                             mx= s->mv[1][0][0];
3089                             my= s->mv[1][0][1];
3090                         }else{
3091                             mx= s->mv[0][0][0];
3092                             my= s->mv[0][0][1];
3093                         }
3094
3095                         s->mv_dir= best_s.mv_dir;
3096                         s->mv_type = best_s.mv_type;
3097                         s->mb_intra= 0;
3098 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3099                         s->mv[0][0][1] = best_s.mv[0][0][1];
3100                         s->mv[1][0][0] = best_s.mv[1][0][0];
3101                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3102                         backup_s.dquant= 0;
3103                         s->skipdct=1;
3104                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3105                                         &dmin, &next_block, mx, my);
3106                         s->skipdct=0;
3107                     }
3108                 }
3109
3110                 s->current_picture.qscale_table[xy] = best_s.qscale;
3111
3112                 copy_context_after_encode(s, &best_s, -1);
3113
3114                 pb_bits_count= put_bits_count(&s->pb);
3115                 flush_put_bits(&s->pb);
3116                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3117                 s->pb= backup_s.pb;
3118
3119                 if(s->data_partitioning){
3120                     pb2_bits_count= put_bits_count(&s->pb2);
3121                     flush_put_bits(&s->pb2);
3122                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3123                     s->pb2= backup_s.pb2;
3124
3125                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3126                     flush_put_bits(&s->tex_pb);
3127                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3128                     s->tex_pb= backup_s.tex_pb;
3129                 }
3130                 s->last_bits= put_bits_count(&s->pb);
3131
3132                 if (CONFIG_H263_ENCODER &&
3133                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3134                     ff_h263_update_motion_val(s);
3135
3136                 if(next_block==0){ //FIXME 16 vs linesize16
3137                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
3138                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3139                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3140                 }
3141
3142                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3143                     ff_MPV_decode_mb(s, s->block);
3144             } else {
3145                 int motion_x = 0, motion_y = 0;
3146                 s->mv_type=MV_TYPE_16X16;
3147                 // only one MB-Type possible
3148
3149                 switch(mb_type){
3150                 case CANDIDATE_MB_TYPE_INTRA:
3151                     s->mv_dir = 0;
3152                     s->mb_intra= 1;
3153                     motion_x= s->mv[0][0][0] = 0;
3154                     motion_y= s->mv[0][0][1] = 0;
3155                     break;
3156                 case CANDIDATE_MB_TYPE_INTER:
3157                     s->mv_dir = MV_DIR_FORWARD;
3158                     s->mb_intra= 0;
3159                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3160                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3161                     break;
3162                 case CANDIDATE_MB_TYPE_INTER_I:
3163                     s->mv_dir = MV_DIR_FORWARD;
3164                     s->mv_type = MV_TYPE_FIELD;
3165                     s->mb_intra= 0;
3166                     for(i=0; i<2; i++){
3167                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3168                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3169                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3170                     }
3171                     break;
3172                 case CANDIDATE_MB_TYPE_INTER4V:
3173                     s->mv_dir = MV_DIR_FORWARD;
3174                     s->mv_type = MV_TYPE_8X8;
3175                     s->mb_intra= 0;
3176                     for(i=0; i<4; i++){
3177                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3178                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3179                     }
3180                     break;
3181                 case CANDIDATE_MB_TYPE_DIRECT:
3182                     if (CONFIG_MPEG4_ENCODER) {
3183                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3184                         s->mb_intra= 0;
3185                         motion_x=s->b_direct_mv_table[xy][0];
3186                         motion_y=s->b_direct_mv_table[xy][1];
3187                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3188                     }
3189                     break;
3190                 case CANDIDATE_MB_TYPE_DIRECT0:
3191                     if (CONFIG_MPEG4_ENCODER) {
3192                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3193                         s->mb_intra= 0;
3194                         ff_mpeg4_set_direct_mv(s, 0, 0);
3195                     }
3196                     break;
3197                 case CANDIDATE_MB_TYPE_BIDIR:
3198                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3199                     s->mb_intra= 0;
3200                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3201                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3202                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3203                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3204                     break;
3205                 case CANDIDATE_MB_TYPE_BACKWARD:
3206                     s->mv_dir = MV_DIR_BACKWARD;
3207                     s->mb_intra= 0;
3208                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3209                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3210                     break;
3211                 case CANDIDATE_MB_TYPE_FORWARD:
3212                     s->mv_dir = MV_DIR_FORWARD;
3213                     s->mb_intra= 0;
3214                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3215                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3216                     break;
3217                 case CANDIDATE_MB_TYPE_FORWARD_I:
3218                     s->mv_dir = MV_DIR_FORWARD;
3219                     s->mv_type = MV_TYPE_FIELD;
3220                     s->mb_intra= 0;
3221                     for(i=0; i<2; i++){
3222                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3223                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3224                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3225                     }
3226                     break;
3227                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3228                     s->mv_dir = MV_DIR_BACKWARD;
3229                     s->mv_type = MV_TYPE_FIELD;
3230                     s->mb_intra= 0;
3231                     for(i=0; i<2; i++){
3232                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3233                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3234                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3235                     }
3236                     break;
3237                 case CANDIDATE_MB_TYPE_BIDIR_I:
3238                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3239                     s->mv_type = MV_TYPE_FIELD;
3240                     s->mb_intra= 0;
3241                     for(dir=0; dir<2; dir++){
3242                         for(i=0; i<2; i++){
3243                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3244                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3245                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3246                         }
3247                     }
3248                     break;
3249                 default:
3250                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3251                 }
3252
3253                 encode_mb(s, motion_x, motion_y);
3254
3255                 // RAL: Update last macroblock type
3256                 s->last_mv_dir = s->mv_dir;
3257
3258                 if (CONFIG_H263_ENCODER &&
3259                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3260                     ff_h263_update_motion_val(s);
3261
3262                 ff_MPV_decode_mb(s, s->block);
3263             }
3264
3265             /* clean the MV table in IPS frames for direct mode in B frames */
3266             if(s->mb_intra /* && I,P,S_TYPE */){
3267                 s->p_mv_table[xy][0]=0;
3268                 s->p_mv_table[xy][1]=0;
3269             }
3270
3271             if(s->flags&CODEC_FLAG_PSNR){
3272                 int w= 16;
3273                 int h= 16;
3274
3275                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3276                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3277
3278                 s->current_picture.error[0] += sse(
3279                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3280                     s->dest[0], w, h, s->linesize);
3281                 s->current_picture.error[1] += sse(
3282                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3283                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3284                 s->current_picture.error[2] += sse(
3285                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3286                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3287             }
3288             if(s->loop_filter){
3289                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3290                     ff_h263_loop_filter(s);
3291             }
3292             av_dlog(s->avctx, "MB %d %d bits\n",
3293                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3294         }
3295     }
3296
3297     //not beautiful here but we must write it before flushing so it has to be here
3298     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3299         ff_msmpeg4_encode_ext_header(s);
3300
3301     write_slice_end(s);
3302
3303     /* Send the last GOB if RTP */
3304     if (s->avctx->rtp_callback) {
3305         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3306         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3307         /* Call the RTP callback to send the last GOB */
3308         emms_c();
3309         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3310     }
3311
3312     return 0;
3313 }
3314
/*
 * Add src->field into dst->field and reset src->field to zero.
 * Expects pointers named `dst` and `src` to be in scope at the call site.
 * Wrapped in do { } while (0) so that the two assignments form a single
 * statement and stay together under an unbraced if/else/for.
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
3316 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3317     MERGE(me.scene_change_score);
3318     MERGE(me.mc_mb_var_sum_temp);
3319     MERGE(me.mb_var_sum_temp);
3320 }
3321
3322 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3323     int i;
3324
3325     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3326     MERGE(dct_count[1]);
3327     MERGE(mv_bits);
3328     MERGE(i_tex_bits);
3329     MERGE(p_tex_bits);
3330     MERGE(i_count);
3331     MERGE(f_count);
3332     MERGE(b_count);
3333     MERGE(skip_count);
3334     MERGE(misc_bits);
3335     MERGE(er.error_count);
3336     MERGE(padding_bug_score);
3337     MERGE(current_picture.error[0]);
3338     MERGE(current_picture.error[1]);
3339     MERGE(current_picture.error[2]);
3340
3341     if(dst->avctx->noise_reduction){
3342         for(i=0; i<64; i++){
3343             MERGE(dct_error_sum[0][i]);
3344             MERGE(dct_error_sum[1][i]);
3345         }
3346     }
3347
3348     assert(put_bits_count(&src->pb) % 8 ==0);
3349     assert(put_bits_count(&dst->pb) % 8 ==0);
3350     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3351     flush_put_bits(&dst->pb);
3352 }
3353
3354 static int estimate_qp(MpegEncContext *s, int dry_run){
3355     if (s->next_lambda){
3356         s->current_picture_ptr->f->quality =
3357         s->current_picture.f->quality = s->next_lambda;
3358         if(!dry_run) s->next_lambda= 0;
3359     } else if (!s->fixed_qscale) {
3360         s->current_picture_ptr->f->quality =
3361         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3362         if (s->current_picture.f->quality < 0)
3363             return -1;
3364     }
3365
3366     if(s->adaptive_quant){
3367         switch(s->codec_id){
3368         case AV_CODEC_ID_MPEG4:
3369             if (CONFIG_MPEG4_ENCODER)
3370                 ff_clean_mpeg4_qscales(s);
3371             break;
3372         case AV_CODEC_ID_H263:
3373         case AV_CODEC_ID_H263P:
3374         case AV_CODEC_ID_FLV1:
3375             if (CONFIG_H263_ENCODER)
3376                 ff_clean_h263_qscales(s);
3377             break;
3378         default:
3379             ff_init_qscale_tab(s);
3380         }
3381
3382         s->lambda= s->lambda_table[0];
3383         //FIXME broken
3384     }else
3385         s->lambda = s->current_picture.f->quality;
3386     update_qscale(s);
3387     return 0;
3388 }
3389
3390 /* must be called before writing the header */
3391 static void set_frame_distances(MpegEncContext * s){
3392     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3393     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3394
3395     if(s->pict_type==AV_PICTURE_TYPE_B){
3396         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3397         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3398     }else{
3399         s->pp_time= s->time - s->last_non_b_time;
3400         s->last_non_b_time= s->time;
3401         assert(s->picture_number==0 || s->pp_time > 0);
3402     }
3403 }
3404
/**
 * Encode one picture.
 *
 * In order: set up timing and rounding state, choose a starting lambda, run
 * motion estimation (or, for I pictures, the spatial variance pass) over all
 * slice contexts, optionally turn a P picture into an I picture on a scene
 * change, pick f_code/b_code, settle the final quantiser via estimate_qp(),
 * write the format-specific picture header, and finally run encode_thread on
 * every slice context and merge the results back into s.
 *
 * @param s              encoder context; results of the slice contexts
 *                       s->thread_context[1..] are merged into it
 * @param picture_number stored in s->picture_number and handed to the
 *                       picture header writers
 * @return 0 on success; -1 if estimate_qp() or ff_init_me() fails; otherwise
 *         the negative value returned by ff_update_duplicate_context()
 */
3405 static int encode_picture(MpegEncContext *s, int picture_number)
3406 {
3407     int i, ret;
3408     int bits;
3409     int context_count = s->slice_context_count;
3410
3411     s->picture_number = picture_number;
3412
3413     /* Reset the average MB variance */
3414     s->me.mb_var_sum_temp    =
3415     s->me.mc_mb_var_sum_temp = 0;
3416
3417     /* we need to initialize some time vars before we can encode b-frames */
3418     // RAL: Condition added for MPEG1VIDEO
3419     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3420         set_frame_distances(s);
3421     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3422         ff_set_mpeg4_time(s);
3423
3424     s->me.scene_change_score=0;
3425
3426 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3427
         /* Rounding mode: I pictures set it from the msmpeg4 version; other
          * non-B pictures toggle it when flip-flop rounding is enabled or the
          * codec is H.263+ or MPEG-4. B pictures leave it unchanged. */
3428     if(s->pict_type==AV_PICTURE_TYPE_I){
3429         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3430         else                        s->no_rounding=0;
3431     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3432         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3433             s->no_rounding ^= 1;
3434     }
3435
         /* Starting lambda for motion estimation: in pass 2 do a dry-run
          * quantiser estimate; otherwise, unless a constant qscale was
          * requested, reuse the lambda last stored for the relevant picture
          * type. */
3436     if(s->flags & CODEC_FLAG_PASS2){
3437         if (estimate_qp(s,1) < 0)
3438             return -1;
3439         ff_get_2pass_fcode(s);
3440     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3441         if(s->pict_type==AV_PICTURE_TYPE_B)
3442             s->lambda= s->last_lambda_for[s->pict_type];
3443         else
3444             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3445         update_qscale(s);
3446     }
3447
         /* Every codec except AMV and MJPEG uses the luma intra quantiser
          * tables for chroma too; chroma tables that are distinct from the
          * luma ones are freed before being replaced by the alias. */
3448     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3449         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3450         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3451         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3452         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3453     }
3454
3455     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* bring the other slice contexts up to date with s before the ME pass */
3456     for(i=1; i<context_count; i++){
3457         ret = ff_update_duplicate_context(s->thread_context[i], s);
3458         if (ret < 0)
3459             return ret;
3460     }
3461
3462     if(ff_init_me(s)<0)
3463         return -1;
3464
3465     /* Estimate motion for every MB */
3466     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda and lambda2 by me_penalty_compensation/256, rounded */
3467         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3468         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3469         if (s->pict_type != AV_PICTURE_TYPE_B) {
                 /* pre-pass: when pre_me is 2, or when pre_me is set and the
                  * last non-B picture was an I picture */
3470             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3471                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3472             }
3473         }
3474
3475         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3476     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3477         /* I-Frame */
3478         for(i=0; i<s->mb_stride*s->mb_height; i++)
3479             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3480
3481         if(!s->fixed_qscale){
3482             /* finding spatial complexity for I-frame rate control */
3483             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3484         }
3485     }
         /* collect the ME statistics of the other slice contexts into s */
3486     for(i=1; i<context_count; i++){
3487         merge_context_after_me(s, s->thread_context[i]);
3488     }
3489     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3490     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3491     emms_c();
3492
         /* Scene change: a P picture whose score exceeds the threshold is
          * re-typed as an I picture and every macroblock is forced to intra. */
3493     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3494         s->pict_type= AV_PICTURE_TYPE_I;
3495         for(i=0; i<s->mb_stride*s->mb_height; i++)
3496             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3497         if(s->msmpeg4_version >= 3)
3498             s->no_rounding=1;
3499         av_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3500                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3501     }
3502
         /* f_code/b_code selection; skipped entirely when umvplus is set */
3503     if(!s->umvplus){
3504         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3505             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3506
                 /* with interlaced ME the field MV tables are considered too
                  * and the largest f_code wins */
3507             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3508                 int a,b;
3509                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3510                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3511                 s->f_code= FFMAX3(s->f_code, a, b);
3512             }
3513
                 /* NOTE(review): the ff_fix_long_* helpers presumably deal with
                  * vectors that do not fit the chosen f_code -- confirm in
                  * their definitions */
3514             ff_fix_long_p_mvs(s);
3515             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3516             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3517                 int j;
3518                 for(i=0; i<2; i++){
3519                     for(j=0; j<2; j++)
3520                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3521                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3522                 }
3523             }
3524         }
3525
3526         if(s->pict_type==AV_PICTURE_TYPE_B){
3527             int a, b;
3528
                 /* f_code covers the forward tables, b_code the backward ones;
                  * each is the maximum over the single-direction and the
                  * bidirectional table */
3529             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3530             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3531             s->f_code = FFMAX(a, b);
3532
3533             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3534             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3535             s->b_code = FFMAX(a, b);
3536
3537             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3538             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3539             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3540             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3541             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3542                 int dir, j;
3543                 for(dir=0; dir<2; dir++){
3544                     for(i=0; i<2; i++){
3545                         for(j=0; j<2; j++){
3546                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3547                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3548                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3549                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3550                         }
3551                     }
3552                 }
3553             }
3554         }
3555     }
3556
         /* final (non-dry-run) quantiser decision for this picture */
3557     if (estimate_qp(s, 0) < 0)
3558         return -1;
3559
3560     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3561         s->qscale= 3; //reduce clipping problems
3562
         /* MJPEG: start from the MPEG-1 default intra matrix unless the user
          * supplied intra (and optionally chroma intra) matrices */
3563     if (s->out_format == FMT_MJPEG) {
3564         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3565         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3566
3567         if (s->avctx->intra_matrix) {
3568             chroma_matrix =
3569             luma_matrix = s->avctx->intra_matrix;
3570         }
3571         if (s->avctx->chroma_intra_matrix)
3572             chroma_matrix = s->avctx->chroma_intra_matrix;
3573
3574         /* for mjpeg, we do include qscale in the matrix */
3575         for(i=1;i<64;i++){
3576             int j = s->idsp.idct_permutation[i];
3577
                 /* entry * qscale / 8, clipped to 8 bits */
3578             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3579             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3580         }
3581         s->y_dc_scale_table=
3582         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3583         s->chroma_intra_matrix[0] =
3584         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3585         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3586                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3587         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3588                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
             /* qscale is already folded into the matrices, so pin it to 8 */
3589         s->qscale= 8;
3590     }
         /* AMV: fixed tables taken from sp5x_quant_table and constant DC
          * scales (13 for luma, 14 for chroma) */
3591     if(s->codec_id == AV_CODEC_ID_AMV){
3592         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3593         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3594         for(i=1;i<64;i++){
3595             int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
3596
3597             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3598             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3599         }
3600         s->y_dc_scale_table= y;
3601         s->c_dc_scale_table= c;
3602         s->intra_matrix[0] = 13;
3603         s->chroma_intra_matrix[0] = 14;
3604         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3605                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3606         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3607                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3608         s->qscale= 8;
3609     }
3610
3611     //FIXME var duplication
3612     s->current_picture_ptr->f->key_frame =
3613     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3614     s->current_picture_ptr->f->pict_type =
3615     s->current_picture.f->pict_type = s->pict_type;
3616
         /* a key frame restarts the in-GOP picture counter */
3617     if (s->current_picture.f->key_frame)
3618         s->picture_in_gop_number=0;
3619
3620     s->mb_x = s->mb_y = 0;
         /* remember the bit position so the header size can be measured below */
3621     s->last_bits= put_bits_count(&s->pb);
         /* write the picture header for the selected output format */
3622     switch(s->out_format) {
3623     case FMT_MJPEG:
3624         if (CONFIG_MJPEG_ENCODER)
3625             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3626                                            s->intra_matrix, s->chroma_intra_matrix);
3627         break;
3628     case FMT_H261:
3629         if (CONFIG_H261_ENCODER)
3630             ff_h261_encode_picture_header(s, picture_number);
3631         break;
3632     case FMT_H263:
             /* several codecs share FMT_H263; the first matching test wins */
3633         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3634             ff_wmv2_encode_picture_header(s, picture_number);
3635         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3636             ff_msmpeg4_encode_picture_header(s, picture_number);
3637         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3638             ff_mpeg4_encode_picture_header(s, picture_number);
3639         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3640             ff_rv10_encode_picture_header(s, picture_number);
3641         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3642             ff_rv20_encode_picture_header(s, picture_number);
3643         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3644             ff_flv_encode_picture_header(s, picture_number);
3645         else if (CONFIG_H263_ENCODER)
3646             ff_h263_encode_picture_header(s, picture_number);
3647         break;
3648     case FMT_MPEG1:
3649         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3650             ff_mpeg1_encode_picture_header(s, picture_number);
3651         break;
3652     default:
3653         av_assert0(0);
3654     }
3655     bits= put_bits_count(&s->pb);
3656     s->header_bits= bits - s->last_bits;
3657
         /* hand the post-ME state to the other slice contexts, encode all
          * slices, then merge their statistics and bitstreams back into s */
3658     for(i=1; i<context_count; i++){
3659         update_duplicate_context_after_me(s->thread_context[i], s);
3660     }
3661     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3662     for(i=1; i<context_count; i++){
3663         merge_context_after_encode(s, s->thread_context[i]);
3664     }
3665     emms_c();
3666     return 0;
3667 }
3668
3669 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3670     const int intra= s->mb_intra;
3671     int i;
3672
3673     s->dct_count[intra]++;
3674
3675     for(i=0; i<64; i++){
3676         int level= block[i];
3677
3678         if(level){
3679             if(level>0){
3680                 s->dct_error_sum[intra][i] += level;
3681                 level -= s->dct_offset[intra][i];
3682                 if(level<0) level=0;
3683             }else{
3684                 s->dct_error_sum[intra][i] -= level;
3685                 level += s->dct_offset[intra][i];
3686                 if(level>0) level=0;
3687             }
3688             block[i]= level;
3689         }
3690     }
3691 }
3692
3693 static int dct_quantize_trellis_c(MpegEncContext *s,
3694                                   int16_t *block, int n,
3695                                   int qscale, int *overflow){
3696     const int *qmat;
3697     const uint8_t *scantable= s->intra_scantable.scantable;
3698     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3699     int max=0;
3700     unsigned int threshold1, threshold2;
3701     int bias=0;
3702     int run_tab[65];
3703     int level_tab[65];
3704     int score_tab[65];
3705     int survivor[65];
3706     int survivor_count;
3707     int last_run=0;
3708     int last_level=0;
3709     int last_score= 0;
3710     int last_i;
3711     int coeff[2][64];
3712     int coeff_count[64];
3713     int qmul, qadd, start_i, last_non_zero, i, dc;
3714     const int esc_length= s->ac_esc_length;
3715     uint8_t * length;
3716     uint8_t * last_length;
3717     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3718
3719     s->fdsp.fdct(block);
3720
3721     if(s->dct_error_sum)
3722         s->denoise_dct(s, block);
3723     qmul= qscale*16;
3724     qadd= ((qscale-1)|1)*8;
3725
3726     if (s->mb_intra) {
3727         int q;
3728         if (!s->h263_aic) {
3729             if (n < 4)
3730                 q = s->y_dc_scale;
3731             else
3732                 q = s->c_dc_scale;
3733             q = q << 3;
3734         } else{
3735             /* For AIC we skip quant/dequant of INTRADC */
3736             q = 1 << 3;
3737             qadd=0;
3738         }
3739
3740         /* note: block[0] is assumed to be positive */
3741         block[0] = (block[0] + (q >> 1)) / q;
3742         start_i = 1;
3743         last_non_zero = 0;
3744         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3745         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3746             bias= 1<<(QMAT_SHIFT-1);
3747         length     = s->intra_ac_vlc_length;
3748         last_length= s->intra_ac_vlc_last_length;
3749     } else {
3750         start_i = 0;
3751         last_non_zero = -1;
3752         qmat = s->q_inter_matrix[qscale];
3753         length     = s->inter_ac_vlc_length;
3754         last_length= s->inter_ac_vlc_last_length;
3755     }
3756     last_i= start_i;
3757
3758     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3759     threshold2= (threshold1<<1);
3760
3761     for(i=63; i>=start_i; i--) {
3762         const int j = scantable[i];
3763         int level = block[j] * qmat[j];
3764
3765         if(((unsigned)(level+threshold1))>threshold2){
3766             last_non_zero = i;
3767             break;
3768         }
3769     }
3770
3771     for(i=start_i; i<=last_non_zero; i++) {
3772         const int j = scantable[i];
3773         int level = block[j] * qmat[j];
3774
3775 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3776 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3777         if(((unsigned)(level+threshold1))>threshold2){
3778             if(level>0){
3779                 level= (bias + level)>>QMAT_SHIFT;
3780                 coeff[0][i]= level;
3781                 coeff[1][i]= level-1;
3782 //                coeff[2][k]= level-2;
3783             }else{
3784                 level= (bias - level)>>QMAT_SHIFT;
3785                 coeff[0][i]= -level;
3786                 coeff[1][i]= -level+1;
3787 //                coeff[2][k]= -level+2;
3788             }
3789             coeff_count[i]= FFMIN(level, 2);
3790             av_assert2(coeff_count[i]);
3791             max |=level;
3792         }else{
3793             coeff[0][i]= (level>>31)|1;
3794             coeff_count[i]= 1;
3795         }
3796     }
3797
3798     *overflow= s->max_qcoeff < max; //overflow might have happened
3799
3800     if(last_non_zero < start_i){
3801         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3802         return last_non_zero;
3803     }
3804
3805     score_tab[start_i]= 0;
3806     survivor[0]= start_i;
3807     survivor_count= 1;
3808
3809     for(i=start_i; i<=last_non_zero; i++){
3810         int level_index, j, zero_distortion;
3811         int dct_coeff= FFABS(block[ scantable[i] ]);
3812         int best_score=256*256*256*120;
3813
3814         if (s->fdsp.fdct == ff_fdct_ifast)
3815             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3816         zero_distortion= dct_coeff*dct_coeff;
3817
3818         for(level_index=0; level_index < coeff_count[i]; level_index++){
3819             int distortion;
3820             int level= coeff[level_index][i];
3821             const int alevel= FFABS(level);
3822             int unquant_coeff;
3823
3824             av_assert2(level);
3825
3826             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3827                 unquant_coeff= alevel*qmul + qadd;
3828             }else{ //MPEG1
3829                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3830                 if(s->mb_intra){
3831                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3832                         unquant_coeff =   (unquant_coeff - 1) | 1;
3833                 }else{
3834                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3835                         unquant_coeff =   (unquant_coeff - 1) | 1;
3836                 }
3837                 unquant_coeff<<= 3;
3838             }
3839
3840             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3841             level+=64;
3842             if((level&(~127)) == 0){
3843                 for(j=survivor_count-1; j>=0; j--){
3844                     int run= i - survivor[j];
3845                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3846                     score += score_tab[i-run];
3847
3848                     if(score < best_score){
3849                         best_score= score;
3850                         run_tab[i+1]= run;
3851                         level_tab[i+1]= level-64;
3852                     }
3853                 }
3854
3855                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3856                     for(j=survivor_count-1; j>=0; j--){
3857                         int run= i - survivor[j];
3858                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3859                         score += score_tab[i-run];
3860                         if(score < last_score){
3861                             last_score= score;
3862                             last_run= run;
3863                             last_level= level-64;
3864                             last_i= i+1;
3865                         }
3866                     }
3867                 }
3868             }else{
3869                 distortion += esc_length*lambda;
3870                 for(j=survivor_count-1; j>=0; j--){
3871                     int run= i - survivor[j];
3872                     int score= distortion + score_tab[i-run];
3873
3874                     if(score < best_score){
3875                         best_score= score;
3876                         run_tab[i+1]= run;
3877                         level_tab[i+1]= level-64;
3878                     }
3879                 }
3880
3881                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3882                   for(j=survivor_count-1; j>=0; j--){
3883                         int run= i - survivor[j];
3884                         int score= distortion + score_tab[i-run];
3885                         if(score < last_score){
3886                             last_score= score;
3887                             last_run= run;
3888                             last_level= level-64;
3889                             last_i= i+1;
3890                         }
3891                     }
3892                 }
3893             }
3894         }
3895
3896         score_tab[i+1]= best_score;
3897
3898         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
3899         if(last_non_zero <= 27){
3900             for(; survivor_count; survivor_count--){
3901                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3902                     break;
3903             }
3904         }else{
3905             for(; survivor_count; survivor_count--){
3906                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3907                     break;
3908             }
3909         }
3910
3911         survivor[ survivor_count++ ]= i+1;
3912     }
3913
3914     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
3915         last_score= 256*256*256*120;
3916         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3917             int score= score_tab[i];
3918             if(i) score += lambda*2; //FIXME exacter?
3919
3920             if(score < last_score){
3921                 last_score= score;
3922                 last_i= i;
3923                 last_level= level_tab[i];
3924                 last_run= run_tab[i];
3925             }
3926         }
3927     }
3928
3929     s->coded_score[n] = last_score;
3930
3931     dc= FFABS(block[0]);
3932     last_non_zero= last_i - 1;
3933     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3934
3935     if(last_non_zero < start_i)
3936         return last_non_zero;
3937
3938     if(last_non_zero == 0 && start_i == 0){
3939         int best_level= 0;
3940         int best_score= dc * dc;
3941
3942         for(i=0; i<coeff_count[0]; i++){
3943             int level= coeff[i][0];
3944             int alevel= FFABS(level);
3945             int unquant_coeff, score, distortion;
3946
3947             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3948                     unquant_coeff= (alevel*qmul + qadd)>>3;
3949             }else{ //MPEG1
3950                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3951                     unquant_coeff =   (unquant_coeff - 1) | 1;
3952             }
3953             unquant_coeff = (unquant_coeff + 4) >> 3;
3954             unquant_coeff<<= 3 + 3;
3955
3956             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3957             level+=64;
3958             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3959             else                    score= distortion + esc_length*lambda;
3960
3961             if(score < best_score){
3962                 best_score= score;
3963                 best_level= level - 64;
3964             }
3965         }
3966         block[0]= best_level;
3967         s->coded_score[n] = best_score - dc*dc;
3968         if(best_level == 0) return -1;
3969         else                return last_non_zero;
3970     }
3971
3972     i= last_i;
3973     av_assert2(last_level);
3974
3975     block[ perm_scantable[last_non_zero] ]= last_level;
3976     i -= last_run + 1;
3977
3978     for(; i>start_i; i -= run_tab[i] + 1){
3979         block[ perm_scantable[i-1] ]= level_tab[i];
3980     }
3981
3982     return last_non_zero;
3983 }
3984
3985 //#define REFINE_STATS 1
3986 static int16_t basis[64][64];
3987
3988 static void build_basis(uint8_t *perm){
3989     int i, j, x, y;
3990     emms_c();
3991     for(i=0; i<8; i++){
3992         for(j=0; j<8; j++){
3993             for(y=0; y<8; y++){
3994                 for(x=0; x<8; x++){
3995                     double s= 0.25*(1<<BASIS_SHIFT);
3996                     int index= 8*i + j;
3997                     int perm_index= perm[index];
3998                     if(i==0) s*= sqrt(0.5);
3999                     if(j==0) s*= sqrt(0.5);
4000                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4001                 }
4002             }
4003         }
4004     }
4005 }
4006
4007 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4008                         int16_t *block, int16_t *weight, int16_t *orig,
4009                         int n, int qscale){
4010     int16_t rem[64];
4011     LOCAL_ALIGNED_16(int16_t, d1, [64]);
4012     const uint8_t *scantable= s->intra_scantable.scantable;
4013     const uint8_t *perm_scantable= s->intra_scantable.permutated;
4014 //    unsigned int threshold1, threshold2;
4015 //    int bias=0;
4016     int run_tab[65];
4017     int prev_run=0;
4018     int prev_level=0;
4019     int qmul, qadd, start_i, last_non_zero, i, dc;
4020     uint8_t * length;
4021     uint8_t * last_length;
4022     int lambda;
4023     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4024 #ifdef REFINE_STATS
4025 static int count=0;
4026 static int after_last=0;
4027 static int to_zero=0;
4028 static int from_zero=0;
4029 static int raise=0;
4030 static int lower=0;
4031 static int messed_sign=0;
4032 #endif
4033
4034     if(basis[0][0] == 0)
4035         build_basis(s->idsp.idct_permutation);
4036
4037     qmul= qscale*2;
4038     qadd= (qscale-1)|1;
4039     if (s->mb_intra) {
4040         if (!s->h263_aic) {
4041             if (n < 4)
4042                 q = s->y_dc_scale;
4043             else
4044                 q = s->c_dc_scale;
4045         } else{
4046             /* For AIC we skip quant/dequant of INTRADC */
4047             q = 1;
4048             qadd=0;
4049         }
4050         q <<= RECON_SHIFT-3;
4051         /* note: block[0] is assumed to be positive */
4052         dc= block[0]*q;
4053 //        block[0] = (block[0] + (q >> 1)) / q;
4054         start_i = 1;
4055 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4056 //            bias= 1<<(QMAT_SHIFT-1);
4057         length     = s->intra_ac_vlc_length;
4058         last_length= s->intra_ac_vlc_last_length;
4059     } else {
4060         dc= 0;
4061         start_i = 0;
4062         length     = s->inter_ac_vlc_length;
4063         last_length= s->inter_ac_vlc_last_length;
4064     }
4065     last_non_zero = s->block_last_index[n];
4066
4067 #ifdef REFINE_STATS
4068 {START_TIMER
4069 #endif
4070     dc += (1<<(RECON_SHIFT-1));
4071     for(i=0; i<64; i++){
4072         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
4073     }
4074 #ifdef REFINE_STATS
4075 STOP_TIMER("memset rem[]")}
4076 #endif
4077     sum=0;
4078     for(i=0; i<64; i++){
4079         int one= 36;
4080         int qns=4;
4081         int w;
4082
4083         w= FFABS(weight[i]) + qns*one;
4084         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4085
4086         weight[i] = w;
4087 //        w=weight[i] = (63*qns + (w/2)) / w;
4088
4089         av_assert2(w>0);
4090         av_assert2(w<(1<<6));
4091         sum += w*w;
4092     }
4093     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4094 #ifdef REFINE_STATS
4095 {START_TIMER
4096 #endif
4097     run=0;
4098     rle_index=0;
4099     for(i=start_i; i<=last_non_zero; i++){
4100         int j= perm_scantable[i];
4101         const int level= block[j];
4102         int coeff;
4103
4104         if(level){
4105             if(level<0) coeff= qmul*level - qadd;
4106             else        coeff= qmul*level + qadd;
4107             run_tab[rle_index++]=run;
4108             run=0;
4109
4110             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4111         }else{
4112             run++;
4113         }
4114     }
4115 #ifdef REFINE_STATS
4116 if(last_non_zero>0){
4117 STOP_TIMER("init rem[]")
4118 }
4119 }
4120
4121 {START_TIMER
4122 #endif
4123     for(;;){
4124         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4125         int best_coeff=0;
4126         int best_change=0;
4127         int run2, best_unquant_change=0, analyze_gradient;
4128 #ifdef REFINE_STATS
4129 {START_TIMER
4130 #endif
4131         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4132
4133         if(analyze_gradient){
4134 #ifdef REFINE_STATS
4135 {START_TIMER
4136 #endif
4137             for(i=0; i<64; i++){
4138                 int w= weight[i];
4139
4140                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4141             }
4142 #ifdef REFINE_STATS
4143 STOP_TIMER("rem*w*w")}
4144 {START_TIMER
4145 #endif
4146             s->fdsp.fdct(d1);
4147 #ifdef REFINE_STATS
4148 STOP_TIMER("dct")}
4149 #endif
4150         }
4151
4152         if(start_i){
4153             const int level= block[0];
4154             int change, old_coeff;
4155
4156             av_assert2(s->mb_intra);
4157
4158             old_coeff= q*level;
4159
4160             for(change=-1; change<=1; change+=2){
4161                 int new_level= level + change;
4162                 int score, new_coeff;
4163
4164                 new_coeff= q*new_level;
4165                 if(new_coeff >= 2048 || new_coeff < 0)
4166                     continue;
4167
4168                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4169                                                   new_coeff - old_coeff);
4170                 if(score<best_score){
4171                     best_score= score;
4172                     best_coeff= 0;
4173                     best_change= change;
4174                     best_unquant_change= new_coeff - old_coeff;
4175                 }
4176             }
4177         }
4178
4179         run=0;
4180         rle_index=0;
4181         run2= run_tab[rle_index++];
4182         prev_level=0;
4183         prev_run=0;
4184
4185         for(i=start_i; i<64; i++){
4186             int j= perm_scantable[i];
4187             const int level= block[j];
4188             int change, old_coeff;
4189
4190             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4191                 break;
4192
4193             if(level){
4194                 if(level<0) old_coeff= qmul*level - qadd;
4195                 else        old_coeff= qmul*level + qadd;
4196                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4197             }else{
4198                 old_coeff=0;
4199                 run2--;
4200                 av_assert2(run2>=0 || i >= last_non_zero );
4201             }
4202
4203             for(change=-1; change<=1; change+=2){
4204                 int new_level= level + change;
4205                 int score, new_coeff, unquant_change;
4206
4207                 score=0;
4208                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4209                    continue;
4210
4211                 if(new_level){
4212                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4213                     else            new_coeff= qmul*new_level + qadd;
4214                     if(new_coeff >= 2048 || new_coeff <= -2048)
4215                         continue;
4216                     //FIXME check for overflow
4217
4218                     if(level){
4219                         if(level < 63 && level > -63){
4220                             if(i < last_non_zero)
4221                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4222                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4223                             else
4224                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4225                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4226                         }
4227                     }else{
4228                         av_assert2(FFABS(new_level)==1);
4229
4230                         if(analyze_gradient){
4231                             int g= d1[ scantable[i] ];
4232                             if(g && (g^new_level) >= 0)
4233                                 continue;
4234                         }
4235
4236                         if(i < last_non_zero){
4237                             int next_i= i + run2 + 1;
4238                             int next_level= block[ perm_scantable[next_i] ] + 64;
4239
4240                             if(next_level&(~127))
4241                                 next_level= 0;
4242
4243                             if(next_i < last_non_zero)
4244                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4245                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4246                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4247                             else
4248                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4249                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4250                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4251                         }else{
4252                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4253                             if(prev_level){
4254                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4255                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4256                             }
4257                         }
4258                     }
4259                 }else{
4260                     new_coeff=0;
4261                     av_assert2(FFABS(level)==1);
4262
4263                     if(i < last_non_zero){
4264                         int next_i= i + run2 + 1;
4265                         int next_level= block[ perm_scantable[next_i] ] + 64;
4266
4267                         if(next_level&(~127))
4268                             next_level= 0;
4269
4270                         if(next_i < last_non_zero)
4271                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4272                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4273                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4274                         else
4275                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4276                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4277                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4278                     }else{
4279                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4280                         if(prev_level){
4281                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4282                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4283                         }
4284                     }
4285                 }
4286
4287                 score *= lambda;
4288
4289                 unquant_change= new_coeff - old_coeff;
4290                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4291
4292                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4293                                                    unquant_change);
4294                 if(score<best_score){
4295                     best_score= score;
4296                     best_coeff= i;
4297                     best_change= change;
4298                     best_unquant_change= unquant_change;
4299                 }
4300             }
4301             if(level){
4302                 prev_level= level + 64;
4303                 if(prev_level&(~127))
4304                     prev_level= 0;
4305                 prev_run= run;
4306                 run=0;
4307             }else{
4308                 run++;
4309             }
4310         }
4311 #ifdef REFINE_STATS
4312 STOP_TIMER("iterative step")}
4313 #endif
4314
4315         if(best_change){
4316             int j= perm_scantable[ best_coeff ];
4317
4318             block[j] += best_change;
4319
4320             if(best_coeff > last_non_zero){
4321                 last_non_zero= best_coeff;
4322                 av_assert2(block[j]);
4323 #ifdef REFINE_STATS
4324 after_last++;
4325 #endif
4326             }else{
4327 #ifdef REFINE_STATS
4328 if(block[j]){
4329     if(block[j] - best_change){
4330         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4331             raise++;
4332         }else{
4333             lower++;
4334         }
4335     }else{
4336         from_zero++;
4337     }
4338 }else{
4339     to_zero++;
4340 }
4341 #endif
4342                 for(; last_non_zero>=start_i; last_non_zero--){
4343                     if(block[perm_scantable[last_non_zero]])
4344                         break;
4345                 }
4346             }
4347 #ifdef REFINE_STATS
4348 count++;
4349 if(256*256*256*64 % count == 0){
4350     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4351 }
4352 #endif
4353             run=0;
4354             rle_index=0;
4355             for(i=start_i; i<=last_non_zero; i++){
4356                 int j= perm_scantable[i];
4357                 const int level= block[j];
4358
4359                  if(level){
4360                      run_tab[rle_index++]=run;
4361                      run=0;
4362                  }else{
4363                      run++;
4364                  }
4365             }
4366
4367             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4368         }else{
4369             break;
4370         }
4371     }
4372 #ifdef REFINE_STATS
4373 if(last_non_zero>0){
4374 STOP_TIMER("iterative search")
4375 }
4376 }
4377 #endif
4378
4379     return last_non_zero;
4380 }
4381
4382 int ff_dct_quantize_c(MpegEncContext *s,
4383                         int16_t *block, int n,
4384                         int qscale, int *overflow)
4385 {
4386     int i, j, level, last_non_zero, q, start_i;
4387     const int *qmat;
4388     const uint8_t *scantable= s->intra_scantable.scantable;
4389     int bias;
4390     int max=0;
4391     unsigned int threshold1, threshold2;
4392
4393     s->fdsp.fdct(block);
4394
4395     if(s->dct_error_sum)
4396         s->denoise_dct(s, block);
4397
4398     if (s->mb_intra) {
4399         if (!s->h263_aic) {
4400             if (n < 4)
4401                 q = s->y_dc_scale;
4402             else
4403                 q = s->c_dc_scale;
4404             q = q << 3;
4405         } else
4406             /* For AIC we skip quant/dequant of INTRADC */
4407             q = 1 << 3;
4408
4409         /* note: block[0] is assumed to be positive */
4410         block[0] = (block[0] + (q >> 1)) / q;
4411         start_i = 1;
4412         last_non_zero = 0;
4413         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4414         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4415     } else {
4416         start_i = 0;
4417         last_non_zero = -1;
4418         qmat = s->q_inter_matrix[qscale];
4419         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4420     }
4421     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4422     threshold2= (threshold1<<1);
4423     for(i=63;i>=start_i;i--) {
4424         j = scantable[i];
4425         level = block[j] * qmat[j];
4426
4427         if(((unsigned)(level+threshold1))>threshold2){
4428             last_non_zero = i;
4429             break;
4430         }else{
4431             block[j]=0;
4432         }
4433     }
4434     for(i=start_i; i<=last_non_zero; i++) {
4435         j = scantable[i];
4436         level = block[j] * qmat[j];
4437
4438 //        if(   bias+level >= (1<<QMAT_SHIFT)
4439 //           || bias-level >= (1<<QMAT_SHIFT)){
4440         if(((unsigned)(level+threshold1))>threshold2){
4441             if(level>0){
4442                 level= (bias + level)>>QMAT_SHIFT;
4443                 block[j]= level;
4444             }else{
4445                 level= (bias - level)>>QMAT_SHIFT;
4446                 block[j]= -level;
4447             }
4448             max |=level;
4449         }else{
4450             block[j]=0;
4451         }
4452     }
4453     *overflow= s->max_qcoeff < max; //overflow might have happened
4454
4455     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4456     if (s->idsp.idct_permutation_type != FF_NO_IDCT_PERM)
4457         ff_block_permute(block, s->idsp.idct_permutation,
4458                          scantable, last_non_zero);
4459
4460     return last_non_zero;
4461 }
4462
4463 #define OFFSET(x) offsetof(MpegEncContext, x)
4464 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4465 static const AVOption h263_options[] = {
4466     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4467     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4468     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4469     FF_MPV_COMMON_OPTS
4470     { NULL },
4471 };
4472
4473 static const AVClass h263_class = {
4474     .class_name = "H.263 encoder",
4475     .item_name  = av_default_item_name,
4476     .option     = h263_options,
4477     .version    = LIBAVUTIL_VERSION_INT,
4478 };
4479
4480 AVCodec ff_h263_encoder = {
4481     .name           = "h263",
4482     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4483     .type           = AVMEDIA_TYPE_VIDEO,
4484     .id             = AV_CODEC_ID_H263,
4485     .priv_data_size = sizeof(MpegEncContext),
4486     .init           = ff_MPV_encode_init,
4487     .encode2        = ff_MPV_encode_picture,
4488     .close          = ff_MPV_encode_end,
4489     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4490     .priv_class     = &h263_class,
4491 };
4492
4493 static const AVOption h263p_options[] = {
4494     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4495     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4496     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4497     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4498     FF_MPV_COMMON_OPTS
4499     { NULL },
4500 };
4501 static const AVClass h263p_class = {
4502     .class_name = "H.263p encoder",
4503     .item_name  = av_default_item_name,
4504     .option     = h263p_options,
4505     .version    = LIBAVUTIL_VERSION_INT,
4506 };
4507
4508 AVCodec ff_h263p_encoder = {
4509     .name           = "h263p",
4510     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4511     .type           = AVMEDIA_TYPE_VIDEO,
4512     .id             = AV_CODEC_ID_H263P,
4513     .priv_data_size = sizeof(MpegEncContext),
4514     .init           = ff_MPV_encode_init,
4515     .encode2        = ff_MPV_encode_picture,
4516     .close          = ff_MPV_encode_end,
4517     .capabilities   = CODEC_CAP_SLICE_THREADS,
4518     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4519     .priv_class     = &h263p_class,
4520 };
4521
4522 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4523
4524 AVCodec ff_msmpeg4v2_encoder = {
4525     .name           = "msmpeg4v2",
4526     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4527     .type           = AVMEDIA_TYPE_VIDEO,
4528     .id             = AV_CODEC_ID_MSMPEG4V2,
4529     .priv_data_size = sizeof(MpegEncContext),
4530     .init           = ff_MPV_encode_init,
4531     .encode2        = ff_MPV_encode_picture,
4532     .close          = ff_MPV_encode_end,
4533     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4534     .priv_class     = &msmpeg4v2_class,
4535 };
4536
4537 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4538
4539 AVCodec ff_msmpeg4v3_encoder = {
4540     .name           = "msmpeg4",
4541     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4542     .type           = AVMEDIA_TYPE_VIDEO,
4543     .id             = AV_CODEC_ID_MSMPEG4V3,
4544     .priv_data_size = sizeof(MpegEncContext),
4545     .init           = ff_MPV_encode_init,
4546     .encode2        = ff_MPV_encode_picture,
4547     .close          = ff_MPV_encode_end,
4548     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4549     .priv_class     = &msmpeg4v3_class,
4550 };
4551
4552 FF_MPV_GENERIC_CLASS(wmv1)
4553
4554 AVCodec ff_wmv1_encoder = {
4555     .name           = "wmv1",
4556     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4557     .type           = AVMEDIA_TYPE_VIDEO,
4558     .id             = AV_CODEC_ID_WMV1,
4559     .priv_data_size = sizeof(MpegEncContext),
4560     .init           = ff_MPV_encode_init,
4561     .encode2        = ff_MPV_encode_picture,
4562     .close          = ff_MPV_encode_end,
4563     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4564     .priv_class     = &wmv1_class,
4565 };