]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
Merge commit '436ced244fadcde2c0b925627920e84b25482542'
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "idctdsp.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "h261.h"
44 #include "h263.h"
45 #include "mjpegenc_common.h"
46 #include "mathops.h"
47 #include "mpegutils.h"
48 #include "mjpegenc.h"
49 #include "msmpeg4.h"
50 #include "pixblockdsp.h"
51 #include "qpeldsp.h"
52 #include "faandct.h"
53 #include "thread.h"
54 #include "aandcttab.h"
55 #include "flv.h"
56 #include "mpeg4video.h"
57 #include "internal.h"
58 #include "bytestream.h"
59 #include <limits.h>
60 #include "sp5x.h"
61
/* Shift used for quantiser bias values in this file: ff_convert_matrix()
 * shifts its bias argument left by (16 - QUANT_BIAS_SHIFT), and the default
 * intra/inter biases are built from (QUANT_BIAS_SHIFT - 3) and
 * (QUANT_BIAS_SHIFT - 2) shifts. */
#define QUANT_BIAS_SHIFT 8

/* Forward declarations of file-local functions referenced before their
 * definitions. */
static int encode_picture(MpegEncContext *s, int picture_number);
static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
static int sse_mb(MpegEncContext *s);
static void denoise_dct_c(MpegEncContext *s, int16_t *block);
static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);

/* Shared default tables; MPV_encode_defaults() points s->me.mv_penalty and
 * s->fcode_tab at them. */
static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
static uint8_t default_fcode_tab[MAX_MV * 2 + 1];

/* Option table consisting of FF_MPV_COMMON_OPTS followed by the
 * { NULL } terminator entry. */
const AVOption ff_mpv_generic_options[] = {
    FF_MPV_COMMON_OPTS
    { NULL },
};
77
78 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
79                        uint16_t (*qmat16)[2][64],
80                        const uint16_t *quant_matrix,
81                        int bias, int qmin, int qmax, int intra)
82 {
83     FDCTDSPContext *fdsp = &s->fdsp;
84     int qscale;
85     int shift = 0;
86
87     for (qscale = qmin; qscale <= qmax; qscale++) {
88         int i;
89         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
90             fdsp->fdct == ff_jpeg_fdct_islow_10 ||
91             fdsp->fdct == ff_faandct) {
92             for (i = 0; i < 64; i++) {
93                 const int j = s->idsp.idct_permutation[i];
94                 /* 16 <= qscale * quant_matrix[i] <= 7905
95                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
96                  *             19952 <=              x  <= 249205026
97                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
98                  *           3444240 >= (1 << 36) / (x) >= 275 */
99
100                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
101                                         (qscale * quant_matrix[j]));
102             }
103         } else if (fdsp->fdct == ff_fdct_ifast) {
104             for (i = 0; i < 64; i++) {
105                 const int j = s->idsp.idct_permutation[i];
106                 /* 16 <= qscale * quant_matrix[i] <= 7905
107                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
108                  *             19952 <=              x  <= 249205026
109                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
110                  *           3444240 >= (1 << 36) / (x) >= 275 */
111
112                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
113                                         (ff_aanscales[i] * (int64_t)qscale * quant_matrix[j]));
114             }
115         } else {
116             for (i = 0; i < 64; i++) {
117                 const int j = s->idsp.idct_permutation[i];
118                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
119                  * Assume x = qscale * quant_matrix[i]
120                  * So             16 <=              x  <= 7905
121                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
122                  * so          32768 >= (1 << 19) / (x) >= 67 */
123                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
124                                         (qscale * quant_matrix[j]));
125                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
126                 //                    (qscale * quant_matrix[i]);
127                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
128                                        (qscale * quant_matrix[j]);
129
130                 if (qmat16[qscale][0][i] == 0 ||
131                     qmat16[qscale][0][i] == 128 * 256)
132                     qmat16[qscale][0][i] = 128 * 256 - 1;
133                 qmat16[qscale][1][i] =
134                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
135                                 qmat16[qscale][0][i]);
136             }
137         }
138
139         for (i = intra; i < 64; i++) {
140             int64_t max = 8191;
141             if (fdsp->fdct == ff_fdct_ifast) {
142                 max = (8191LL * ff_aanscales[i]) >> 14;
143             }
144             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
145                 shift++;
146             }
147         }
148     }
149     if (shift) {
150         av_log(NULL, AV_LOG_INFO,
151                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
152                QMAT_SHIFT - shift);
153     }
154 }
155
156 static inline void update_qscale(MpegEncContext *s)
157 {
158     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
159                 (FF_LAMBDA_SHIFT + 7);
160     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
161
162     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
163                  FF_LAMBDA_SHIFT;
164 }
165
166 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
167 {
168     int i;
169
170     if (matrix) {
171         put_bits(pb, 1, 1);
172         for (i = 0; i < 64; i++) {
173             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
174         }
175     } else
176         put_bits(pb, 1, 0);
177 }
178
179 /**
180  * init s->current_picture.qscale_table from s->lambda_table
181  */
182 void ff_init_qscale_tab(MpegEncContext *s)
183 {
184     int8_t * const qscale_table = s->current_picture.qscale_table;
185     int i;
186
187     for (i = 0; i < s->mb_num; i++) {
188         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
189         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
190         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
191                                                   s->avctx->qmax);
192     }
193 }
194
195 static void update_duplicate_context_after_me(MpegEncContext *dst,
196                                               MpegEncContext *src)
197 {
198 #define COPY(a) dst->a= src->a
199     COPY(pict_type);
200     COPY(current_picture);
201     COPY(f_code);
202     COPY(b_code);
203     COPY(qscale);
204     COPY(lambda);
205     COPY(lambda2);
206     COPY(picture_in_gop_number);
207     COPY(gop_picture_number);
208     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
209     COPY(progressive_frame);    // FIXME don't set in encode_header
210     COPY(partitioned_frame);    // FIXME don't set in encode_header
211 #undef COPY
212 }
213
214 /**
215  * Set the given MpegEncContext to defaults for encoding.
216  * the changed fields will not depend upon the prior state of the MpegEncContext.
217  */
218 static void MPV_encode_defaults(MpegEncContext *s)
219 {
220     int i;
221     ff_MPV_common_defaults(s);
222
223     for (i = -16; i < 16; i++) {
224         default_fcode_tab[i + MAX_MV] = 1;
225     }
226     s->me.mv_penalty = default_mv_penalty;
227     s->fcode_tab     = default_fcode_tab;
228
229     s->input_picture_number  = 0;
230     s->picture_in_gop_number = 0;
231 }
232
233 av_cold int ff_dct_encode_init(MpegEncContext *s) {
234     if (ARCH_X86)
235         ff_dct_encode_init_x86(s);
236
237     if (CONFIG_H263_ENCODER)
238         ff_h263dsp_init(&s->h263dsp);
239     if (!s->dct_quantize)
240         s->dct_quantize = ff_dct_quantize_c;
241     if (!s->denoise_dct)
242         s->denoise_dct  = denoise_dct_c;
243     s->fast_dct_quantize = s->dct_quantize;
244     if (s->avctx->trellis)
245         s->dct_quantize  = dct_quantize_trellis_c;
246
247     return 0;
248 }
249
250 /* init video encoder */
251 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
252 {
253     MpegEncContext *s = avctx->priv_data;
254     int i, ret, format_supported;
255
256     MPV_encode_defaults(s);
257
258     switch (avctx->codec_id) {
259     case AV_CODEC_ID_MPEG2VIDEO:
260         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
261             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
262             av_log(avctx, AV_LOG_ERROR,
263                    "only YUV420 and YUV422 are supported\n");
264             return -1;
265         }
266         break;
267     case AV_CODEC_ID_MJPEG:
268     case AV_CODEC_ID_AMV:
269         format_supported = 0;
270         /* JPEG color space */
271         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
272             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
273             avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
274             (avctx->color_range == AVCOL_RANGE_JPEG &&
275              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
276               avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
277               avctx->pix_fmt == AV_PIX_FMT_YUV444P)))
278             format_supported = 1;
279         /* MPEG color space */
280         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
281                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
282                   avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
283                   avctx->pix_fmt == AV_PIX_FMT_YUV444P))
284             format_supported = 1;
285
286         if (!format_supported) {
287             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
288             return -1;
289         }
290         break;
291     default:
292         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
293             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
294             return -1;
295         }
296     }
297
298     switch (avctx->pix_fmt) {
299     case AV_PIX_FMT_YUVJ444P:
300     case AV_PIX_FMT_YUV444P:
301         s->chroma_format = CHROMA_444;
302         break;
303     case AV_PIX_FMT_YUVJ422P:
304     case AV_PIX_FMT_YUV422P:
305         s->chroma_format = CHROMA_422;
306         break;
307     case AV_PIX_FMT_YUVJ420P:
308     case AV_PIX_FMT_YUV420P:
309     default:
310         s->chroma_format = CHROMA_420;
311         break;
312     }
313
314     s->bit_rate = avctx->bit_rate;
315     s->width    = avctx->width;
316     s->height   = avctx->height;
317     if (avctx->gop_size > 600 &&
318         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
319         av_log(avctx, AV_LOG_WARNING,
320                "keyframe interval too large!, reducing it from %d to %d\n",
321                avctx->gop_size, 600);
322         avctx->gop_size = 600;
323     }
324     s->gop_size     = avctx->gop_size;
325     s->avctx        = avctx;
326     s->flags        = avctx->flags;
327     s->flags2       = avctx->flags2;
328     if (avctx->max_b_frames > MAX_B_FRAMES) {
329         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
330                "is %d.\n", MAX_B_FRAMES);
331         avctx->max_b_frames = MAX_B_FRAMES;
332     }
333     s->max_b_frames = avctx->max_b_frames;
334     s->codec_id     = avctx->codec->id;
335     s->strict_std_compliance = avctx->strict_std_compliance;
336     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
337     s->mpeg_quant         = avctx->mpeg_quant;
338     s->rtp_mode           = !!avctx->rtp_payload_size;
339     s->intra_dc_precision = avctx->intra_dc_precision;
340
341     // workaround some differences between how applications specify dc precission
342     if (s->intra_dc_precision < 0) {
343         s->intra_dc_precision += 8;
344     } else if (s->intra_dc_precision >= 8)
345         s->intra_dc_precision -= 8;
346
347     if (s->intra_dc_precision < 0) {
348         av_log(avctx, AV_LOG_ERROR,
349                 "intra dc precision must be positive, note some applications use"
350                 " 0 and some 8 as base meaning 8bit, the value must not be smaller than that\n");
351         return AVERROR(EINVAL);
352     }
353
354     if (s->intra_dc_precision > (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO ? 3 : 0)) {
355         av_log(avctx, AV_LOG_ERROR, "intra dc precision too large\n");
356         return AVERROR(EINVAL);
357     }
358     s->user_specified_pts = AV_NOPTS_VALUE;
359
360     if (s->gop_size <= 1) {
361         s->intra_only = 1;
362         s->gop_size   = 12;
363     } else {
364         s->intra_only = 0;
365     }
366
367     s->me_method = avctx->me_method;
368
369     /* Fixed QSCALE */
370     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
371
372     s->adaptive_quant = (s->avctx->lumi_masking ||
373                          s->avctx->dark_masking ||
374                          s->avctx->temporal_cplx_masking ||
375                          s->avctx->spatial_cplx_masking  ||
376                          s->avctx->p_masking      ||
377                          s->avctx->border_masking ||
378                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
379                         !s->fixed_qscale;
380
381     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
382
383     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
384         switch(avctx->codec_id) {
385         case AV_CODEC_ID_MPEG1VIDEO:
386         case AV_CODEC_ID_MPEG2VIDEO:
387             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112L / 15000000 * 16384;
388             break;
389         case AV_CODEC_ID_MPEG4:
390         case AV_CODEC_ID_MSMPEG4V1:
391         case AV_CODEC_ID_MSMPEG4V2:
392         case AV_CODEC_ID_MSMPEG4V3:
393             if       (avctx->rc_max_rate >= 15000000) {
394                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000L) * (760-320) / (38400000 - 15000000);
395             } else if(avctx->rc_max_rate >=  2000000) {
396                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000L) * (320- 80) / (15000000 -  2000000);
397             } else if(avctx->rc_max_rate >=   384000) {
398                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000L) * ( 80- 40) / ( 2000000 -   384000);
399             } else
400                 avctx->rc_buffer_size = 40;
401             avctx->rc_buffer_size *= 16384;
402             break;
403         }
404         if (avctx->rc_buffer_size) {
405             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
406         }
407     }
408
409     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
410         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
411         return -1;
412     }
413
414     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
415         av_log(avctx, AV_LOG_INFO,
416                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
417     }
418
419     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
420         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
421         return -1;
422     }
423
424     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
425         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
426         return -1;
427     }
428
429     if (avctx->rc_max_rate &&
430         avctx->rc_max_rate == avctx->bit_rate &&
431         avctx->rc_max_rate != avctx->rc_min_rate) {
432         av_log(avctx, AV_LOG_INFO,
433                "impossible bitrate constraints, this will fail\n");
434     }
435
436     if (avctx->rc_buffer_size &&
437         avctx->bit_rate * (int64_t)avctx->time_base.num >
438             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
439         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
440         return -1;
441     }
442
443     if (!s->fixed_qscale &&
444         avctx->bit_rate * av_q2d(avctx->time_base) >
445             avctx->bit_rate_tolerance) {
446         av_log(avctx, AV_LOG_WARNING,
447                "bitrate tolerance %d too small for bitrate %d, overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
448         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
449     }
450
451     if (s->avctx->rc_max_rate &&
452         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
453         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
454          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
455         90000LL * (avctx->rc_buffer_size - 1) >
456             s->avctx->rc_max_rate * 0xFFFFLL) {
457         av_log(avctx, AV_LOG_INFO,
458                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
459                "specified vbv buffer is too large for the given bitrate!\n");
460     }
461
462     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
463         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
464         s->codec_id != AV_CODEC_ID_FLV1) {
465         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
466         return -1;
467     }
468
469     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
470         av_log(avctx, AV_LOG_ERROR,
471                "OBMC is only supported with simple mb decision\n");
472         return -1;
473     }
474
475     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
476         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
477         return -1;
478     }
479
480     if (s->max_b_frames                    &&
481         s->codec_id != AV_CODEC_ID_MPEG4      &&
482         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
483         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
484         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
485         return -1;
486     }
487     if (s->max_b_frames < 0) {
488         av_log(avctx, AV_LOG_ERROR,
489                "max b frames must be 0 or positive for mpegvideo based encoders\n");
490         return -1;
491     }
492
493     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
494          s->codec_id == AV_CODEC_ID_H263  ||
495          s->codec_id == AV_CODEC_ID_H263P) &&
496         (avctx->sample_aspect_ratio.num > 255 ||
497          avctx->sample_aspect_ratio.den > 255)) {
498         av_log(avctx, AV_LOG_WARNING,
499                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
500                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
501         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
502                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
503     }
504
505     if ((s->codec_id == AV_CODEC_ID_H263  ||
506          s->codec_id == AV_CODEC_ID_H263P) &&
507         (avctx->width  > 2048 ||
508          avctx->height > 1152 )) {
509         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
510         return -1;
511     }
512     if ((s->codec_id == AV_CODEC_ID_H263  ||
513          s->codec_id == AV_CODEC_ID_H263P) &&
514         ((avctx->width &3) ||
515          (avctx->height&3) )) {
516         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
517         return -1;
518     }
519
520     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
521         (avctx->width  > 4095 ||
522          avctx->height > 4095 )) {
523         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
524         return -1;
525     }
526
527     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
528         (avctx->width  > 16383 ||
529          avctx->height > 16383 )) {
530         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
531         return -1;
532     }
533
534     if (s->codec_id == AV_CODEC_ID_RV10 &&
535         (avctx->width &15 ||
536          avctx->height&15 )) {
537         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
538         return AVERROR(EINVAL);
539     }
540
541     if (s->codec_id == AV_CODEC_ID_RV20 &&
542         (avctx->width &3 ||
543          avctx->height&3 )) {
544         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
545         return AVERROR(EINVAL);
546     }
547
548     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
549          s->codec_id == AV_CODEC_ID_WMV2) &&
550          avctx->width & 1) {
551          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
552          return -1;
553     }
554
555     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
556         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
557         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
558         return -1;
559     }
560
561     // FIXME mpeg2 uses that too
562     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
563                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
564         av_log(avctx, AV_LOG_ERROR,
565                "mpeg2 style quantization not supported by codec\n");
566         return -1;
567     }
568
569     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
570         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
571         return -1;
572     }
573
574     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
575         s->avctx->mb_decision != FF_MB_DECISION_RD) {
576         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
577         return -1;
578     }
579
580     if (s->avctx->scenechange_threshold < 1000000000 &&
581         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
582         av_log(avctx, AV_LOG_ERROR,
583                "closed gop with scene change detection are not supported yet, "
584                "set threshold to 1000000000\n");
585         return -1;
586     }
587
588     if (s->flags & CODEC_FLAG_LOW_DELAY) {
589         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
590             av_log(avctx, AV_LOG_ERROR,
591                   "low delay forcing is only available for mpeg2\n");
592             return -1;
593         }
594         if (s->max_b_frames != 0) {
595             av_log(avctx, AV_LOG_ERROR,
596                    "b frames cannot be used with low delay\n");
597             return -1;
598         }
599     }
600
601     if (s->q_scale_type == 1) {
602         if (avctx->qmax > 12) {
603             av_log(avctx, AV_LOG_ERROR,
604                    "non linear quant only supports qmax <= 12 currently\n");
605             return -1;
606         }
607     }
608
609     if (s->avctx->thread_count > 1         &&
610         s->codec_id != AV_CODEC_ID_MPEG4      &&
611         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
612         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
613         s->codec_id != AV_CODEC_ID_MJPEG      &&
614         (s->codec_id != AV_CODEC_ID_H263P)) {
615         av_log(avctx, AV_LOG_ERROR,
616                "multi threaded encoding not supported by codec\n");
617         return -1;
618     }
619
620     if (s->avctx->thread_count < 1) {
621         av_log(avctx, AV_LOG_ERROR,
622                "automatic thread number detection not supported by codec, "
623                "patch welcome\n");
624         return -1;
625     }
626
627     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
628         s->rtp_mode = 1;
629
630     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
631         s->h263_slice_structured = 1;
632
633     if (!avctx->time_base.den || !avctx->time_base.num) {
634         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
635         return -1;
636     }
637
638     i = (INT_MAX / 2 + 128) >> 8;
639     if (avctx->mb_threshold >= i) {
640         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
641                i - 1);
642         return -1;
643     }
644
645     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
646         av_log(avctx, AV_LOG_INFO,
647                "notice: b_frame_strategy only affects the first pass\n");
648         avctx->b_frame_strategy = 0;
649     }
650
651     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
652     if (i > 1) {
653         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
654         avctx->time_base.den /= i;
655         avctx->time_base.num /= i;
656         //return -1;
657     }
658
659     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
660         // (a + x * 3 / 8) / x
661         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
662         s->inter_quant_bias = 0;
663     } else {
664         s->intra_quant_bias = 0;
665         // (a - x / 4) / x
666         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
667     }
668
669     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
670         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
671         return AVERROR(EINVAL);
672     }
673
674     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
675         s->intra_quant_bias = avctx->intra_quant_bias;
676     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
677         s->inter_quant_bias = avctx->inter_quant_bias;
678
679     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
680
681     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
682         s->avctx->time_base.den > (1 << 16) - 1) {
683         av_log(avctx, AV_LOG_ERROR,
684                "timebase %d/%d not supported by MPEG 4 standard, "
685                "the maximum admitted value for the timebase denominator "
686                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
687                (1 << 16) - 1);
688         return -1;
689     }
690     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
691
692     switch (avctx->codec->id) {
693     case AV_CODEC_ID_MPEG1VIDEO:
694         s->out_format = FMT_MPEG1;
695         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
696         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
697         break;
698     case AV_CODEC_ID_MPEG2VIDEO:
699         s->out_format = FMT_MPEG1;
700         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
701         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
702         s->rtp_mode   = 1;
703         break;
704     case AV_CODEC_ID_MJPEG:
705     case AV_CODEC_ID_AMV:
706         s->out_format = FMT_MJPEG;
707         s->intra_only = 1; /* force intra only for jpeg */
708         if (!CONFIG_MJPEG_ENCODER ||
709             ff_mjpeg_encode_init(s) < 0)
710             return -1;
711         avctx->delay = 0;
712         s->low_delay = 1;
713         break;
714     case AV_CODEC_ID_H261:
715         if (!CONFIG_H261_ENCODER)
716             return -1;
717         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
718             av_log(avctx, AV_LOG_ERROR,
719                    "The specified picture size of %dx%d is not valid for the "
720                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
721                     s->width, s->height);
722             return -1;
723         }
724         s->out_format = FMT_H261;
725         avctx->delay  = 0;
726         s->low_delay  = 1;
727         break;
728     case AV_CODEC_ID_H263:
729         if (!CONFIG_H263_ENCODER)
730             return -1;
731         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
732                              s->width, s->height) == 8) {
733             av_log(avctx, AV_LOG_ERROR,
734                    "The specified picture size of %dx%d is not valid for "
735                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
736                    "352x288, 704x576, and 1408x1152. "
737                    "Try H.263+.\n", s->width, s->height);
738             return -1;
739         }
740         s->out_format = FMT_H263;
741         avctx->delay  = 0;
742         s->low_delay  = 1;
743         break;
744     case AV_CODEC_ID_H263P:
745         s->out_format = FMT_H263;
746         s->h263_plus  = 1;
747         /* Fx */
748         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
749         s->modified_quant  = s->h263_aic;
750         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
751         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
752
753         /* /Fx */
754         /* These are just to be sure */
755         avctx->delay = 0;
756         s->low_delay = 1;
757         break;
758     case AV_CODEC_ID_FLV1:
759         s->out_format      = FMT_H263;
760         s->h263_flv        = 2; /* format = 1; 11-bit codes */
761         s->unrestricted_mv = 1;
762         s->rtp_mode  = 0; /* don't allow GOB */
763         avctx->delay = 0;
764         s->low_delay = 1;
765         break;
766     case AV_CODEC_ID_RV10:
767         s->out_format = FMT_H263;
768         avctx->delay  = 0;
769         s->low_delay  = 1;
770         break;
771     case AV_CODEC_ID_RV20:
772         s->out_format      = FMT_H263;
773         avctx->delay       = 0;
774         s->low_delay       = 1;
775         s->modified_quant  = 1;
776         s->h263_aic        = 1;
777         s->h263_plus       = 1;
778         s->loop_filter     = 1;
779         s->unrestricted_mv = 0;
780         break;
781     case AV_CODEC_ID_MPEG4:
782         s->out_format      = FMT_H263;
783         s->h263_pred       = 1;
784         s->unrestricted_mv = 1;
785         s->low_delay       = s->max_b_frames ? 0 : 1;
786         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
787         break;
788     case AV_CODEC_ID_MSMPEG4V2:
789         s->out_format      = FMT_H263;
790         s->h263_pred       = 1;
791         s->unrestricted_mv = 1;
792         s->msmpeg4_version = 2;
793         avctx->delay       = 0;
794         s->low_delay       = 1;
795         break;
796     case AV_CODEC_ID_MSMPEG4V3:
797         s->out_format        = FMT_H263;
798         s->h263_pred         = 1;
799         s->unrestricted_mv   = 1;
800         s->msmpeg4_version   = 3;
801         s->flipflop_rounding = 1;
802         avctx->delay         = 0;
803         s->low_delay         = 1;
804         break;
805     case AV_CODEC_ID_WMV1:
806         s->out_format        = FMT_H263;
807         s->h263_pred         = 1;
808         s->unrestricted_mv   = 1;
809         s->msmpeg4_version   = 4;
810         s->flipflop_rounding = 1;
811         avctx->delay         = 0;
812         s->low_delay         = 1;
813         break;
814     case AV_CODEC_ID_WMV2:
815         s->out_format        = FMT_H263;
816         s->h263_pred         = 1;
817         s->unrestricted_mv   = 1;
818         s->msmpeg4_version   = 5;
819         s->flipflop_rounding = 1;
820         avctx->delay         = 0;
821         s->low_delay         = 1;
822         break;
823     default:
824         return -1;
825     }
826
827     avctx->has_b_frames = !s->low_delay;
828
829     s->encoding = 1;
830
831     s->progressive_frame    =
832     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
833                                                 CODEC_FLAG_INTERLACED_ME) ||
834                                 s->alternate_scan);
835
836     /* init */
837     if (ff_MPV_common_init(s) < 0)
838         return -1;
839
840     ff_fdctdsp_init(&s->fdsp, avctx);
841     ff_me_cmp_init(&s->mecc, avctx);
842     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
843     ff_pixblockdsp_init(&s->pdsp, avctx);
844     ff_qpeldsp_init(&s->qdsp);
845
846     s->avctx->coded_frame = s->current_picture.f;
847
848     if (s->msmpeg4_version) {
849         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
850                           2 * 2 * (MAX_LEVEL + 1) *
851                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
852     }
853     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
854
855     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
856     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
857     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
858     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
859     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
860     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
861     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
862                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
863     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
864                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
865
866     if (s->avctx->noise_reduction) {
867         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
868                           2 * 64 * sizeof(uint16_t), fail);
869     }
870
871     ff_dct_encode_init(s);
872
873     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
874         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
875
876     s->quant_precision = 5;
877
878     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
879     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
880
881     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
882         ff_h261_encode_init(s);
883     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
884         ff_h263_encode_init(s);
885     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
886         ff_msmpeg4_encode_init(s);
887     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
888         && s->out_format == FMT_MPEG1)
889         ff_mpeg1_encode_init(s);
890
891     /* init q matrix */
892     for (i = 0; i < 64; i++) {
893         int j = s->idsp.idct_permutation[i];
894         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
895             s->mpeg_quant) {
896             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
897             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
898         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
899             s->intra_matrix[j] =
900             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
901         } else {
902             /* mpeg1/2 */
903             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
904             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
905         }
906         if (s->avctx->intra_matrix)
907             s->intra_matrix[j] = s->avctx->intra_matrix[i];
908         if (s->avctx->inter_matrix)
909             s->inter_matrix[j] = s->avctx->inter_matrix[i];
910     }
911
912     /* precompute matrix */
913     /* for mjpeg, we do include qscale in the matrix */
914     if (s->out_format != FMT_MJPEG) {
915         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
916                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
917                           31, 1);
918         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
919                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
920                           31, 0);
921     }
922
923     if (ff_rate_control_init(s) < 0)
924         return -1;
925
926 #if FF_API_ERROR_RATE
927     FF_DISABLE_DEPRECATION_WARNINGS
928     if (avctx->error_rate)
929         s->error_rate = avctx->error_rate;
930     FF_ENABLE_DEPRECATION_WARNINGS;
931 #endif
932
933 #if FF_API_NORMALIZE_AQP
934     FF_DISABLE_DEPRECATION_WARNINGS
935     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
936         s->mpv_flags |= FF_MPV_FLAG_NAQ;
937     FF_ENABLE_DEPRECATION_WARNINGS;
938 #endif
939
940 #if FF_API_MV0
941     FF_DISABLE_DEPRECATION_WARNINGS
942     if (avctx->flags & CODEC_FLAG_MV0)
943         s->mpv_flags |= FF_MPV_FLAG_MV0;
944     FF_ENABLE_DEPRECATION_WARNINGS
945 #endif
946
947     if (avctx->b_frame_strategy == 2) {
948         for (i = 0; i < s->max_b_frames + 2; i++) {
949             s->tmp_frames[i] = av_frame_alloc();
950             if (!s->tmp_frames[i])
951                 return AVERROR(ENOMEM);
952
953             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
954             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
955             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
956
957             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
958             if (ret < 0)
959                 return ret;
960         }
961     }
962
963     return 0;
964 fail:
965     ff_MPV_encode_end(avctx);
966     return AVERROR_UNKNOWN;
967 }
968
969 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
970 {
971     MpegEncContext *s = avctx->priv_data;
972     int i;
973
974     ff_rate_control_uninit(s);
975
976     ff_MPV_common_end(s);
977     if (CONFIG_MJPEG_ENCODER &&
978         s->out_format == FMT_MJPEG)
979         ff_mjpeg_encode_close(s);
980
981     av_freep(&avctx->extradata);
982
983     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
984         av_frame_free(&s->tmp_frames[i]);
985
986     ff_free_picture_tables(&s->new_picture);
987     ff_mpeg_unref_picture(s, &s->new_picture);
988
989     av_freep(&s->avctx->stats_out);
990     av_freep(&s->ac_stats);
991
992     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
993     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
994     s->q_chroma_intra_matrix=   NULL;
995     s->q_chroma_intra_matrix16= NULL;
996     av_freep(&s->q_intra_matrix);
997     av_freep(&s->q_inter_matrix);
998     av_freep(&s->q_intra_matrix16);
999     av_freep(&s->q_inter_matrix16);
1000     av_freep(&s->input_picture);
1001     av_freep(&s->reordered_input_picture);
1002     av_freep(&s->dct_offset);
1003
1004     return 0;
1005 }
1006
/**
 * Sum of absolute errors of a 16x16 pixel block against a single value.
 *
 * @param src    top-left pixel of the block
 * @param ref    value every pixel is compared with
 * @param stride distance in bytes between two consecutive rows of src
 * @return sum over the 256 pixels of |pixel - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++) {
            const int diff = src[col + row * stride] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
1020
1021 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1022                            uint8_t *ref, int stride)
1023 {
1024     int x, y, w, h;
1025     int acc = 0;
1026
1027     w = s->width  & ~15;
1028     h = s->height & ~15;
1029
1030     for (y = 0; y < h; y += 16) {
1031         for (x = 0; x < w; x += 16) {
1032             int offset = x + y * stride;
1033             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1034                                       stride, 16);
1035             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1036             int sae  = get_sae(src + offset, mean, stride);
1037
1038             acc += sae + 500 < sad;
1039         }
1040     }
1041     return acc;
1042 }
1043
1044
1045 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1046 {
1047     Picture *pic = NULL;
1048     int64_t pts;
1049     int i, display_picture_number = 0, ret;
1050     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
1051                                                  (s->low_delay ? 0 : 1);
1052     int direct = 1;
1053
1054     if (pic_arg) {
1055         pts = pic_arg->pts;
1056         display_picture_number = s->input_picture_number++;
1057
1058         if (pts != AV_NOPTS_VALUE) {
1059             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1060                 int64_t last = s->user_specified_pts;
1061
1062                 if (pts <= last) {
1063                     av_log(s->avctx, AV_LOG_ERROR,
1064                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1065                            pts, last);
1066                     return AVERROR(EINVAL);
1067                 }
1068
1069                 if (!s->low_delay && display_picture_number == 1)
1070                     s->dts_delta = pts - last;
1071             }
1072             s->user_specified_pts = pts;
1073         } else {
1074             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1075                 s->user_specified_pts =
1076                 pts = s->user_specified_pts + 1;
1077                 av_log(s->avctx, AV_LOG_INFO,
1078                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1079                        pts);
1080             } else {
1081                 pts = display_picture_number;
1082             }
1083         }
1084     }
1085
1086     if (pic_arg) {
1087         if (!pic_arg->buf[0])
1088             direct = 0;
1089         if (pic_arg->linesize[0] != s->linesize)
1090             direct = 0;
1091         if (pic_arg->linesize[1] != s->uvlinesize)
1092             direct = 0;
1093         if (pic_arg->linesize[2] != s->uvlinesize)
1094             direct = 0;
1095         if ((s->width & 15) || (s->height & 15))
1096             direct = 0;
1097         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1098             direct = 0;
1099         if (s->linesize & (STRIDE_ALIGN-1))
1100             direct = 0;
1101
1102         av_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1103                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1104
1105         if (direct) {
1106             i = ff_find_unused_picture(s, 1);
1107             if (i < 0)
1108                 return i;
1109
1110             pic = &s->picture[i];
1111             pic->reference = 3;
1112
1113             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1114                 return ret;
1115             if (ff_alloc_picture(s, pic, 1) < 0) {
1116                 return -1;
1117             }
1118         } else {
1119             i = ff_find_unused_picture(s, 0);
1120             if (i < 0)
1121                 return i;
1122
1123             pic = &s->picture[i];
1124             pic->reference = 3;
1125
1126             if (ff_alloc_picture(s, pic, 0) < 0) {
1127                 return -1;
1128             }
1129
1130             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1131                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1132                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1133                 // empty
1134             } else {
1135                 int h_chroma_shift, v_chroma_shift;
1136                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1137                                                  &h_chroma_shift,
1138                                                  &v_chroma_shift);
1139
1140                 for (i = 0; i < 3; i++) {
1141                     int src_stride = pic_arg->linesize[i];
1142                     int dst_stride = i ? s->uvlinesize : s->linesize;
1143                     int h_shift = i ? h_chroma_shift : 0;
1144                     int v_shift = i ? v_chroma_shift : 0;
1145                     int w = s->width  >> h_shift;
1146                     int h = s->height >> v_shift;
1147                     uint8_t *src = pic_arg->data[i];
1148                     uint8_t *dst = pic->f->data[i];
1149                     int vpad = 16;
1150
1151                     if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1152                         && !s->progressive_sequence
1153                         && FFALIGN(s->height, 32) - s->height > 16)
1154                         vpad = 32;
1155
1156                     if (!s->avctx->rc_buffer_size)
1157                         dst += INPLACE_OFFSET;
1158
1159                     if (src_stride == dst_stride)
1160                         memcpy(dst, src, src_stride * h);
1161                     else {
1162                         int h2 = h;
1163                         uint8_t *dst2 = dst;
1164                         while (h2--) {
1165                             memcpy(dst2, src, w);
1166                             dst2 += dst_stride;
1167                             src += src_stride;
1168                         }
1169                     }
1170                     if ((s->width & 15) || (s->height & (vpad-1))) {
1171                         s->mpvencdsp.draw_edges(dst, dst_stride,
1172                                                 w, h,
1173                                                 16>>h_shift,
1174                                                 vpad>>v_shift,
1175                                                 EDGE_BOTTOM);
1176                     }
1177                 }
1178             }
1179         }
1180         ret = av_frame_copy_props(pic->f, pic_arg);
1181         if (ret < 0)
1182             return ret;
1183
1184         pic->f->display_picture_number = display_picture_number;
1185         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1186     }
1187
1188     /* shift buffer entries */
1189     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1190         s->input_picture[i - 1] = s->input_picture[i];
1191
1192     s->input_picture[encoding_delay] = (Picture*) pic;
1193
1194     return 0;
1195 }
1196
1197 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1198 {
1199     int x, y, plane;
1200     int score = 0;
1201     int64_t score64 = 0;
1202
1203     for (plane = 0; plane < 3; plane++) {
1204         const int stride = p->f->linesize[plane];
1205         const int bw = plane ? 1 : 2;
1206         for (y = 0; y < s->mb_height * bw; y++) {
1207             for (x = 0; x < s->mb_width * bw; x++) {
1208                 int off = p->shared ? 0 : 16;
1209                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1210                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1211                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1212
1213                 switch (FFABS(s->avctx->frame_skip_exp)) {
1214                 case 0: score    =  FFMAX(score, v);          break;
1215                 case 1: score   += FFABS(v);                  break;
1216                 case 2: score64 += v * (int64_t)v;                       break;
1217                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1218                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1219                 }
1220             }
1221         }
1222     }
1223     emms_c();
1224
1225     if (score)
1226         score64 = score;
1227     if (s->avctx->frame_skip_exp < 0)
1228         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1229                       -1.0/s->avctx->frame_skip_exp);
1230
1231     if (score64 < s->avctx->frame_skip_threshold)
1232         return 1;
1233     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1234         return 1;
1235     return 0;
1236 }
1237
1238 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1239 {
1240     AVPacket pkt = { 0 };
1241     int ret, got_output;
1242
1243     av_init_packet(&pkt);
1244     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1245     if (ret < 0)
1246         return ret;
1247
1248     ret = pkt.size;
1249     av_free_packet(&pkt);
1250     return ret;
1251 }
1252
1253 static int estimate_best_b_count(MpegEncContext *s)
1254 {
1255     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1256     AVCodecContext *c = avcodec_alloc_context3(NULL);
1257     const int scale = s->avctx->brd_scale;
1258     int i, j, out_size, p_lambda, b_lambda, lambda2;
1259     int64_t best_rd  = INT64_MAX;
1260     int best_b_count = -1;
1261
1262     av_assert0(scale >= 0 && scale <= 3);
1263
1264     //emms_c();
1265     //s->next_picture_ptr->quality;
1266     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1267     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1268     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1269     if (!b_lambda) // FIXME we should do this somewhere else
1270         b_lambda = p_lambda;
1271     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1272                FF_LAMBDA_SHIFT;
1273
1274     c->width        = s->width  >> scale;
1275     c->height       = s->height >> scale;
1276     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1277     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1278     c->mb_decision  = s->avctx->mb_decision;
1279     c->me_cmp       = s->avctx->me_cmp;
1280     c->mb_cmp       = s->avctx->mb_cmp;
1281     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1282     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1283     c->time_base    = s->avctx->time_base;
1284     c->max_b_frames = s->max_b_frames;
1285
1286     if (avcodec_open2(c, codec, NULL) < 0)
1287         return -1;
1288
1289     for (i = 0; i < s->max_b_frames + 2; i++) {
1290         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1291                                                 s->next_picture_ptr;
1292         uint8_t *data[4];
1293
1294         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1295             pre_input = *pre_input_ptr;
1296             memcpy(data, pre_input_ptr->f->data, sizeof(data));
1297
1298             if (!pre_input.shared && i) {
1299                 data[0] += INPLACE_OFFSET;
1300                 data[1] += INPLACE_OFFSET;
1301                 data[2] += INPLACE_OFFSET;
1302             }
1303
1304             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1305                                        s->tmp_frames[i]->linesize[0],
1306                                        data[0],
1307                                        pre_input.f->linesize[0],
1308                                        c->width, c->height);
1309             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1310                                        s->tmp_frames[i]->linesize[1],
1311                                        data[1],
1312                                        pre_input.f->linesize[1],
1313                                        c->width >> 1, c->height >> 1);
1314             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1315                                        s->tmp_frames[i]->linesize[2],
1316                                        data[2],
1317                                        pre_input.f->linesize[2],
1318                                        c->width >> 1, c->height >> 1);
1319         }
1320     }
1321
1322     for (j = 0; j < s->max_b_frames + 1; j++) {
1323         int64_t rd = 0;
1324
1325         if (!s->input_picture[j])
1326             break;
1327
1328         c->error[0] = c->error[1] = c->error[2] = 0;
1329
1330         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1331         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1332
1333         out_size = encode_frame(c, s->tmp_frames[0]);
1334
1335         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1336
1337         for (i = 0; i < s->max_b_frames + 1; i++) {
1338             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1339
1340             s->tmp_frames[i + 1]->pict_type = is_p ?
1341                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1342             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1343
1344             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1345
1346             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1347         }
1348
1349         /* get the delayed frames */
1350         while (out_size) {
1351             out_size = encode_frame(c, NULL);
1352             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1353         }
1354
1355         rd += c->error[0] + c->error[1] + c->error[2];
1356
1357         if (rd < best_rd) {
1358             best_rd = rd;
1359             best_b_count = j;
1360         }
1361     }
1362
1363     avcodec_close(c);
1364     av_freep(&c);
1365
1366     return best_b_count;
1367 }
1368
1369 static int select_input_picture(MpegEncContext *s)
1370 {
1371     int i, ret;
1372
1373     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1374         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1375     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1376
1377     /* set next picture type & ordering */
1378     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1379         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1380             if (s->picture_in_gop_number < s->gop_size &&
1381                 s->next_picture_ptr &&
1382                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1383                 // FIXME check that te gop check above is +-1 correct
1384                 av_frame_unref(s->input_picture[0]->f);
1385
1386                 ff_vbv_update(s, 0);
1387
1388                 goto no_output_pic;
1389             }
1390         }
1391
1392         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1393             s->next_picture_ptr == NULL || s->intra_only) {
1394             s->reordered_input_picture[0] = s->input_picture[0];
1395             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1396             s->reordered_input_picture[0]->f->coded_picture_number =
1397                 s->coded_picture_number++;
1398         } else {
1399             int b_frames;
1400
1401             if (s->flags & CODEC_FLAG_PASS2) {
1402                 for (i = 0; i < s->max_b_frames + 1; i++) {
1403                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1404
1405                     if (pict_num >= s->rc_context.num_entries)
1406                         break;
1407                     if (!s->input_picture[i]) {
1408                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1409                         break;
1410                     }
1411
1412                     s->input_picture[i]->f->pict_type =
1413                         s->rc_context.entry[pict_num].new_pict_type;
1414                 }
1415             }
1416
1417             if (s->avctx->b_frame_strategy == 0) {
1418                 b_frames = s->max_b_frames;
1419                 while (b_frames && !s->input_picture[b_frames])
1420                     b_frames--;
1421             } else if (s->avctx->b_frame_strategy == 1) {
1422                 for (i = 1; i < s->max_b_frames + 1; i++) {
1423                     if (s->input_picture[i] &&
1424                         s->input_picture[i]->b_frame_score == 0) {
1425                         s->input_picture[i]->b_frame_score =
1426                             get_intra_count(s,
1427                                             s->input_picture[i    ]->f->data[0],
1428                                             s->input_picture[i - 1]->f->data[0],
1429                                             s->linesize) + 1;
1430                     }
1431                 }
1432                 for (i = 0; i < s->max_b_frames + 1; i++) {
1433                     if (s->input_picture[i] == NULL ||
1434                         s->input_picture[i]->b_frame_score - 1 >
1435                             s->mb_num / s->avctx->b_sensitivity)
1436                         break;
1437                 }
1438
1439                 b_frames = FFMAX(0, i - 1);
1440
1441                 /* reset scores */
1442                 for (i = 0; i < b_frames + 1; i++) {
1443                     s->input_picture[i]->b_frame_score = 0;
1444                 }
1445             } else if (s->avctx->b_frame_strategy == 2) {
1446                 b_frames = estimate_best_b_count(s);
1447             } else {
1448                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1449                 b_frames = 0;
1450             }
1451
1452             emms_c();
1453
1454             for (i = b_frames - 1; i >= 0; i--) {
1455                 int type = s->input_picture[i]->f->pict_type;
1456                 if (type && type != AV_PICTURE_TYPE_B)
1457                     b_frames = i;
1458             }
1459             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1460                 b_frames == s->max_b_frames) {
1461                 av_log(s->avctx, AV_LOG_ERROR,
1462                        "warning, too many b frames in a row\n");
1463             }
1464
1465             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1466                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1467                     s->gop_size > s->picture_in_gop_number) {
1468                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1469                 } else {
1470                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1471                         b_frames = 0;
1472                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1473                 }
1474             }
1475
1476             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1477                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1478                 b_frames--;
1479
1480             s->reordered_input_picture[0] = s->input_picture[b_frames];
1481             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1482                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1483             s->reordered_input_picture[0]->f->coded_picture_number =
1484                 s->coded_picture_number++;
1485             for (i = 0; i < b_frames; i++) {
1486                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1487                 s->reordered_input_picture[i + 1]->f->pict_type =
1488                     AV_PICTURE_TYPE_B;
1489                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1490                     s->coded_picture_number++;
1491             }
1492         }
1493     }
1494 no_output_pic:
1495     if (s->reordered_input_picture[0]) {
1496         s->reordered_input_picture[0]->reference =
1497            s->reordered_input_picture[0]->f->pict_type !=
1498                AV_PICTURE_TYPE_B ? 3 : 0;
1499
1500         ff_mpeg_unref_picture(s, &s->new_picture);
1501         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1502             return ret;
1503
1504         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1505             // input is a shared pix, so we can't modifiy it -> alloc a new
1506             // one & ensure that the shared one is reuseable
1507
1508             Picture *pic;
1509             int i = ff_find_unused_picture(s, 0);
1510             if (i < 0)
1511                 return i;
1512             pic = &s->picture[i];
1513
1514             pic->reference = s->reordered_input_picture[0]->reference;
1515             if (ff_alloc_picture(s, pic, 0) < 0) {
1516                 return -1;
1517             }
1518
1519             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1520             if (ret < 0)
1521                 return ret;
1522
1523             /* mark us unused / free shared pic */
1524             av_frame_unref(s->reordered_input_picture[0]->f);
1525             s->reordered_input_picture[0]->shared = 0;
1526
1527             s->current_picture_ptr = pic;
1528         } else {
1529             // input is not a shared pix -> reuse buffer for current_pix
1530             s->current_picture_ptr = s->reordered_input_picture[0];
1531             for (i = 0; i < 4; i++) {
1532                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1533             }
1534         }
1535         ff_mpeg_unref_picture(s, &s->current_picture);
1536         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1537                                        s->current_picture_ptr)) < 0)
1538             return ret;
1539
1540         s->picture_number = s->new_picture.f->display_picture_number;
1541     } else {
1542         ff_mpeg_unref_picture(s, &s->new_picture);
1543     }
1544     return 0;
1545 }
1546
1547 static void frame_end(MpegEncContext *s)
1548 {
1549     if (s->unrestricted_mv &&
1550         s->current_picture.reference &&
1551         !s->intra_only) {
1552         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1553         int hshift = desc->log2_chroma_w;
1554         int vshift = desc->log2_chroma_h;
1555         s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
1556                                 s->current_picture.f->linesize[0],
1557                                 s->h_edge_pos, s->v_edge_pos,
1558                                 EDGE_WIDTH, EDGE_WIDTH,
1559                                 EDGE_TOP | EDGE_BOTTOM);
1560         s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
1561                                 s->current_picture.f->linesize[1],
1562                                 s->h_edge_pos >> hshift,
1563                                 s->v_edge_pos >> vshift,
1564                                 EDGE_WIDTH >> hshift,
1565                                 EDGE_WIDTH >> vshift,
1566                                 EDGE_TOP | EDGE_BOTTOM);
1567         s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
1568                                 s->current_picture.f->linesize[2],
1569                                 s->h_edge_pos >> hshift,
1570                                 s->v_edge_pos >> vshift,
1571                                 EDGE_WIDTH >> hshift,
1572                                 EDGE_WIDTH >> vshift,
1573                                 EDGE_TOP | EDGE_BOTTOM);
1574     }
1575
1576     emms_c();
1577
1578     s->last_pict_type                 = s->pict_type;
1579     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1580     if (s->pict_type!= AV_PICTURE_TYPE_B)
1581         s->last_non_b_pict_type = s->pict_type;
1582
1583     s->avctx->coded_frame = s->current_picture_ptr->f;
1584
1585 }
1586
/**
 * Recompute the noise reduction offsets from the accumulated DCT statistics.
 *
 * Both statistic sets (index 0 and 1) are handled the same way: once the
 * block counter exceeds 1 << 16, the counter and all 64 error sums are halved;
 * then every offset is set to
 *   (noise_reduction * count + error_sum / 2) / (error_sum + 1).
 */
1587 static void update_noise_reduction(MpegEncContext *s)
1588 {
1589     int kind, k;
1590
1591     for (kind = 0; kind < 2; kind++) {
1592         const int saturated = s->dct_count[kind] > (1 << 16);
1593
1594         if (saturated) {
1595             s->dct_count[kind] >>= 1;
1596             for (k = 63; k >= 0; k--)
1597                 s->dct_error_sum[kind][k] >>= 1;
1598         }
1599         for (k = 0; k < 64; k++) {
1600             s->dct_offset[kind][k] =
1601                 (s->avctx->noise_reduction * s->dct_count[kind] +
1602                  s->dct_error_sum[kind][k] / 2) /
1603                 (s->dct_error_sum[kind][k] + 1);
1604         }
1605     }
1606 }
1607
/**
 * Set up the picture references needed to encode s->current_picture_ptr.
 *
 * For non-B pictures a last picture that is no longer the next picture is
 * unreferenced and the last/next picture pointers are advanced. Fresh
 * references are then taken into s->current_picture, s->last_picture and
 * s->next_picture, data pointers and line sizes are adjusted for field
 * pictures, the dequantizer callbacks are selected and, if noise reduction
 * statistics exist, the DCT offsets are refreshed.
 *
 * @return 0 on success, otherwise the negative value returned by
 *         ff_mpeg_ref_picture()
 */
1608 static int frame_start(MpegEncContext *s)
1609 {
1610     int ret;
1611
1612     /* mark & release old frames */
1613     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1614         s->last_picture_ptr != s->next_picture_ptr &&
1615         s->last_picture_ptr->f->buf[0]) {
1616         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1617     }
1618
1619     s->current_picture_ptr->f->pict_type = s->pict_type;
1620     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1621
         /* drop the previous working reference before taking the new one */
1622     ff_mpeg_unref_picture(s, &s->current_picture);
1623     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1624                                    s->current_picture_ptr)) < 0)
1625         return ret;
1626
         /* non-B pictures shift the reference chain; a droppable picture
          * does not become the next reference */
1627     if (s->pict_type != AV_PICTURE_TYPE_B) {
1628         s->last_picture_ptr = s->next_picture_ptr;
1629         if (!s->droppable)
1630             s->next_picture_ptr = s->current_picture_ptr;
1631     }
1632
         /* re-reference last/next only when the picture actually owns a buffer */
1633     if (s->last_picture_ptr) {
1634         ff_mpeg_unref_picture(s, &s->last_picture);
1635         if (s->last_picture_ptr->f->buf[0] &&
1636             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1637                                        s->last_picture_ptr)) < 0)
1638             return ret;
1639     }
1640     if (s->next_picture_ptr) {
1641         ff_mpeg_unref_picture(s, &s->next_picture);
1642         if (s->next_picture_ptr->f->buf[0] &&
1643             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1644                                        s->next_picture_ptr)) < 0)
1645             return ret;
1646     }
1647
         /* field pictures: the bottom field starts one line further down and
          * all three pictures get their line sizes doubled */
1648     if (s->picture_structure!= PICT_FRAME) {
1649         int i;
1650         for (i = 0; i < 4; i++) {
1651             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1652                 s->current_picture.f->data[i] +=
1653                     s->current_picture.f->linesize[i];
1654             }
1655             s->current_picture.f->linesize[i] *= 2;
1656             s->last_picture.f->linesize[i]    *= 2;
1657             s->next_picture.f->linesize[i]    *= 2;
1658         }
1659     }
1660
         /* pick the dequantizer pair matching the quantization scheme */
1661     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1662         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1663         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1664     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1665         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1666         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1667     } else {
1668         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1669         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1670     }
1671
         /* dct_error_sum is expected to exist only while encoding with
          * noise reduction enabled (see the assertion) */
1672     if (s->dct_error_sum) {
1673         av_assert2(s->avctx->noise_reduction && s->encoding);
1674         update_noise_reduction(s);
1675     }
1676
1677     return 0;
1678 }
1679
/**
 * Encode one picture.
 *
 * The input frame is handed to load_input_picture(), the next picture to
 * code is chosen by select_input_picture() and, if one is available, it is
 * encoded into pkt. This includes re-encoding with a larger lambda when the
 * VBV limit is exceeded, stuffing, and the MPEG-1/2 vbv_delay update for CBR.
 *
 * @param avctx      codec context; avctx->priv_data is the MpegEncContext
 * @param pkt        output packet, allocated here through ff_alloc_packet2()
 * @param pic_arg    input frame passed to load_input_picture()
 * @param got_packet set to 1 if pkt carries coded data, to 0 otherwise
 * @return 0 on success, a negative value on failure (AVERROR(ENOMEM) if the
 *         macroblock info side data cannot be allocated)
 */
1680 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1681                           const AVFrame *pic_arg, int *got_packet)
1682 {
1683     MpegEncContext *s = avctx->priv_data;
1684     int i, stuffing_count, ret;
1685     int context_count = s->slice_context_count;
1686
1687     s->picture_in_gop_number++;
1688
1689     if (load_input_picture(s, pic_arg) < 0)
1690         return -1;
1691
1692     if (select_input_picture(s) < 0) {
1693         return -1;
1694     }
1695
1696     /* output? */
1697     if (s->new_picture.f->data[0]) {
1698         if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1699             return ret;
1700         if (s->mb_info) {
1701             s->mb_info_ptr = av_packet_new_side_data(pkt, AV_PKT_DATA_H263_MB_INFO,
1702                                                      s->mb_width*s->mb_height*12);
                 /* the side data allocation can fail; do not continue with a
                  * NULL macroblock info buffer */
1703             if (!s->mb_info_ptr)
1704                 return AVERROR(ENOMEM);
1705             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1706         }
             /* give every slice context a share of the packet buffer that is
              * proportional to the macroblock rows it covers */
1707         for (i = 0; i < context_count; i++) {
1708             int start_y = s->thread_context[i]->start_mb_y;
1709             int   end_y = s->thread_context[i]->  end_mb_y;
1710             int h       = s->mb_height;
1711             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1712             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1713
1714             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1715         }
1716
1717         s->pict_type = s->new_picture.f->pict_type;
1718         //emms_c();
1719         ret = frame_start(s);
1720         if (ret < 0)
1721             return ret;
1722 vbv_retry:
1723         if (encode_picture(s, s->picture_number) < 0)
1724             return -1;
1725
             /* export the bit statistics of this attempt */
1726         avctx->header_bits = s->header_bits;
1727         avctx->mv_bits     = s->mv_bits;
1728         avctx->misc_bits   = s->misc_bits;
1729         avctx->i_tex_bits  = s->i_tex_bits;
1730         avctx->p_tex_bits  = s->p_tex_bits;
1731         avctx->i_count     = s->i_count;
1732         // FIXME f/b_count in avctx
1733         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1734         avctx->skip_count  = s->skip_count;
1735
1736         frame_end(s);
1737
1738         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1739             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1740
1741         if (avctx->rc_buffer_size) {
1742             RateControlContext *rcc = &s->rc_context;
1743             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1744
                 /* picture larger than allowed and lambda can still grow:
                  * raise lambda, undo per-picture state and encode again */
1745             if (put_bits_count(&s->pb) > max_size &&
1746                 s->lambda < s->avctx->lmax) {
1747                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1748                                        (s->qscale + 1) / s->qscale);
1749                 if (s->adaptive_quant) {
1750                     int i;
1751                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1752                         s->lambda_table[i] =
1753                             FFMAX(s->lambda_table[i] + 1,
1754                                   s->lambda_table[i] * (s->qscale + 1) /
1755                                   s->qscale);
1756                 }
1757                 s->mb_skipped = 0;        // done in frame_start()
1758                 // done in encode_picture() so we must undo it
1759                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1760                     if (s->flipflop_rounding          ||
1761                         s->codec_id == AV_CODEC_ID_H263P ||
1762                         s->codec_id == AV_CODEC_ID_MPEG4)
1763                         s->no_rounding ^= 1;
1764                 }
1765                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1766                     s->time_base       = s->last_time_base;
1767                     s->last_non_b_time = s->time - s->pp_time;
1768                 }
                     /* rewind every slice bit writer to the start of its buffer */
1769                 for (i = 0; i < context_count; i++) {
1770                     PutBitContext *pb = &s->thread_context[i]->pb;
1771                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1772                 }
1773                 goto vbv_retry;
1774             }
1775
1776             av_assert0(s->avctx->rc_max_rate);
1777         }
1778
1779         if (s->flags & CODEC_FLAG_PASS1)
1780             ff_write_pass1_stats(s);
1781
             /* copy the per-plane error values to the frames and accumulate
              * them in the codec context */
1782         for (i = 0; i < 4; i++) {
1783             s->current_picture_ptr->f->error[i] =
1784             s->current_picture.f->error[i] =
1785                 s->current_picture.error[i];
1786             avctx->error[i] += s->current_picture_ptr->f->error[i];
1787         }
1788
1789         if (s->flags & CODEC_FLAG_PASS1)
1790             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1791                    avctx->i_tex_bits + avctx->p_tex_bits ==
1792                        put_bits_count(&s->pb));
1793         flush_put_bits(&s->pb);
1794         s->frame_bits  = put_bits_count(&s->pb);
1795
             /* ff_vbv_update() returns the number of stuffing bytes to append */
1796         stuffing_count = ff_vbv_update(s, s->frame_bits);
1797         s->stuffing_bits = 8*stuffing_count;
1798         if (stuffing_count) {
1799             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1800                     stuffing_count + 50) {
1801                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1802                 return -1;
1803             }
1804
1805             switch (s->codec_id) {
1806             case AV_CODEC_ID_MPEG1VIDEO:
1807             case AV_CODEC_ID_MPEG2VIDEO:
1808                 while (stuffing_count--) {
1809                     put_bits(&s->pb, 8, 0);
1810                 }
1811             break;
1812             case AV_CODEC_ID_MPEG4:
                     /* four bytes 00 00 01 C3 followed by 0xFF filler bytes */
1813                 put_bits(&s->pb, 16, 0);
1814                 put_bits(&s->pb, 16, 0x1C3);
1815                 stuffing_count -= 4;
1816                 while (stuffing_count--) {
1817                     put_bits(&s->pb, 8, 0xFF);
1818                 }
1819             break;
1820             default:
1821                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1822             }
1823             flush_put_bits(&s->pb);
1824             s->frame_bits  = put_bits_count(&s->pb);
1825         }
1826
1827         /* update mpeg1/2 vbv_delay for CBR */
1828         if (s->avctx->rc_max_rate                          &&
1829             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1830             s->out_format == FMT_MPEG1                     &&
1831             90000LL * (avctx->rc_buffer_size - 1) <=
1832                 s->avctx->rc_max_rate * 0xFFFFLL) {
1833             int vbv_delay, min_delay;
1834             double inbits  = s->avctx->rc_max_rate *
1835                              av_q2d(s->avctx->time_base);
1836             int    minbits = s->frame_bits - 8 *
1837                              (s->vbv_delay_ptr - s->pb.buf - 1);
1838             double bits    = s->rc_context.buffer_index + minbits - inbits;
1839
1840             if (bits < 0)
1841                 av_log(s->avctx, AV_LOG_ERROR,
1842                        "Internal error, negative bits\n");
1843
1844             assert(s->repeat_first_field == 0);
1845
1846             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1847             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1848                         s->avctx->rc_max_rate;
1849
1850             vbv_delay = FFMAX(vbv_delay, min_delay);
1851
1852             av_assert0(vbv_delay < 0xFFFF);
1853
                 /* patch the 16 delay bits into the three bytes at
                  * vbv_delay_ptr, keeping the surrounding bits */
1854             s->vbv_delay_ptr[0] &= 0xF8;
1855             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1856             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1857             s->vbv_delay_ptr[2] &= 0x07;
1858             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1859             avctx->vbv_delay     = vbv_delay * 300;
1860         }
1861         s->total_bits     += s->frame_bits;
1862         avctx->frame_bits  = s->frame_bits;
1863
             /* timestamps: with reordering (no low_delay) the dts of a non-B
              * picture is the pts of the previously coded non-B picture */
1864         pkt->pts = s->current_picture.f->pts;
1865         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1866             if (!s->current_picture.f->coded_picture_number)
1867                 pkt->dts = pkt->pts - s->dts_delta;
1868             else
1869                 pkt->dts = s->reordered_pts;
1870             s->reordered_pts = pkt->pts;
1871         } else
1872             pkt->dts = pkt->pts;
1873         if (s->current_picture.f->key_frame)
1874             pkt->flags |= AV_PKT_FLAG_KEY;
1875         if (s->mb_info)
1876             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1877     } else {
1878         s->frame_bits = 0;
1879     }
1880
1881     /* release non-reference frames */
1882     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1883         if (!s->picture[i].reference)
1884             ff_mpeg_unref_picture(s, &s->picture[i]);
1885     }
1886
1887     av_assert1((s->frame_bits & 7) == 0);
1888
1889     pkt->size = s->frame_bits / 8;
1890     *got_packet = !!pkt->size;
1891     return 0;
1892 }
1893
/**
 * Zero out block n if it consists only of a few isolated +-1 coefficients.
 *
 * Every +-1 coefficient is scored through tab[], indexed by the number of
 * zeros preceding it in scan order. If the total score stays below
 * |threshold|, the coefficients are cleared and block_last_index[n] is
 * updated. Any coefficient with a magnitude above 1 leaves the block
 * untouched.
 *
 * @param n         index of the block in s->block / s->block_last_index
 * @param threshold score limit; a negative value allows clearing the first
 *                  (DC) coefficient as well, a non-negative one protects it
 */
1894 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1895                                                 int n, int threshold)
1896 {
1897     static const char tab[64] = {
1898         3, 2, 2, 1, 1, 1, 1, 1,
1899         1, 1, 1, 1, 1, 1, 1, 1,
1900         1, 1, 1, 1, 1, 1, 1, 1,
1901         0, 0, 0, 0, 0, 0, 0, 0,
1902         0, 0, 0, 0, 0, 0, 0, 0,
1903         0, 0, 0, 0, 0, 0, 0, 0,
1904         0, 0, 0, 0, 0, 0, 0, 0,
1905         0, 0, 0, 0, 0, 0, 0, 0
1906     };
1907     /* A negative threshold means the DC coefficient may be dropped too. */
1908     const int keep_dc = threshold >= 0;
1909     const int limit   = keep_dc ? threshold : -threshold;
1910     const int last    = s->block_last_index[n];
1911     int16_t *coeffs   = s->block[n];
1912     int zero_run = 0;
1913     int cost     = 0;
1914     int pos;
1915
1916     /* Are all we could set to zero already zero? */
1917     if (last < keep_dc)
1918         return;
1919
1920     /* Walk the coded coefficients in scan order and price every +-1. */
1921     for (pos = 0; pos <= last; pos++) {
1922         const int v = coeffs[s->intra_scantable.permutated[pos]];
1923
1924         /* anything larger than +-1 makes the block worth keeping */
1925         if (v > 1 || v < -1)
1926             return;
1927
1928         if (!v) {
1929             zero_run++;
1930             continue;
1931         }
1932
1933         /* a protected DC coefficient is not scored */
1934         if (keep_dc && pos == 0)
1935             continue;
1936
1937         cost    += tab[zero_run];
1938         zero_run = 0;
1939     }
1940
1941     if (cost >= limit)
1942         return;
1943
1944     /* cheap enough: wipe everything that is allowed to go */
1945     for (pos = keep_dc; pos <= last; pos++)
1946         coeffs[s->intra_scantable.permutated[pos]] = 0;
1947     s->block_last_index[n] = coeffs[0] ? 0 : -1;
1948 }
1949
/**
 * Clamp the quantized coefficients of a block to the range
 * [s->min_qcoeff, s->max_qcoeff].
 *
 * For intra macroblocks the first coefficient in scan order is left alone.
 * If anything was clamped and the macroblock decision mode is
 * FF_MB_DECISION_SIMPLE, the number of clamped coefficients is logged.
 *
 * @param block      coefficients, addressed through the intra scan table
 * @param last_index last scan position to look at (inclusive)
 */
1950 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1951                                int last_index)
1952 {
1953     const int hi = s->max_qcoeff;
1954     const int lo = s->min_qcoeff;
1955     int clipped  = 0;
1956     /* skip clipping of intra dc */
1957     int pos      = s->mb_intra ? 1 : 0;
1958
1959     while (pos <= last_index) {
1960         int16_t *coef = block + s->intra_scantable.permutated[pos++];
1961
1962         if (*coef > hi) {
1963             *coef = hi;
1964             clipped++;
1965         } else if (*coef < lo) {
1966             *coef = lo;
1967             clipped++;
1968         }
1969     }
1970
1971     /* nothing was out of range, so there is nothing to report */
1972     if (!clipped)
1973         return;
1974
1975     /* the message is only emitted in the simple decision mode */
1976     if (s->avctx->mb_decision != FF_MB_DECISION_SIMPLE)
1977         return;
1978
1979     av_log(s->avctx, AV_LOG_INFO,
1980            "warning, clipping %d dct coefficients to %d..%d\n",
1981            clipped, lo, hi);
1982 }
1983
/**
 * Fill an 8x8 weight table from the local activity of an 8x8 pixel block.
 *
 * For every pixel the samples of its 3x3 neighbourhood, clipped to the
 * block, are summed up; the weight is
 *   36 * ff_sqrt(count * sum_of_squares - sum * sum) / count
 * where count is the number of samples in the clipped neighbourhood.
 *
 * @param weight output, 64 entries in row-major order
 * @param ptr    top-left pixel of the block
 * @param stride distance between two pixel rows of ptr
 */
1984 static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
1985 {
1986     int col, row;
1987     // FIXME optimize
1988     for (row = 0; row < 8; row++) {
1989         const int top    = FFMAX(row - 1, 0);
1990         const int bottom = FFMIN(8, row + 2);
1991         for (col = 0; col < 8; col++) {
1992             const int left  = FFMAX(col - 1, 0);
1993             const int right = FFMIN(8, col + 2);
1994             const int count = (bottom - top) * (right - left);
1995             int sum = 0, sqr = 0;
1996             int u, v;
1997             for (v = top; v < bottom; v++)
1998                 for (u = left; u < right; u++) {
1999                     const int pix = ptr[u + v * stride];
2000                     sum += pix;
2001                     sqr += pix * pix;
2002                 }
2003             weight[col + 8 * row] = (36 * ff_sqrt(count * sqr - sum * sum)) / count;
2004         }
2005     }
2006 }
2007
/**
 * Encode the macroblock at (s->mb_x, s->mb_y).
 *
 * Steps, in order: optional per-macroblock quantizer update, locating the
 * source pixels (through the edge emulation buffer when the macroblock
 * crosses the picture border), building either the intra pixel blocks or the
 * motion compensated residual, the optional interlaced DCT decision, DCT and
 * quantization with optional noise shaping refinement and single coefficient
 * elimination, and finally the codec specific macroblock writer.
 *
 * @param motion_x        passed on to the codec specific encode_mb function
 * @param motion_y        passed on to the codec specific encode_mb function
 * @param mb_block_height height of the chroma part of a macroblock, used to
 *                        locate and fetch the chroma source pixels
 * @param mb_block_width  width of the chroma part of a macroblock, used the
 *                        same way
 * @param mb_block_count  number of blocks of the macroblock that are
 *                        quantized and coded
 */
2008 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2009                                                 int motion_x, int motion_y,
2010                                                 int mb_block_height,
2011                                                 int mb_block_width,
2012                                                 int mb_block_count)
2013 {
2014     int16_t weight[12][64];
2015     int16_t orig[12][64];
2016     const int mb_x = s->mb_x;
2017     const int mb_y = s->mb_y;
2018     int i;
2019     int skip_dct[12];
2020     int dct_offset = s->linesize * 8; // default for progressive frames
2021     int uv_dct_offset = s->uvlinesize * 8;
2022     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2023     ptrdiff_t wrap_y, wrap_c;
2024
2025     for (i = 0; i < mb_block_count; i++)
2026         skip_dct[i] = s->skipdct;
2027
         /* per-macroblock quantizer: lambda comes from the lambda table and,
          * unless QP_RD is active, qscale from the picture's qscale table */
2028     if (s->adaptive_quant) {
2029         const int last_qp = s->qscale;
2030         const int mb_xy = mb_x + mb_y * s->mb_stride;
2031
2032         s->lambda = s->lambda_table[mb_xy];
2033         update_qscale(s);
2034
2035         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2036             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2037             s->dquant = s->qscale - last_qp;
2038
                 /* for FMT_H263 output the quantizer step is limited to
                  * -2..2; MPEG-4 additionally forces it to 0 for some inter
                  * macroblock types */
2039             if (s->out_format == FMT_H263) {
2040                 s->dquant = av_clip(s->dquant, -2, 2);
2041
2042                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2043                     if (!s->mb_intra) {
2044                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2045                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2046                                 s->dquant = 0;
2047                         }
2048                         if (s->mv_type == MV_TYPE_8X8)
2049                             s->dquant = 0;
2050                     }
2051                 }
2052             }
2053         }
2054         ff_set_qscale(s, last_qp + s->dquant);
2055     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2056         ff_set_qscale(s, s->qscale + s->dquant);
2057
         /* source pixel pointers of this macroblock inside the input picture */
2058     wrap_y = s->linesize;
2059     wrap_c = s->uvlinesize;
2060     ptr_y  = s->new_picture.f->data[0] +
2061              (mb_y * 16 * wrap_y)              + mb_x * 16;
2062     ptr_cb = s->new_picture.f->data[1] +
2063              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2064     ptr_cr = s->new_picture.f->data[2] +
2065              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2066
         /* macroblock sticks out of the picture: fetch luma and chroma
          * through the edge emulation buffer instead (not done for AMV) */
2067     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2068         uint8_t *ebuf = s->edge_emu_buffer + 36 * wrap_y;
2069         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2070         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2071         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2072                                  wrap_y, wrap_y,
2073                                  16, 16, mb_x * 16, mb_y * 16,
2074                                  s->width, s->height);
2075         ptr_y = ebuf;
2076         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2077                                  wrap_c, wrap_c,
2078                                  mb_block_width, mb_block_height,
2079                                  mb_x * mb_block_width, mb_y * mb_block_height,
2080                                  cw, ch);
2081         ptr_cb = ebuf + 16 * wrap_y;
2082         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2083                                  wrap_c, wrap_c,
2084                                  mb_block_width, mb_block_height,
2085                                  mb_x * mb_block_width, mb_y * mb_block_height,
2086                                  cw, ch);
2087         ptr_cr = ebuf + 16 * wrap_y + 16;
2088     }
2089
2090     if (s->mb_intra) {
             /* intra: choose between progressive and interlaced DCT by
              * comparing the ildct_cmp scores of both line arrangements */
2091         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2092             int progressive_score, interlaced_score;
2093
2094             s->interlaced_dct = 0;
2095             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2096                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2097                                                      NULL, wrap_y, 8) - 400;
2098
2099             if (progressive_score > 0) {
2100                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2101                                                         NULL, wrap_y * 2, 8) +
2102                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2103                                                         NULL, wrap_y * 2, 8);
2104                 if (progressive_score > interlaced_score) {
2105                     s->interlaced_dct = 1;
2106
                         /* the lower blocks now start one line down and
                          * every block reads every second line */
2107                     dct_offset = wrap_y;
2108                     uv_dct_offset = wrap_c;
2109                     wrap_y <<= 1;
2110                     if (s->chroma_format == CHROMA_422 ||
2111                         s->chroma_format == CHROMA_444)
2112                         wrap_c <<= 1;
2113                 }
2114             }
2115         }
2116
             /* load the four luma blocks */
2117         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2118         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2119         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2120         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2121
2122         if (s->flags & CODEC_FLAG_GRAY) {
2123             skip_dct[4] = 1;
2124             skip_dct[5] = 1;
2125         } else {
2126             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2127             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2128             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2129                 s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2130                 s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2131             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2132                 s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
2133                 s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
2134                 s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
2135                 s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
2136                 s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2137                 s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2138             }
2139         }
2140     } else {
2141         op_pixels_func (*op_pix)[4];
2142         qpel_mc_func (*op_qpix)[16];
2143         uint8_t *dest_y, *dest_cb, *dest_cr;
2144
2145         dest_y  = s->dest[0];
2146         dest_cb = s->dest[1];
2147         dest_cr = s->dest[2];
2148
             /* rounding "put" functions unless no_rounding is set for a
              * non-B picture */
2149         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2150             op_pix  = s->hdsp.put_pixels_tab;
2151             op_qpix = s->qdsp.put_qpel_pixels_tab;
2152         } else {
2153             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2154             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2155         }
2156
             /* build the prediction in dest_*; after a forward prediction the
              * "avg" functions are selected for a following backward one */
2157         if (s->mv_dir & MV_DIR_FORWARD) {
2158             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
2159                           s->last_picture.f->data,
2160                           op_pix, op_qpix);
2161             op_pix  = s->hdsp.avg_pixels_tab;
2162             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2163         }
2164         if (s->mv_dir & MV_DIR_BACKWARD) {
2165             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
2166                           s->next_picture.f->data,
2167                           op_pix, op_qpix);
2168         }
2169
             /* inter: same progressive/interlaced decision, made on the
              * difference between prediction and source */
2170         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2171             int progressive_score, interlaced_score;
2172
2173             s->interlaced_dct = 0;
2174             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2175                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2176                                                      ptr_y + wrap_y * 8,
2177                                                      wrap_y, 8) - 400;
2178
2179             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2180                 progressive_score -= 400;
2181
2182             if (progressive_score > 0) {
2183                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2184                                                         wrap_y * 2, 8) +
2185                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2186                                                         ptr_y + wrap_y,
2187                                                         wrap_y * 2, 8);
2188
2189                 if (progressive_score > interlaced_score) {
2190                     s->interlaced_dct = 1;
2191
2192                     dct_offset = wrap_y;
2193                     uv_dct_offset = wrap_c;
2194                     wrap_y <<= 1;
2195                     if (s->chroma_format == CHROMA_422)
2196                         wrap_c <<= 1;
2197                 }
2198             }
2199         }
2200
             /* residual = source - prediction, block by block */
2201         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2202         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2203         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2204                             dest_y + dct_offset, wrap_y);
2205         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2206                             dest_y + dct_offset + 8, wrap_y);
2207
2208         if (s->flags & CODEC_FLAG_GRAY) {
2209             skip_dct[4] = 1;
2210             skip_dct[5] = 1;
2211         } else {
2212             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2213             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2214             if (!s->chroma_y_shift) { /* 422 */
2215                 s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2216                                     dest_cb + uv_dct_offset, wrap_c);
2217                 s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2218                                     dest_cr + uv_dct_offset, wrap_c);
2219             }
2220         }
2221         /* pre quantization */
             /* when the stored mc_mb_var of this macroblock is small compared
              * to qscale, blocks whose SAD against the prediction is below
              * 20 * qscale are not transformed at all */
2222         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2223                 2 * s->qscale * s->qscale) {
2224             // FIXME optimize
2225             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2226                 skip_dct[0] = 1;
2227             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2228                 skip_dct[1] = 1;
2229             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2230                                wrap_y, 8) < 20 * s->qscale)
2231                 skip_dct[2] = 1;
2232             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2233                                wrap_y, 8) < 20 * s->qscale)
2234                 skip_dct[3] = 1;
2235             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2236                 skip_dct[4] = 1;
2237             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2238                 skip_dct[5] = 1;
2239             if (!s->chroma_y_shift) { /* 422 */
2240                 if (s->mecc.sad[1](NULL, ptr_cb + uv_dct_offset,
2241                                    dest_cb + uv_dct_offset,
2242                                    wrap_c, 8) < 20 * s->qscale)
2243                     skip_dct[6] = 1;
2244                 if (s->mecc.sad[1](NULL, ptr_cr + uv_dct_offset,
2245                                    dest_cr + uv_dct_offset,
2246                                    wrap_c, 8) < 20 * s->qscale)
2247                     skip_dct[7] = 1;
2248             }
2249         }
2250     }
2251
         /* noise shaping: compute the visual weights of every block that is
          * going to be transformed and keep a copy of the unquantized input */
2252     if (s->quantizer_noise_shaping) {
2253         if (!skip_dct[0])
2254             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2255         if (!skip_dct[1])
2256             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2257         if (!skip_dct[2])
2258             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2259         if (!skip_dct[3])
2260             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2261         if (!skip_dct[4])
2262             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2263         if (!skip_dct[5])
2264             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2265         if (!s->chroma_y_shift) { /* 422 */
2266             if (!skip_dct[6])
2267                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2268                                   wrap_c);
2269             if (!skip_dct[7])
2270                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2271                                   wrap_c);
2272         }
2273         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2274     }
2275
2276     /* DCT & quantize */
2277     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2278     {
2279         for (i = 0; i < mb_block_count; i++) {
2280             if (!skip_dct[i]) {
2281                 int overflow;
2282                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2283                 // FIXME we could decide to change to quantizer instead of
2284                 // clipping
2285                 // JS: I don't think that would be a good idea it could lower
2286                 //     quality instead of improve it. Just INTRADC clipping
2287                 //     deserves changes in quantizer
2288                 if (overflow)
2289                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2290             } else
2291                 s->block_last_index[i] = -1;
2292         }
             /* second pass over the quantized blocks using the weights and
              * the saved originals */
2293         if (s->quantizer_noise_shaping) {
2294             for (i = 0; i < mb_block_count; i++) {
2295                 if (!skip_dct[i]) {
2296                     s->block_last_index[i] =
2297                         dct_quantize_refine(s, s->block[i], weight[i],
2298                                             orig[i], i, s->qscale);
2299                 }
2300             }
2301         }
2302
             /* coefficient elimination is applied to inter macroblocks only */
2303         if (s->luma_elim_threshold && !s->mb_intra)
2304             for (i = 0; i < 4; i++)
2305                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2306         if (s->chroma_elim_threshold && !s->mb_intra)
2307             for (i = 4; i < mb_block_count; i++)
2308                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2309
2310         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2311             for (i = 0; i < mb_block_count; i++) {
2312                 if (s->block_last_index[i] == -1)
2313                     s->coded_score[i] = INT_MAX / 256;
2314             }
2315         }
2316     }
2317
         /* gray intra macroblock: chroma blocks carry only a DC value
          * derived from 1024 and the chroma DC scale */
2318     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2319         s->block_last_index[4] =
2320         s->block_last_index[5] = 0;
2321         s->block[4][0] =
2322         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2323         if (!s->chroma_y_shift) { /* 422 / 444 */
2324             for (i=6; i<12; i++) {
2325                 s->block_last_index[i] = 0;
2326                 s->block[i][0] = s->block[4][0];
2327             }
2328         }
2329     }
2330
2331     // non c quantize code returns incorrect block_last_index FIXME
         /* recompute the last index by scanning backwards for the last
          * nonzero coefficient in scan order */
2332     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2333         for (i = 0; i < mb_block_count; i++) {
2334             int j;
2335             if (s->block_last_index[i] > 0) {
2336                 for (j = 63; j > 0; j--) {
2337                     if (s->block[i][s->intra_scantable.permutated[j]])
2338                         break;
2339                 }
2340                 s->block_last_index[i] = j;
2341             }
2342         }
2343     }
2344
2345     /* huffman encode */
2346     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2347     case AV_CODEC_ID_MPEG1VIDEO:
2348     case AV_CODEC_ID_MPEG2VIDEO:
2349         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2350             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2351         break;
2352     case AV_CODEC_ID_MPEG4:
2353         if (CONFIG_MPEG4_ENCODER)
2354             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2355         break;
2356     case AV_CODEC_ID_MSMPEG4V2:
2357     case AV_CODEC_ID_MSMPEG4V3:
2358     case AV_CODEC_ID_WMV1:
2359         if (CONFIG_MSMPEG4_ENCODER)
2360             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2361         break;
2362     case AV_CODEC_ID_WMV2:
2363         if (CONFIG_WMV2_ENCODER)
2364             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2365         break;
2366     case AV_CODEC_ID_H261:
2367         if (CONFIG_H261_ENCODER)
2368             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2369         break;
2370     case AV_CODEC_ID_H263:
2371     case AV_CODEC_ID_H263P:
2372     case AV_CODEC_ID_FLV1:
2373     case AV_CODEC_ID_RV10:
2374     case AV_CODEC_ID_RV20:
2375         if (CONFIG_H263_ENCODER)
2376             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2377         break;
2378     case AV_CODEC_ID_MJPEG:
2379     case AV_CODEC_ID_AMV:
2380         if (CONFIG_MJPEG_ENCODER)
2381             ff_mjpeg_encode_mb(s, s->block);
2382         break;
2383     default:
2384         av_assert1(0);
2385     }
2386 }
2387
2388 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2389 {
2390     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2391     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2392     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2393 }
2394
2395 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2396     int i;
2397
2398     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2399
2400     /* mpeg1 */
2401     d->mb_skip_run= s->mb_skip_run;
2402     for(i=0; i<3; i++)
2403         d->last_dc[i] = s->last_dc[i];
2404
2405     /* statistics */
2406     d->mv_bits= s->mv_bits;
2407     d->i_tex_bits= s->i_tex_bits;
2408     d->p_tex_bits= s->p_tex_bits;
2409     d->i_count= s->i_count;
2410     d->f_count= s->f_count;
2411     d->b_count= s->b_count;
2412     d->skip_count= s->skip_count;
2413     d->misc_bits= s->misc_bits;
2414     d->last_bits= 0;
2415
2416     d->mb_skipped= 0;
2417     d->qscale= s->qscale;
2418     d->dquant= s->dquant;
2419
2420     d->esc3_level_length= s->esc3_level_length;
2421 }
2422
2423 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2424     int i;
2425
2426     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2427     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2428
2429     /* mpeg1 */
2430     d->mb_skip_run= s->mb_skip_run;
2431     for(i=0; i<3; i++)
2432         d->last_dc[i] = s->last_dc[i];
2433
2434     /* statistics */
2435     d->mv_bits= s->mv_bits;
2436     d->i_tex_bits= s->i_tex_bits;
2437     d->p_tex_bits= s->p_tex_bits;
2438     d->i_count= s->i_count;
2439     d->f_count= s->f_count;
2440     d->b_count= s->b_count;
2441     d->skip_count= s->skip_count;
2442     d->misc_bits= s->misc_bits;
2443
2444     d->mb_intra= s->mb_intra;
2445     d->mb_skipped= s->mb_skipped;
2446     d->mv_type= s->mv_type;
2447     d->mv_dir= s->mv_dir;
2448     d->pb= s->pb;
2449     if(s->data_partitioning){
2450         d->pb2= s->pb2;
2451         d->tex_pb= s->tex_pb;
2452     }
2453     d->block= s->block;
2454     for(i=0; i<8; i++)
2455         d->block_last_index[i]= s->block_last_index[i];
2456     d->interlaced_dct= s->interlaced_dct;
2457     d->qscale= s->qscale;
2458
2459     d->esc3_level_length= s->esc3_level_length;
2460 }
2461
2462 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2463                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2464                            int *dmin, int *next_block, int motion_x, int motion_y)
2465 {
2466     int score;
2467     uint8_t *dest_backup[3];
2468
2469     copy_context_before_encode(s, backup, type);
2470
2471     s->block= s->blocks[*next_block];
2472     s->pb= pb[*next_block];
2473     if(s->data_partitioning){
2474         s->pb2   = pb2   [*next_block];
2475         s->tex_pb= tex_pb[*next_block];
2476     }
2477
2478     if(*next_block){
2479         memcpy(dest_backup, s->dest, sizeof(s->dest));
2480         s->dest[0] = s->rd_scratchpad;
2481         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2482         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2483         av_assert0(s->linesize >= 32); //FIXME
2484     }
2485
2486     encode_mb(s, motion_x, motion_y);
2487
2488     score= put_bits_count(&s->pb);
2489     if(s->data_partitioning){
2490         score+= put_bits_count(&s->pb2);
2491         score+= put_bits_count(&s->tex_pb);
2492     }
2493
2494     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2495         ff_MPV_decode_mb(s, s->block);
2496
2497         score *= s->lambda2;
2498         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2499     }
2500
2501     if(*next_block){
2502         memcpy(s->dest, dest_backup, sizeof(s->dest));
2503     }
2504
2505     if(score<*dmin){
2506         *dmin= score;
2507         *next_block^=1;
2508
2509         copy_context_after_encode(best, s, type);
2510     }
2511 }
2512
2513 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2514     uint32_t *sq = ff_square_tab + 256;
2515     int acc=0;
2516     int x,y;
2517
2518     if(w==16 && h==16)
2519         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2520     else if(w==8 && h==8)
2521         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2522
2523     for(y=0; y<h; y++){
2524         for(x=0; x<w; x++){
2525             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2526         }
2527     }
2528
2529     av_assert2(acc>=0);
2530
2531     return acc;
2532 }
2533
2534 static int sse_mb(MpegEncContext *s){
2535     int w= 16;
2536     int h= 16;
2537
2538     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2539     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2540
2541     if(w==16 && h==16)
2542       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2543         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2544                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2545                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2546       }else{
2547         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2548                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2549                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2550       }
2551     else
2552         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2553                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2554                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2555 }
2556
2557 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2558     MpegEncContext *s= *(void**)arg;
2559
2560
2561     s->me.pre_pass=1;
2562     s->me.dia_size= s->avctx->pre_dia_size;
2563     s->first_slice_line=1;
2564     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2565         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2566             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2567         }
2568         s->first_slice_line=0;
2569     }
2570
2571     s->me.pre_pass=0;
2572
2573     return 0;
2574 }
2575
2576 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2577     MpegEncContext *s= *(void**)arg;
2578
2579     ff_check_alignment();
2580
2581     s->me.dia_size= s->avctx->dia_size;
2582     s->first_slice_line=1;
2583     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2584         s->mb_x=0; //for block init below
2585         ff_init_block_index(s);
2586         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2587             s->block_index[0]+=2;
2588             s->block_index[1]+=2;
2589             s->block_index[2]+=2;
2590             s->block_index[3]+=2;
2591
2592             /* compute motion vector & mb_type and store in context */
2593             if(s->pict_type==AV_PICTURE_TYPE_B)
2594                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2595             else
2596                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2597         }
2598         s->first_slice_line=0;
2599     }
2600     return 0;
2601 }
2602
2603 static int mb_var_thread(AVCodecContext *c, void *arg){
2604     MpegEncContext *s= *(void**)arg;
2605     int mb_x, mb_y;
2606
2607     ff_check_alignment();
2608
2609     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2610         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2611             int xx = mb_x * 16;
2612             int yy = mb_y * 16;
2613             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2614             int varc;
2615             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2616
2617             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2618                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2619
2620             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2621             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2622             s->me.mb_var_sum_temp    += varc;
2623         }
2624     }
2625     return 0;
2626 }
2627
2628 static void write_slice_end(MpegEncContext *s){
2629     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2630         if(s->partitioned_frame){
2631             ff_mpeg4_merge_partitions(s);
2632         }
2633
2634         ff_mpeg4_stuffing(&s->pb);
2635     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2636         ff_mjpeg_encode_stuffing(s);
2637     }
2638
2639     avpriv_align_put_bits(&s->pb);
2640     flush_put_bits(&s->pb);
2641
2642     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2643         s->misc_bits+= get_bits_diff(s);
2644 }
2645
2646 static void write_mb_info(MpegEncContext *s)
2647 {
2648     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2649     int offset = put_bits_count(&s->pb);
2650     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2651     int gobn = s->mb_y / s->gob_index;
2652     int pred_x, pred_y;
2653     if (CONFIG_H263_ENCODER)
2654         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2655     bytestream_put_le32(&ptr, offset);
2656     bytestream_put_byte(&ptr, s->qscale);
2657     bytestream_put_byte(&ptr, gobn);
2658     bytestream_put_le16(&ptr, mba);
2659     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2660     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2661     /* 4MV not implemented */
2662     bytestream_put_byte(&ptr, 0); /* hmv2 */
2663     bytestream_put_byte(&ptr, 0); /* vmv2 */
2664 }
2665
2666 static void update_mb_info(MpegEncContext *s, int startcode)
2667 {
2668     if (!s->mb_info)
2669         return;
2670     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2671         s->mb_info_size += 12;
2672         s->prev_mb_info = s->last_mb_info;
2673     }
2674     if (startcode) {
2675         s->prev_mb_info = put_bits_count(&s->pb)/8;
2676         /* This might have incremented mb_info_size above, and we return without
2677          * actually writing any info into that slot yet. But in that case,
2678          * this will be called again at the start of the after writing the
2679          * start code, actually writing the mb info. */
2680         return;
2681     }
2682
2683     s->last_mb_info = put_bits_count(&s->pb)/8;
2684     if (!s->mb_info_size)
2685         s->mb_info_size += 12;
2686     write_mb_info(s);
2687 }
2688
2689 static int encode_thread(AVCodecContext *c, void *arg){
2690     MpegEncContext *s= *(void**)arg;
2691     int mb_x, mb_y, pdif = 0;
2692     int chr_h= 16>>s->chroma_y_shift;
2693     int i, j;
2694     MpegEncContext best_s, backup_s;
2695     uint8_t bit_buf[2][MAX_MB_BYTES];
2696     uint8_t bit_buf2[2][MAX_MB_BYTES];
2697     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2698     PutBitContext pb[2], pb2[2], tex_pb[2];
2699
2700     ff_check_alignment();
2701
2702     for(i=0; i<2; i++){
2703         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2704         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2705         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2706     }
2707
2708     s->last_bits= put_bits_count(&s->pb);
2709     s->mv_bits=0;
2710     s->misc_bits=0;
2711     s->i_tex_bits=0;
2712     s->p_tex_bits=0;
2713     s->i_count=0;
2714     s->f_count=0;
2715     s->b_count=0;
2716     s->skip_count=0;
2717
2718     for(i=0; i<3; i++){
2719         /* init last dc values */
2720         /* note: quant matrix value (8) is implied here */
2721         s->last_dc[i] = 128 << s->intra_dc_precision;
2722
2723         s->current_picture.error[i] = 0;
2724     }
2725     if(s->codec_id==AV_CODEC_ID_AMV){
2726         s->last_dc[0] = 128*8/13;
2727         s->last_dc[1] = 128*8/14;
2728         s->last_dc[2] = 128*8/14;
2729     }
2730     s->mb_skip_run = 0;
2731     memset(s->last_mv, 0, sizeof(s->last_mv));
2732
2733     s->last_mv_dir = 0;
2734
2735     switch(s->codec_id){
2736     case AV_CODEC_ID_H263:
2737     case AV_CODEC_ID_H263P:
2738     case AV_CODEC_ID_FLV1:
2739         if (CONFIG_H263_ENCODER)
2740             s->gob_index = ff_h263_get_gob_height(s);
2741         break;
2742     case AV_CODEC_ID_MPEG4:
2743         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2744             ff_mpeg4_init_partitions(s);
2745         break;
2746     }
2747
2748     s->resync_mb_x=0;
2749     s->resync_mb_y=0;
2750     s->first_slice_line = 1;
2751     s->ptr_lastgob = s->pb.buf;
2752     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2753         s->mb_x=0;
2754         s->mb_y= mb_y;
2755
2756         ff_set_qscale(s, s->qscale);
2757         ff_init_block_index(s);
2758
2759         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2760             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2761             int mb_type= s->mb_type[xy];
2762 //            int d;
2763             int dmin= INT_MAX;
2764             int dir;
2765
2766             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2767                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2768                 return -1;
2769             }
2770             if(s->data_partitioning){
2771                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2772                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2773                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2774                     return -1;
2775                 }
2776             }
2777
2778             s->mb_x = mb_x;
2779             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2780             ff_update_block_index(s);
2781
2782             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2783                 ff_h261_reorder_mb_index(s);
2784                 xy= s->mb_y*s->mb_stride + s->mb_x;
2785                 mb_type= s->mb_type[xy];
2786             }
2787
2788             /* write gob / video packet header  */
2789             if(s->rtp_mode){
2790                 int current_packet_size, is_gob_start;
2791
2792                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2793
2794                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2795
2796                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2797
2798                 switch(s->codec_id){
2799                 case AV_CODEC_ID_H263:
2800                 case AV_CODEC_ID_H263P:
2801                     if(!s->h263_slice_structured)
2802                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2803                     break;
2804                 case AV_CODEC_ID_MPEG2VIDEO:
2805                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2806                 case AV_CODEC_ID_MPEG1VIDEO:
2807                     if(s->mb_skip_run) is_gob_start=0;
2808                     break;
2809                 case AV_CODEC_ID_MJPEG:
2810                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2811                     break;
2812                 }
2813
2814                 if(is_gob_start){
2815                     if(s->start_mb_y != mb_y || mb_x!=0){
2816                         write_slice_end(s);
2817
2818                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2819                             ff_mpeg4_init_partitions(s);
2820                         }
2821                     }
2822
2823                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2824                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2825
2826                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2827                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2828                         int d = 100 / s->error_rate;
2829                         if(r % d == 0){
2830                             current_packet_size=0;
2831                             s->pb.buf_ptr= s->ptr_lastgob;
2832                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2833                         }
2834                     }
2835
2836                     if (s->avctx->rtp_callback){
2837                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2838                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2839                     }
2840                     update_mb_info(s, 1);
2841
2842                     switch(s->codec_id){
2843                     case AV_CODEC_ID_MPEG4:
2844                         if (CONFIG_MPEG4_ENCODER) {
2845                             ff_mpeg4_encode_video_packet_header(s);
2846                             ff_mpeg4_clean_buffers(s);
2847                         }
2848                     break;
2849                     case AV_CODEC_ID_MPEG1VIDEO:
2850                     case AV_CODEC_ID_MPEG2VIDEO:
2851                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2852                             ff_mpeg1_encode_slice_header(s);
2853                             ff_mpeg1_clean_buffers(s);
2854                         }
2855                     break;
2856                     case AV_CODEC_ID_H263:
2857                     case AV_CODEC_ID_H263P:
2858                         if (CONFIG_H263_ENCODER)
2859                             ff_h263_encode_gob_header(s, mb_y);
2860                     break;
2861                     }
2862
2863                     if(s->flags&CODEC_FLAG_PASS1){
2864                         int bits= put_bits_count(&s->pb);
2865                         s->misc_bits+= bits - s->last_bits;
2866                         s->last_bits= bits;
2867                     }
2868
2869                     s->ptr_lastgob += current_packet_size;
2870                     s->first_slice_line=1;
2871                     s->resync_mb_x=mb_x;
2872                     s->resync_mb_y=mb_y;
2873                 }
2874             }
2875
2876             if(  (s->resync_mb_x   == s->mb_x)
2877                && s->resync_mb_y+1 == s->mb_y){
2878                 s->first_slice_line=0;
2879             }
2880
2881             s->mb_skipped=0;
2882             s->dquant=0; //only for QP_RD
2883
2884             update_mb_info(s, 0);
2885
2886             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2887                 int next_block=0;
2888                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2889
2890                 copy_context_before_encode(&backup_s, s, -1);
2891                 backup_s.pb= s->pb;
2892                 best_s.data_partitioning= s->data_partitioning;
2893                 best_s.partitioned_frame= s->partitioned_frame;
2894                 if(s->data_partitioning){
2895                     backup_s.pb2= s->pb2;
2896                     backup_s.tex_pb= s->tex_pb;
2897                 }
2898
2899                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2900                     s->mv_dir = MV_DIR_FORWARD;
2901                     s->mv_type = MV_TYPE_16X16;
2902                     s->mb_intra= 0;
2903                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2904                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2905                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2906                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2907                 }
2908                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2909                     s->mv_dir = MV_DIR_FORWARD;
2910                     s->mv_type = MV_TYPE_FIELD;
2911                     s->mb_intra= 0;
2912                     for(i=0; i<2; i++){
2913                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2914                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2915                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2916                     }
2917                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2918                                  &dmin, &next_block, 0, 0);
2919                 }
2920                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2921                     s->mv_dir = MV_DIR_FORWARD;
2922                     s->mv_type = MV_TYPE_16X16;
2923                     s->mb_intra= 0;
2924                     s->mv[0][0][0] = 0;
2925                     s->mv[0][0][1] = 0;
2926                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2927                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2928                 }
2929                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2930                     s->mv_dir = MV_DIR_FORWARD;
2931                     s->mv_type = MV_TYPE_8X8;
2932                     s->mb_intra= 0;
2933                     for(i=0; i<4; i++){
2934                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2935                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2936                     }
2937                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2938                                  &dmin, &next_block, 0, 0);
2939                 }
2940                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2941                     s->mv_dir = MV_DIR_FORWARD;
2942                     s->mv_type = MV_TYPE_16X16;
2943                     s->mb_intra= 0;
2944                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2945                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2946                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2947                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2948                 }
2949                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2950                     s->mv_dir = MV_DIR_BACKWARD;
2951                     s->mv_type = MV_TYPE_16X16;
2952                     s->mb_intra= 0;
2953                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2954                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2955                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2956                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2957                 }
2958                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2959                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2960                     s->mv_type = MV_TYPE_16X16;
2961                     s->mb_intra= 0;
2962                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2963                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2964                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2965                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2966                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2967                                  &dmin, &next_block, 0, 0);
2968                 }
2969                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2970                     s->mv_dir = MV_DIR_FORWARD;
2971                     s->mv_type = MV_TYPE_FIELD;
2972                     s->mb_intra= 0;
2973                     for(i=0; i<2; i++){
2974                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2975                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2976                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2977                     }
2978                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2979                                  &dmin, &next_block, 0, 0);
2980                 }
2981                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2982                     s->mv_dir = MV_DIR_BACKWARD;
2983                     s->mv_type = MV_TYPE_FIELD;
2984                     s->mb_intra= 0;
2985                     for(i=0; i<2; i++){
2986                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2987                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2988                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2989                     }
2990                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2991                                  &dmin, &next_block, 0, 0);
2992                 }
2993                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2994                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2995                     s->mv_type = MV_TYPE_FIELD;
2996                     s->mb_intra= 0;
2997                     for(dir=0; dir<2; dir++){
2998                         for(i=0; i<2; i++){
2999                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3000                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3001                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3002                         }
3003                     }
3004                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
3005                                  &dmin, &next_block, 0, 0);
3006                 }
3007                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3008                     s->mv_dir = 0;
3009                     s->mv_type = MV_TYPE_16X16;
3010                     s->mb_intra= 1;
3011                     s->mv[0][0][0] = 0;
3012                     s->mv[0][0][1] = 0;
3013                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3014                                  &dmin, &next_block, 0, 0);
3015                     if(s->h263_pred || s->h263_aic){
3016                         if(best_s.mb_intra)
3017                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3018                         else
3019                             ff_clean_intra_table_entries(s); //old mode?
3020                     }
3021                 }
3022
3023                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3024                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3025                         const int last_qp= backup_s.qscale;
3026                         int qpi, qp, dc[6];
3027                         int16_t ac[6][16];
3028                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3029                         static const int dquant_tab[4]={-1,1,-2,2};
3030                         int storecoefs = s->mb_intra && s->dc_val[0];
3031
3032                         av_assert2(backup_s.dquant == 0);
3033
3034                         //FIXME intra
3035                         s->mv_dir= best_s.mv_dir;
3036                         s->mv_type = MV_TYPE_16X16;
3037                         s->mb_intra= best_s.mb_intra;
3038                         s->mv[0][0][0] = best_s.mv[0][0][0];
3039                         s->mv[0][0][1] = best_s.mv[0][0][1];
3040                         s->mv[1][0][0] = best_s.mv[1][0][0];
3041                         s->mv[1][0][1] = best_s.mv[1][0][1];
3042
3043                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3044                         for(; qpi<4; qpi++){
3045                             int dquant= dquant_tab[qpi];
3046                             qp= last_qp + dquant;
3047                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3048                                 continue;
3049                             backup_s.dquant= dquant;
3050                             if(storecoefs){
3051                                 for(i=0; i<6; i++){
3052                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3053                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3054                                 }
3055                             }
3056
3057                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3058                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3059                             if(best_s.qscale != qp){
3060                                 if(storecoefs){
3061                                     for(i=0; i<6; i++){
3062                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3063                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3064                                     }
3065                                 }
3066                             }
3067                         }
3068                     }
3069                 }
3070                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3071                     int mx= s->b_direct_mv_table[xy][0];
3072                     int my= s->b_direct_mv_table[xy][1];
3073
3074                     backup_s.dquant = 0;
3075                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3076                     s->mb_intra= 0;
3077                     ff_mpeg4_set_direct_mv(s, mx, my);
3078                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3079                                  &dmin, &next_block, mx, my);
3080                 }
3081                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3082                     backup_s.dquant = 0;
3083                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3084                     s->mb_intra= 0;
3085                     ff_mpeg4_set_direct_mv(s, 0, 0);
3086                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3087                                  &dmin, &next_block, 0, 0);
3088                 }
3089                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3090                     int coded=0;
3091                     for(i=0; i<6; i++)
3092                         coded |= s->block_last_index[i];
3093                     if(coded){
3094                         int mx,my;
3095                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3096                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3097                             mx=my=0; //FIXME find the one we actually used
3098                             ff_mpeg4_set_direct_mv(s, mx, my);
3099                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3100                             mx= s->mv[1][0][0];
3101                             my= s->mv[1][0][1];
3102                         }else{
3103                             mx= s->mv[0][0][0];
3104                             my= s->mv[0][0][1];
3105                         }
3106
3107                         s->mv_dir= best_s.mv_dir;
3108                         s->mv_type = best_s.mv_type;
3109                         s->mb_intra= 0;
3110 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3111                         s->mv[0][0][1] = best_s.mv[0][0][1];
3112                         s->mv[1][0][0] = best_s.mv[1][0][0];
3113                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3114                         backup_s.dquant= 0;
3115                         s->skipdct=1;
3116                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3117                                         &dmin, &next_block, mx, my);
3118                         s->skipdct=0;
3119                     }
3120                 }
3121
3122                 s->current_picture.qscale_table[xy] = best_s.qscale;
3123
3124                 copy_context_after_encode(s, &best_s, -1);
3125
3126                 pb_bits_count= put_bits_count(&s->pb);
3127                 flush_put_bits(&s->pb);
3128                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3129                 s->pb= backup_s.pb;
3130
3131                 if(s->data_partitioning){
3132                     pb2_bits_count= put_bits_count(&s->pb2);
3133                     flush_put_bits(&s->pb2);
3134                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3135                     s->pb2= backup_s.pb2;
3136
3137                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3138                     flush_put_bits(&s->tex_pb);
3139                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3140                     s->tex_pb= backup_s.tex_pb;
3141                 }
3142                 s->last_bits= put_bits_count(&s->pb);
3143
3144                 if (CONFIG_H263_ENCODER &&
3145                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3146                     ff_h263_update_motion_val(s);
3147
3148                 if(next_block==0){ //FIXME 16 vs linesize16
3149                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
3150                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3151                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3152                 }
3153
3154                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3155                     ff_MPV_decode_mb(s, s->block);
3156             } else {
3157                 int motion_x = 0, motion_y = 0;
3158                 s->mv_type=MV_TYPE_16X16;
3159                 // only one MB-Type possible
3160
3161                 switch(mb_type){
3162                 case CANDIDATE_MB_TYPE_INTRA:
3163                     s->mv_dir = 0;
3164                     s->mb_intra= 1;
3165                     motion_x= s->mv[0][0][0] = 0;
3166                     motion_y= s->mv[0][0][1] = 0;
3167                     break;
3168                 case CANDIDATE_MB_TYPE_INTER:
3169                     s->mv_dir = MV_DIR_FORWARD;
3170                     s->mb_intra= 0;
3171                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3172                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3173                     break;
3174                 case CANDIDATE_MB_TYPE_INTER_I:
3175                     s->mv_dir = MV_DIR_FORWARD;
3176                     s->mv_type = MV_TYPE_FIELD;
3177                     s->mb_intra= 0;
3178                     for(i=0; i<2; i++){
3179                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3180                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3181                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3182                     }
3183                     break;
3184                 case CANDIDATE_MB_TYPE_INTER4V:
3185                     s->mv_dir = MV_DIR_FORWARD;
3186                     s->mv_type = MV_TYPE_8X8;
3187                     s->mb_intra= 0;
3188                     for(i=0; i<4; i++){
3189                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3190                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3191                     }
3192                     break;
3193                 case CANDIDATE_MB_TYPE_DIRECT:
3194                     if (CONFIG_MPEG4_ENCODER) {
3195                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3196                         s->mb_intra= 0;
3197                         motion_x=s->b_direct_mv_table[xy][0];
3198                         motion_y=s->b_direct_mv_table[xy][1];
3199                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3200                     }
3201                     break;
3202                 case CANDIDATE_MB_TYPE_DIRECT0:
3203                     if (CONFIG_MPEG4_ENCODER) {
3204                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3205                         s->mb_intra= 0;
3206                         ff_mpeg4_set_direct_mv(s, 0, 0);
3207                     }
3208                     break;
3209                 case CANDIDATE_MB_TYPE_BIDIR:
3210                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3211                     s->mb_intra= 0;
3212                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3213                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3214                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3215                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3216                     break;
3217                 case CANDIDATE_MB_TYPE_BACKWARD:
3218                     s->mv_dir = MV_DIR_BACKWARD;
3219                     s->mb_intra= 0;
3220                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3221                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3222                     break;
3223                 case CANDIDATE_MB_TYPE_FORWARD:
3224                     s->mv_dir = MV_DIR_FORWARD;
3225                     s->mb_intra= 0;
3226                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3227                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3228                     break;
3229                 case CANDIDATE_MB_TYPE_FORWARD_I:
3230                     s->mv_dir = MV_DIR_FORWARD;
3231                     s->mv_type = MV_TYPE_FIELD;
3232                     s->mb_intra= 0;
3233                     for(i=0; i<2; i++){
3234                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3235                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3236                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3237                     }
3238                     break;
3239                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3240                     s->mv_dir = MV_DIR_BACKWARD;
3241                     s->mv_type = MV_TYPE_FIELD;
3242                     s->mb_intra= 0;
3243                     for(i=0; i<2; i++){
3244                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3245                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3246                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3247                     }
3248                     break;
3249                 case CANDIDATE_MB_TYPE_BIDIR_I:
3250                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3251                     s->mv_type = MV_TYPE_FIELD;
3252                     s->mb_intra= 0;
3253                     for(dir=0; dir<2; dir++){
3254                         for(i=0; i<2; i++){
3255                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3256                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3257                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3258                         }
3259                     }
3260                     break;
3261                 default:
3262                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3263                 }
3264
3265                 encode_mb(s, motion_x, motion_y);
3266
3267                 // RAL: Update last macroblock type
3268                 s->last_mv_dir = s->mv_dir;
3269
3270                 if (CONFIG_H263_ENCODER &&
3271                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3272                     ff_h263_update_motion_val(s);
3273
3274                 ff_MPV_decode_mb(s, s->block);
3275             }
3276
3277             /* clean the MV table in IPS frames for direct mode in B frames */
3278             if(s->mb_intra /* && I,P,S_TYPE */){
3279                 s->p_mv_table[xy][0]=0;
3280                 s->p_mv_table[xy][1]=0;
3281             }
3282
3283             if(s->flags&CODEC_FLAG_PSNR){
3284                 int w= 16;
3285                 int h= 16;
3286
3287                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3288                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3289
3290                 s->current_picture.error[0] += sse(
3291                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3292                     s->dest[0], w, h, s->linesize);
3293                 s->current_picture.error[1] += sse(
3294                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3295                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3296                 s->current_picture.error[2] += sse(
3297                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3298                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3299             }
3300             if(s->loop_filter){
3301                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3302                     ff_h263_loop_filter(s);
3303             }
3304             av_dlog(s->avctx, "MB %d %d bits\n",
3305                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3306         }
3307     }
3308
3309     //not beautiful here but we must write it before flushing so it has to be here
3310     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3311         ff_msmpeg4_encode_ext_header(s);
3312
3313     write_slice_end(s);
3314
3315     /* Send the last GOB if RTP */
3316     if (s->avctx->rtp_callback) {
3317         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3318         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3319         /* Call the RTP callback to send the last GOB */
3320         emms_c();
3321         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3322     }
3323
3324     return 0;
3325 }
3326
3327 #define MERGE(field) dst->field += src->field; src->field=0
3328 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3329     MERGE(me.scene_change_score);
3330     MERGE(me.mc_mb_var_sum_temp);
3331     MERGE(me.mb_var_sum_temp);
3332 }
3333
3334 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3335     int i;
3336
3337     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3338     MERGE(dct_count[1]);
3339     MERGE(mv_bits);
3340     MERGE(i_tex_bits);
3341     MERGE(p_tex_bits);
3342     MERGE(i_count);
3343     MERGE(f_count);
3344     MERGE(b_count);
3345     MERGE(skip_count);
3346     MERGE(misc_bits);
3347     MERGE(er.error_count);
3348     MERGE(padding_bug_score);
3349     MERGE(current_picture.error[0]);
3350     MERGE(current_picture.error[1]);
3351     MERGE(current_picture.error[2]);
3352
3353     if(dst->avctx->noise_reduction){
3354         for(i=0; i<64; i++){
3355             MERGE(dct_error_sum[0][i]);
3356             MERGE(dct_error_sum[1][i]);
3357         }
3358     }
3359
3360     assert(put_bits_count(&src->pb) % 8 ==0);
3361     assert(put_bits_count(&dst->pb) % 8 ==0);
3362     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3363     flush_put_bits(&dst->pb);
3364 }
3365
3366 static int estimate_qp(MpegEncContext *s, int dry_run){
3367     if (s->next_lambda){
3368         s->current_picture_ptr->f->quality =
3369         s->current_picture.f->quality = s->next_lambda;
3370         if(!dry_run) s->next_lambda= 0;
3371     } else if (!s->fixed_qscale) {
3372         s->current_picture_ptr->f->quality =
3373         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3374         if (s->current_picture.f->quality < 0)
3375             return -1;
3376     }
3377
3378     if(s->adaptive_quant){
3379         switch(s->codec_id){
3380         case AV_CODEC_ID_MPEG4:
3381             if (CONFIG_MPEG4_ENCODER)
3382                 ff_clean_mpeg4_qscales(s);
3383             break;
3384         case AV_CODEC_ID_H263:
3385         case AV_CODEC_ID_H263P:
3386         case AV_CODEC_ID_FLV1:
3387             if (CONFIG_H263_ENCODER)
3388                 ff_clean_h263_qscales(s);
3389             break;
3390         default:
3391             ff_init_qscale_tab(s);
3392         }
3393
3394         s->lambda= s->lambda_table[0];
3395         //FIXME broken
3396     }else
3397         s->lambda = s->current_picture.f->quality;
3398     update_qscale(s);
3399     return 0;
3400 }
3401
3402 /* must be called before writing the header */
3403 static void set_frame_distances(MpegEncContext * s){
3404     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3405     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3406
3407     if(s->pict_type==AV_PICTURE_TYPE_B){
3408         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3409         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3410     }else{
3411         s->pp_time= s->time - s->last_non_b_time;
3412         s->last_non_b_time= s->time;
3413         assert(s->picture_number==0 || s->pp_time > 0);
3414     }
3415 }
3416
3417 static int encode_picture(MpegEncContext *s, int picture_number)
3418 {
3419     int i, ret;
3420     int bits;
3421     int context_count = s->slice_context_count;
3422
3423     s->picture_number = picture_number;
3424
3425     /* Reset the average MB variance */
3426     s->me.mb_var_sum_temp    =
3427     s->me.mc_mb_var_sum_temp = 0;
3428
3429     /* we need to initialize some time vars before we can encode b-frames */
3430     // RAL: Condition added for MPEG1VIDEO
3431     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3432         set_frame_distances(s);
3433     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3434         ff_set_mpeg4_time(s);
3435
3436     s->me.scene_change_score=0;
3437
3438 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3439
3440     if(s->pict_type==AV_PICTURE_TYPE_I){
3441         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3442         else                        s->no_rounding=0;
3443     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3444         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3445             s->no_rounding ^= 1;
3446     }
3447
3448     if(s->flags & CODEC_FLAG_PASS2){
3449         if (estimate_qp(s,1) < 0)
3450             return -1;
3451         ff_get_2pass_fcode(s);
3452     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3453         if(s->pict_type==AV_PICTURE_TYPE_B)
3454             s->lambda= s->last_lambda_for[s->pict_type];
3455         else
3456             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3457         update_qscale(s);
3458     }
3459
3460     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3461         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3462         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3463         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3464         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3465     }
3466
3467     s->mb_intra=0; //for the rate distortion & bit compare functions
3468     for(i=1; i<context_count; i++){
3469         ret = ff_update_duplicate_context(s->thread_context[i], s);
3470         if (ret < 0)
3471             return ret;
3472     }
3473
3474     if(ff_init_me(s)<0)
3475         return -1;
3476
3477     /* Estimate motion for every MB */
3478     if(s->pict_type != AV_PICTURE_TYPE_I){
3479         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3480         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3481         if (s->pict_type != AV_PICTURE_TYPE_B) {
3482             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3483                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3484             }
3485         }
3486
3487         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3488     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3489         /* I-Frame */
3490         for(i=0; i<s->mb_stride*s->mb_height; i++)
3491             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3492
3493         if(!s->fixed_qscale){
3494             /* finding spatial complexity for I-frame rate control */
3495             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3496         }
3497     }
3498     for(i=1; i<context_count; i++){
3499         merge_context_after_me(s, s->thread_context[i]);
3500     }
3501     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3502     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3503     emms_c();
3504
3505     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3506         s->pict_type= AV_PICTURE_TYPE_I;
3507         for(i=0; i<s->mb_stride*s->mb_height; i++)
3508             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3509         if(s->msmpeg4_version >= 3)
3510             s->no_rounding=1;
3511         av_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3512                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3513     }
3514
3515     if(!s->umvplus){
3516         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3517             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3518
3519             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3520                 int a,b;
3521                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3522                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3523                 s->f_code= FFMAX3(s->f_code, a, b);
3524             }
3525
3526             ff_fix_long_p_mvs(s);
3527             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3528             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3529                 int j;
3530                 for(i=0; i<2; i++){
3531                     for(j=0; j<2; j++)
3532                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3533                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3534                 }
3535             }
3536         }
3537
3538         if(s->pict_type==AV_PICTURE_TYPE_B){
3539             int a, b;
3540
3541             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3542             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3543             s->f_code = FFMAX(a, b);
3544
3545             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3546             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3547             s->b_code = FFMAX(a, b);
3548
3549             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3550             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3551             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3552             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3553             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3554                 int dir, j;
3555                 for(dir=0; dir<2; dir++){
3556                     for(i=0; i<2; i++){
3557                         for(j=0; j<2; j++){
3558                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3559                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3560                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3561                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3562                         }
3563                     }
3564                 }
3565             }
3566         }
3567     }
3568
3569     if (estimate_qp(s, 0) < 0)
3570         return -1;
3571
3572     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3573         s->qscale= 3; //reduce clipping problems
3574
3575     if (s->out_format == FMT_MJPEG) {
3576         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3577         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3578
3579         if (s->avctx->intra_matrix) {
3580             chroma_matrix =
3581             luma_matrix = s->avctx->intra_matrix;
3582         }
3583         if (s->avctx->chroma_intra_matrix)
3584             chroma_matrix = s->avctx->chroma_intra_matrix;
3585
3586         /* for mjpeg, we do include qscale in the matrix */
3587         for(i=1;i<64;i++){
3588             int j = s->idsp.idct_permutation[i];
3589
3590             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3591             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3592         }
3593         s->y_dc_scale_table=
3594         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3595         s->chroma_intra_matrix[0] =
3596         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3597         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3598                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3599         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3600                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3601         s->qscale= 8;
3602     }
3603     if(s->codec_id == AV_CODEC_ID_AMV){
3604         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3605         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3606         for(i=1;i<64;i++){
3607             int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
3608
3609             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3610             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3611         }
3612         s->y_dc_scale_table= y;
3613         s->c_dc_scale_table= c;
3614         s->intra_matrix[0] = 13;
3615         s->chroma_intra_matrix[0] = 14;
3616         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3617                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3618         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3619                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3620         s->qscale= 8;
3621     }
3622
3623     //FIXME var duplication
3624     s->current_picture_ptr->f->key_frame =
3625     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3626     s->current_picture_ptr->f->pict_type =
3627     s->current_picture.f->pict_type = s->pict_type;
3628
3629     if (s->current_picture.f->key_frame)
3630         s->picture_in_gop_number=0;
3631
3632     s->mb_x = s->mb_y = 0;
3633     s->last_bits= put_bits_count(&s->pb);
3634     switch(s->out_format) {
3635     case FMT_MJPEG:
3636         if (CONFIG_MJPEG_ENCODER)
3637             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3638                                            s->intra_matrix, s->chroma_intra_matrix);
3639         break;
3640     case FMT_H261:
3641         if (CONFIG_H261_ENCODER)
3642             ff_h261_encode_picture_header(s, picture_number);
3643         break;
3644     case FMT_H263:
3645         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3646             ff_wmv2_encode_picture_header(s, picture_number);
3647         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3648             ff_msmpeg4_encode_picture_header(s, picture_number);
3649         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3650             ff_mpeg4_encode_picture_header(s, picture_number);
3651         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3652             ff_rv10_encode_picture_header(s, picture_number);
3653         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3654             ff_rv20_encode_picture_header(s, picture_number);
3655         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3656             ff_flv_encode_picture_header(s, picture_number);
3657         else if (CONFIG_H263_ENCODER)
3658             ff_h263_encode_picture_header(s, picture_number);
3659         break;
3660     case FMT_MPEG1:
3661         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3662             ff_mpeg1_encode_picture_header(s, picture_number);
3663         break;
3664     default:
3665         av_assert0(0);
3666     }
3667     bits= put_bits_count(&s->pb);
3668     s->header_bits= bits - s->last_bits;
3669
3670     for(i=1; i<context_count; i++){
3671         update_duplicate_context_after_me(s->thread_context[i], s);
3672     }
3673     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3674     for(i=1; i<context_count; i++){
3675         merge_context_after_encode(s, s->thread_context[i]);
3676     }
3677     emms_c();
3678     return 0;
3679 }
3680
3681 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3682     const int intra= s->mb_intra;
3683     int i;
3684
3685     s->dct_count[intra]++;
3686
3687     for(i=0; i<64; i++){
3688         int level= block[i];
3689
3690         if(level){
3691             if(level>0){
3692                 s->dct_error_sum[intra][i] += level;
3693                 level -= s->dct_offset[intra][i];
3694                 if(level<0) level=0;
3695             }else{
3696                 s->dct_error_sum[intra][i] -= level;
3697                 level += s->dct_offset[intra][i];
3698                 if(level>0) level=0;
3699             }
3700             block[i]= level;
3701         }
3702     }
3703 }
3704
/**
 * Quantize one 8x8 block using a trellis search over run/level decisions.
 *
 * The block is forward-transformed (and denoised when s->dct_error_sum is
 * set). For every coefficient up to the last one that would quantize to a
 * non-zero value, up to two candidate levels are scored as
 * distortion + lambda * VLC length, and the cheapest chain of (run, level)
 * pairs is written back into block[]. s->coded_score[n] receives the score
 * of the chosen solution.
 *
 * @param s        encoder context (DSP functions, matrices, VLC length tables)
 * @param block    in: samples to transform; out: quantized coefficients,
 *                 stored through s->intra_scantable.permutated
 * @param n        block number; n < 4 selects y_dc_scale / q_intra_matrix,
 *                 otherwise c_dc_scale / q_chroma_intra_matrix
 * @param qscale   quantizer scale, also used to index the quantization matrices
 * @param overflow set to non-zero when a candidate level may exceed
 *                 s->max_qcoeff
 * @return scan index of the last non-zero coefficient; a value below the
 *         first searched index (0 for intra blocks, -1 for inter blocks)
 *         means no coefficient from the search was kept
 */
3705 static int dct_quantize_trellis_c(MpegEncContext *s,
3706                                   int16_t *block, int n,
3707                                   int qscale, int *overflow){
3708     const int *qmat;
3709     const uint8_t *scantable= s->intra_scantable.scantable;
3710     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3711     int max=0;
3712     unsigned int threshold1, threshold2;
3713     int bias=0;
         /* Back-pointers of the trellis: entry i+1 describes the best
          * (run, level) pair for a non-zero coefficient at scan position i. */
3714     int run_tab[65];
3715     int level_tab[65];
         /* score_tab[i+1]: best score of any path whose last coded
          * coefficient is at scan position i. */
3716     int score_tab[65];
         /* Positions (indices into score_tab) still considered as
          * predecessors of the next coefficient. */
3717     int survivor[65];
3718     int survivor_count;
3719     int last_run=0;
3720     int last_level=0;
3721     int last_score= 0;
3722     int last_i;
         /* Candidate levels per scan position; coeff_count[i] tells how many
          * entries of coeff[.][i] are valid (1 or 2). */
3723     int coeff[2][64];
3724     int coeff_count[64];
3725     int qmul, qadd, start_i, last_non_zero, i, dc;
3726     const int esc_length= s->ac_esc_length;
3727     uint8_t * length;
3728     uint8_t * last_length;
3729     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3730
3731     s->fdsp.fdct(block);
3732
3733     if(s->dct_error_sum)
3734         s->denoise_dct(s, block);
         /* Dequantization constants for the H.263/H.261 branch below
          * (unquant_coeff = alevel*qmul + qadd). */
3735     qmul= qscale*16;
3736     qadd= ((qscale-1)|1)*8;
3737
3738     if (s->mb_intra) {
3739         int q;
3740         if (!s->h263_aic) {
3741             if (n < 4)
3742                 q = s->y_dc_scale;
3743             else
3744                 q = s->c_dc_scale;
3745             q = q << 3;
3746         } else{
3747             /* For AIC we skip quant/dequant of INTRADC */
3748             q = 1 << 3;
3749             qadd=0;
3750         }
3751
3752         /* note: block[0] is assumed to be positive */
3753         block[0] = (block[0] + (q >> 1)) / q;
             /* The DC coefficient is quantized directly above, so the
              * search starts at the first AC coefficient. */
3754         start_i = 1;
3755         last_non_zero = 0;
3756         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3757         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3758             bias= 1<<(QMAT_SHIFT-1);
3759         length     = s->intra_ac_vlc_length;
3760         last_length= s->intra_ac_vlc_last_length;
3761     } else {
3762         start_i = 0;
3763         last_non_zero = -1;
3764         qmat = s->q_inter_matrix[qscale];
3765         length     = s->inter_ac_vlc_length;
3766         last_length= s->inter_ac_vlc_last_length;
3767     }
3768     last_i= start_i;
3769
         /* A scaled coefficient quantizes to non-zero iff its magnitude
          * exceeds threshold1; the unsigned comparison against threshold2
          * tests both signs at once. */
3770     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3771     threshold2= (threshold1<<1);
3772
         /* Scan backwards for the last coefficient that quantizes to
          * non-zero. */
3773     for(i=63; i>=start_i; i--) {
3774         const int j = scantable[i];
3775         int level = block[j] * qmat[j];
3776
3777         if(((unsigned)(level+threshold1))>threshold2){
3778             last_non_zero = i;
3779             break;
3780         }
3781     }
3782
         /* Build the candidate levels for every position up to
          * last_non_zero. */
3783     for(i=start_i; i<=last_non_zero; i++) {
3784         const int j = scantable[i];
3785         int level = block[j] * qmat[j];
3786
3787 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3788 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3789         if(((unsigned)(level+threshold1))>threshold2){
                 /* Candidates: the normally quantized level and the level
                  * one step closer to zero. */
3790             if(level>0){
3791                 level= (bias + level)>>QMAT_SHIFT;
3792                 coeff[0][i]= level;
3793                 coeff[1][i]= level-1;
3794 //                coeff[2][k]= level-2;
3795             }else{
3796                 level= (bias - level)>>QMAT_SHIFT;
3797                 coeff[0][i]= -level;
3798                 coeff[1][i]= -level+1;
3799 //                coeff[2][k]= -level+2;
3800             }
                 /* level is the magnitude here; with magnitude 1 the second
                  * candidate would be 0, so only one candidate is kept. */
3801             coeff_count[i]= FFMIN(level, 2);
3802             av_assert2(coeff_count[i]);
3803             max |=level;
3804         }else{
                 /* Would quantize to zero: the only non-zero candidate is
                  * +1 or -1, following the sign of level. */
3805             coeff[0][i]= (level>>31)|1;
3806             coeff_count[i]= 1;
3807         }
3808     }
3809
3810     *overflow= s->max_qcoeff < max; //overflow might have happened
3811
         /* Nothing quantizes to non-zero: clear the searched range and
          * return. */
3812     if(last_non_zero < start_i){
3813         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3814         return last_non_zero;
3815     }
3816
3817     score_tab[start_i]= 0;
3818     survivor[0]= start_i;
3819     survivor_count= 1;
3820
         /* Forward pass of the trellis. */
3821     for(i=start_i; i<=last_non_zero; i++){
3822         int level_index, j, zero_distortion;
3823         int dct_coeff= FFABS(block[ scantable[i] ]);
3824         int best_score=256*256*256*120;
3825
             /* Undo the per-coefficient scaling of the ifast DCT so that
              * the distortion is computed on comparable values. */
3826         if (s->fdsp.fdct == ff_fdct_ifast)
3827             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
             /* Distortion of coding this coefficient as zero; candidate
              * distortions below are expressed relative to it. */
3828         zero_distortion= dct_coeff*dct_coeff;
3829
3830         for(level_index=0; level_index < coeff_count[i]; level_index++){
3831             int distortion;
3832             int level= coeff[level_index][i];
3833             const int alevel= FFABS(level);
3834             int unquant_coeff;
3835
3836             av_assert2(level);
3837
                 /* Reconstruct the value a decoder would obtain for this
                  * candidate level. */
3838             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3839                 unquant_coeff= alevel*qmul + qadd;
3840             }else{ //MPEG1
3841                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3842                 if(s->mb_intra){
3843                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3844                         unquant_coeff =   (unquant_coeff - 1) | 1;
3845                 }else{
3846                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3847                         unquant_coeff =   (unquant_coeff - 1) | 1;
3848                 }
3849                 unquant_coeff<<= 3;
3850             }
3851
3852             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* After the +64 offset, levels in [-64, 63] index the VLC
                  * length tables; anything else is charged as an escape. */
3853             level+=64;
3854             if((level&(~127)) == 0){
                     /* Best predecessor when this is not the final
                      * coefficient. */
3855                 for(j=survivor_count-1; j>=0; j--){
3856                     int run= i - survivor[j];
3857                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3858                     score += score_tab[i-run];
3859
3860                     if(score < best_score){
3861                         best_score= score;
3862                         run_tab[i+1]= run;
3863                         level_tab[i+1]= level-64;
3864                     }
3865                 }
3866
                     /* These formats use a separate length table for the
                      * last coefficient, so the best end point is tracked
                      * during the forward pass. */
3867                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3868                     for(j=survivor_count-1; j>=0; j--){
3869                         int run= i - survivor[j];
3870                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3871                         score += score_tab[i-run];
3872                         if(score < last_score){
3873                             last_score= score;
3874                             last_run= run;
3875                             last_level= level-64;
3876                             last_i= i+1;
3877                         }
3878                     }
3879                 }
3880             }else{
                     /* Escape-coded level: the cost does not depend on
                      * the run. */
3881                 distortion += esc_length*lambda;
3882                 for(j=survivor_count-1; j>=0; j--){
3883                     int run= i - survivor[j];
3884                     int score= distortion + score_tab[i-run];
3885
3886                     if(score < best_score){
3887                         best_score= score;
3888                         run_tab[i+1]= run;
3889                         level_tab[i+1]= level-64;
3890                     }
3891                 }
3892
3893                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3894                   for(j=survivor_count-1; j>=0; j--){
3895                         int run= i - survivor[j];
3896                         int score= distortion + score_tab[i-run];
3897                         if(score < last_score){
3898                             last_score= score;
3899                             last_run= run;
3900                             last_level= level-64;
3901                             last_i= i+1;
3902                         }
3903                     }
3904                 }
3905             }
3906         }
3907
3908         score_tab[i+1]= best_score;
3909
             /* Prune survivors whose score is worse than the new best
              * (with a margin of lambda when last_non_zero > 27). */
3910         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3911         if(last_non_zero <= 27){
3912             for(; survivor_count; survivor_count--){
3913                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3914                     break;
3915             }
3916         }else{
3917             for(; survivor_count; survivor_count--){
3918                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3919                     break;
3920             }
3921         }
3922
3923         survivor[ survivor_count++ ]= i+1;
3924     }
3925
         /* For the remaining formats the end point is chosen after the
          * forward pass, charging lambda*2 for any non-empty solution. */
3926     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
3927         last_score= 256*256*256*120;
3928         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3929             int score= score_tab[i];
3930             if(i) score += lambda*2; //FIXME exacter?
3931
3932             if(score < last_score){
3933                 last_score= score;
3934                 last_i= i;
3935                 last_level= level_tab[i];
3936                 last_run= run_tab[i];
3937             }
3938         }
3939     }
3940
3941     s->coded_score[n] = last_score;
3942
         /* Save |block[0]| before the coefficients are cleared; it is only
          * used by the single-coefficient special case below. */
3943     dc= FFABS(block[0]);
3944     last_non_zero= last_i - 1;
3945     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3946
3947     if(last_non_zero < start_i)
3948         return last_non_zero;
3949
         /* Special case: only coefficient 0 of a block with start_i == 0
          * survives. Re-evaluate its candidates against dropping the
          * coefficient entirely (score dc*dc). */
3950     if(last_non_zero == 0 && start_i == 0){
3951         int best_level= 0;
3952         int best_score= dc * dc;
3953
3954         for(i=0; i<coeff_count[0]; i++){
3955             int level= coeff[i][0];
3956             int alevel= FFABS(level);
3957             int unquant_coeff, score, distortion;
3958
3959             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3960                     unquant_coeff= (alevel*qmul + qadd)>>3;
3961             }else{ //MPEG1
3962                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3963                     unquant_coeff =   (unquant_coeff - 1) | 1;
3964             }
3965             unquant_coeff = (unquant_coeff + 4) >> 3;
3966             unquant_coeff<<= 3 + 3;
3967
3968             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3969             level+=64;
3970             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3971             else                    score= distortion + esc_length*lambda;
3972
3973             if(score < best_score){
3974                 best_score= score;
3975                 best_level= level - 64;
3976             }
3977         }
3978         block[0]= best_level;
3979         s->coded_score[n] = best_score - dc*dc;
3980         if(best_level == 0) return -1;
3981         else                return last_non_zero;
3982     }
3983
         /* Backtrack: write the final coefficient, then follow run_tab /
          * level_tab towards the start of the block. */
3984     i= last_i;
3985     av_assert2(last_level);
3986
3987     block[ perm_scantable[last_non_zero] ]= last_level;
3988     i -= last_run + 1;
3989
3990     for(; i>start_i; i -= run_tab[i] + 1){
3991         block[ perm_scantable[i-1] ]= level_tab[i];
3992     }
3993
3994     return last_non_zero;
3995 }
3996
3997 //#define REFINE_STATS 1
/* Table of 8x8 DCT basis functions, filled in by build_basis().
 * The first index is the permuted coefficient index, the second is the
 * position 8*x + y inside the block. */
3998 static int16_t basis[64][64];
3999
/**
 * Fill basis[][] with the 64 two-dimensional DCT basis functions.
 *
 * Each entry is 0.25*(1<<BASIS_SHIFT) * cos(pi/8*i*(x+0.5)) * cos(pi/8*j*(y+0.5)),
 * with an extra factor sqrt(0.5) for i == 0 and for j == 0, rounded with
 * lrintf().
 *
 * @param perm maps the coefficient index 8*i + j to the row of basis[]
 *             that receives the corresponding basis function
 */
4000 static void build_basis(uint8_t *perm){
4001     int i, j, x, y;
         /* NOTE(review): presumably resets MMX state before the floating
          * point math below - confirm against emms_c(). */
4002     emms_c();
4003     for(i=0; i<8; i++){
4004         for(j=0; j<8; j++){
4005             for(y=0; y<8; y++){
4006                 for(x=0; x<8; x++){
4007                     double s= 0.25*(1<<BASIS_SHIFT);
4008                     int index= 8*i + j;
4009                     int perm_index= perm[index];
4010                     if(i==0) s*= sqrt(0.5);
4011                     if(j==0) s*= sqrt(0.5);
4012                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4013                 }
4014             }
4015         }
4016     }
4017 }
4018
4019 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4020                         int16_t *block, int16_t *weight, int16_t *orig,
4021                         int n, int qscale){
4022     int16_t rem[64];
4023     LOCAL_ALIGNED_16(int16_t, d1, [64]);
4024     const uint8_t *scantable= s->intra_scantable.scantable;
4025     const uint8_t *perm_scantable= s->intra_scantable.permutated;
4026 //    unsigned int threshold1, threshold2;
4027 //    int bias=0;
4028     int run_tab[65];
4029     int prev_run=0;
4030     int prev_level=0;
4031     int qmul, qadd, start_i, last_non_zero, i, dc;
4032     uint8_t * length;
4033     uint8_t * last_length;
4034     int lambda;
4035     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4036 #ifdef REFINE_STATS
4037 static int count=0;
4038 static int after_last=0;
4039 static int to_zero=0;
4040 static int from_zero=0;
4041 static int raise=0;
4042 static int lower=0;
4043 static int messed_sign=0;
4044 #endif
4045
4046     if(basis[0][0] == 0)
4047         build_basis(s->idsp.idct_permutation);
4048
4049     qmul= qscale*2;
4050     qadd= (qscale-1)|1;
4051     if (s->mb_intra) {
4052         if (!s->h263_aic) {
4053             if (n < 4)
4054                 q = s->y_dc_scale;
4055             else
4056                 q = s->c_dc_scale;
4057         } else{
4058             /* For AIC we skip quant/dequant of INTRADC */
4059             q = 1;
4060             qadd=0;
4061         }
4062         q <<= RECON_SHIFT-3;
4063         /* note: block[0] is assumed to be positive */
4064         dc= block[0]*q;
4065 //        block[0] = (block[0] + (q >> 1)) / q;
4066         start_i = 1;
4067 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4068 //            bias= 1<<(QMAT_SHIFT-1);
4069         length     = s->intra_ac_vlc_length;
4070         last_length= s->intra_ac_vlc_last_length;
4071     } else {
4072         dc= 0;
4073         start_i = 0;
4074         length     = s->inter_ac_vlc_length;
4075         last_length= s->inter_ac_vlc_last_length;
4076     }
4077     last_non_zero = s->block_last_index[n];
4078
4079 #ifdef REFINE_STATS
4080 {START_TIMER
4081 #endif
4082     dc += (1<<(RECON_SHIFT-1));
4083     for(i=0; i<64; i++){
4084         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
4085     }
4086 #ifdef REFINE_STATS
4087 STOP_TIMER("memset rem[]")}
4088 #endif
4089     sum=0;
4090     for(i=0; i<64; i++){
4091         int one= 36;
4092         int qns=4;
4093         int w;
4094
4095         w= FFABS(weight[i]) + qns*one;
4096         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4097
4098         weight[i] = w;
4099 //        w=weight[i] = (63*qns + (w/2)) / w;
4100
4101         av_assert2(w>0);
4102         av_assert2(w<(1<<6));
4103         sum += w*w;
4104     }
4105     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4106 #ifdef REFINE_STATS
4107 {START_TIMER
4108 #endif
4109     run=0;
4110     rle_index=0;
4111     for(i=start_i; i<=last_non_zero; i++){
4112         int j= perm_scantable[i];
4113         const int level= block[j];
4114         int coeff;
4115
4116         if(level){
4117             if(level<0) coeff= qmul*level - qadd;
4118             else        coeff= qmul*level + qadd;
4119             run_tab[rle_index++]=run;
4120             run=0;
4121
4122             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4123         }else{
4124             run++;
4125         }
4126     }
4127 #ifdef REFINE_STATS
4128 if(last_non_zero>0){
4129 STOP_TIMER("init rem[]")
4130 }
4131 }
4132
4133 {START_TIMER
4134 #endif
4135     for(;;){
4136         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4137         int best_coeff=0;
4138         int best_change=0;
4139         int run2, best_unquant_change=0, analyze_gradient;
4140 #ifdef REFINE_STATS
4141 {START_TIMER
4142 #endif
4143         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4144
4145         if(analyze_gradient){
4146 #ifdef REFINE_STATS
4147 {START_TIMER
4148 #endif
4149             for(i=0; i<64; i++){
4150                 int w= weight[i];
4151
4152                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4153             }
4154 #ifdef REFINE_STATS
4155 STOP_TIMER("rem*w*w")}
4156 {START_TIMER
4157 #endif
4158             s->fdsp.fdct(d1);
4159 #ifdef REFINE_STATS
4160 STOP_TIMER("dct")}
4161 #endif
4162         }
4163
4164         if(start_i){
4165             const int level= block[0];
4166             int change, old_coeff;
4167
4168             av_assert2(s->mb_intra);
4169
4170             old_coeff= q*level;
4171
4172             for(change=-1; change<=1; change+=2){
4173                 int new_level= level + change;
4174                 int score, new_coeff;
4175
4176                 new_coeff= q*new_level;
4177                 if(new_coeff >= 2048 || new_coeff < 0)
4178                     continue;
4179
4180                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4181                                                   new_coeff - old_coeff);
4182                 if(score<best_score){
4183                     best_score= score;
4184                     best_coeff= 0;
4185                     best_change= change;
4186                     best_unquant_change= new_coeff - old_coeff;
4187                 }
4188             }
4189         }
4190
4191         run=0;
4192         rle_index=0;
4193         run2= run_tab[rle_index++];
4194         prev_level=0;
4195         prev_run=0;
4196
4197         for(i=start_i; i<64; i++){
4198             int j= perm_scantable[i];
4199             const int level= block[j];
4200             int change, old_coeff;
4201
4202             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4203                 break;
4204
4205             if(level){
4206                 if(level<0) old_coeff= qmul*level - qadd;
4207                 else        old_coeff= qmul*level + qadd;
4208                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4209             }else{
4210                 old_coeff=0;
4211                 run2--;
4212                 av_assert2(run2>=0 || i >= last_non_zero );
4213             }
4214
4215             for(change=-1; change<=1; change+=2){
4216                 int new_level= level + change;
4217                 int score, new_coeff, unquant_change;
4218
4219                 score=0;
4220                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4221                    continue;
4222
4223                 if(new_level){
4224                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4225                     else            new_coeff= qmul*new_level + qadd;
4226                     if(new_coeff >= 2048 || new_coeff <= -2048)
4227                         continue;
4228                     //FIXME check for overflow
4229
4230                     if(level){
4231                         if(level < 63 && level > -63){
4232                             if(i < last_non_zero)
4233                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4234                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4235                             else
4236                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4237                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4238                         }
4239                     }else{
4240                         av_assert2(FFABS(new_level)==1);
4241
4242                         if(analyze_gradient){
4243                             int g= d1[ scantable[i] ];
4244                             if(g && (g^new_level) >= 0)
4245                                 continue;
4246                         }
4247
4248                         if(i < last_non_zero){
4249                             int next_i= i + run2 + 1;
4250                             int next_level= block[ perm_scantable[next_i] ] + 64;
4251
4252                             if(next_level&(~127))
4253                                 next_level= 0;
4254
4255                             if(next_i < last_non_zero)
4256                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4257                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4258                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4259                             else
4260                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4261                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4262                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4263                         }else{
4264                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4265                             if(prev_level){
4266                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4267                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4268                             }
4269                         }
4270                     }
4271                 }else{
4272                     new_coeff=0;
4273                     av_assert2(FFABS(level)==1);
4274
4275                     if(i < last_non_zero){
4276                         int next_i= i + run2 + 1;
4277                         int next_level= block[ perm_scantable[next_i] ] + 64;
4278
4279                         if(next_level&(~127))
4280                             next_level= 0;
4281
4282                         if(next_i < last_non_zero)
4283                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4284                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4285                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4286                         else
4287                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4288                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4289                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4290                     }else{
4291                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4292                         if(prev_level){
4293                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4294                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4295                         }
4296                     }
4297                 }
4298
4299                 score *= lambda;
4300
4301                 unquant_change= new_coeff - old_coeff;
4302                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4303
4304                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4305                                                    unquant_change);
4306                 if(score<best_score){
4307                     best_score= score;
4308                     best_coeff= i;
4309                     best_change= change;
4310                     best_unquant_change= unquant_change;
4311                 }
4312             }
4313             if(level){
4314                 prev_level= level + 64;
4315                 if(prev_level&(~127))
4316                     prev_level= 0;
4317                 prev_run= run;
4318                 run=0;
4319             }else{
4320                 run++;
4321             }
4322         }
4323 #ifdef REFINE_STATS
4324 STOP_TIMER("iterative step")}
4325 #endif
4326
4327         if(best_change){
4328             int j= perm_scantable[ best_coeff ];
4329
4330             block[j] += best_change;
4331
4332             if(best_coeff > last_non_zero){
4333                 last_non_zero= best_coeff;
4334                 av_assert2(block[j]);
4335 #ifdef REFINE_STATS
4336 after_last++;
4337 #endif
4338             }else{
4339 #ifdef REFINE_STATS
4340 if(block[j]){
4341     if(block[j] - best_change){
4342         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4343             raise++;
4344         }else{
4345             lower++;
4346         }
4347     }else{
4348         from_zero++;
4349     }
4350 }else{
4351     to_zero++;
4352 }
4353 #endif
4354                 for(; last_non_zero>=start_i; last_non_zero--){
4355                     if(block[perm_scantable[last_non_zero]])
4356                         break;
4357                 }
4358             }
4359 #ifdef REFINE_STATS
4360 count++;
4361 if(256*256*256*64 % count == 0){
4362     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4363 }
4364 #endif
4365             run=0;
4366             rle_index=0;
4367             for(i=start_i; i<=last_non_zero; i++){
4368                 int j= perm_scantable[i];
4369                 const int level= block[j];
4370
4371                  if(level){
4372                      run_tab[rle_index++]=run;
4373                      run=0;
4374                  }else{
4375                      run++;
4376                  }
4377             }
4378
4379             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4380         }else{
4381             break;
4382         }
4383     }
4384 #ifdef REFINE_STATS
4385 if(last_non_zero>0){
4386 STOP_TIMER("iterative search")
4387 }
4388 }
4389 #endif
4390
4391     return last_non_zero;
4392 }
4393
/**
 * Forward-transform and quantize one 8x8 block (plain, non-trellis path).
 *
 * @param s        encoder context
 * @param block    in: samples to transform; out: quantized coefficients
 * @param n        block number; n < 4 selects y_dc_scale / q_intra_matrix,
 *                 otherwise c_dc_scale / q_chroma_intra_matrix
 * @param qscale   quantizer scale, used to index the quantization matrices
 * @param overflow set to non-zero when a quantized magnitude may exceed
 *                 s->max_qcoeff
 * @return scan index of the last non-zero coefficient (0 for an intra
 *         block with no non-zero AC coefficient, -1 for an inter block
 *         with no non-zero coefficient)
 */
4394 int ff_dct_quantize_c(MpegEncContext *s,
4395                         int16_t *block, int n,
4396                         int qscale, int *overflow)
4397 {
4398     int i, j, level, last_non_zero, q, start_i;
4399     const int *qmat;
4400     const uint8_t *scantable= s->intra_scantable.scantable;
4401     int bias;
4402     int max=0;
4403     unsigned int threshold1, threshold2;
4404
4405     s->fdsp.fdct(block);
4406
4407     if(s->dct_error_sum)
4408         s->denoise_dct(s, block);
4409
4410     if (s->mb_intra) {
4411         if (!s->h263_aic) {
4412             if (n < 4)
4413                 q = s->y_dc_scale;
4414             else
4415                 q = s->c_dc_scale;
4416             q = q << 3;
4417         } else
4418             /* For AIC we skip quant/dequant of INTRADC */
4419             q = 1 << 3;
4420
4421         /* note: block[0] is assumed to be positive */
4422         block[0] = (block[0] + (q >> 1)) / q;
4423         start_i = 1;
4424         last_non_zero = 0;
4425         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
             /* The bias is scaled by multiplication rather than a left
              * shift: shifting a negative signed value is undefined
              * behaviour in C. */
4426         bias= s->intra_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4427     } else {
4428         start_i = 0;
4429         last_non_zero = -1;
4430         qmat = s->q_inter_matrix[qscale];
             /* Same as above: multiply instead of shifting a possibly
              * negative bias. */
4431         bias= s->inter_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4432     }
         /* A scaled coefficient quantizes to non-zero iff its magnitude
          * exceeds threshold1; the unsigned comparison against threshold2
          * tests both signs at once. */
4433     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4434     threshold2= (threshold1<<1);
         /* Scan backwards, clearing coefficients that quantize to zero,
          * until the last non-zero one is found. */
4435     for(i=63;i>=start_i;i--) {
4436         j = scantable[i];
4437         level = block[j] * qmat[j];
4438
4439         if(((unsigned)(level+threshold1))>threshold2){
4440             last_non_zero = i;
4441             break;
4442         }else{
4443             block[j]=0;
4444         }
4445     }
         /* Quantize the remaining coefficients in scan order. */
4446     for(i=start_i; i<=last_non_zero; i++) {
4447         j = scantable[i];
4448         level = block[j] * qmat[j];
4449
4450 //        if(   bias+level >= (1<<QMAT_SHIFT)
4451 //           || bias-level >= (1<<QMAT_SHIFT)){
4452         if(((unsigned)(level+threshold1))>threshold2){
4453             if(level>0){
4454                 level= (bias + level)>>QMAT_SHIFT;
4455                 block[j]= level;
4456             }else{
4457                 level= (bias - level)>>QMAT_SHIFT;
4458                 block[j]= -level;
4459             }
                 /* level is the magnitude here; OR-ing gives a cheap upper
                  * bound for the overflow test below. */
4460             max |=level;
4461         }else{
4462             block[j]=0;
4463         }
4464     }
4465     *overflow= s->max_qcoeff < max; //overflow might have happened
4466
4467     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4468     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4469         ff_block_permute(block, s->idsp.idct_permutation,
4470                          scantable, last_non_zero);
4471
4472     return last_non_zero;
4473 }
4474
/* OFFSET(x): byte offset of field x inside MpegEncContext, used to bind an
 * option to the context field it sets.
 * VE: option flags marking an entry as a video encoding parameter. */
4475 #define OFFSET(x) offsetof(MpegEncContext, x)
4476 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
/* Private options of the H.263 encoder: three integer options followed by
 * the entries from FF_MPV_COMMON_OPTS; the table ends with { NULL }. */
4477 static const AVOption h263_options[] = {
4478     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4479     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4480     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4481     FF_MPV_COMMON_OPTS
4482     { NULL },
4483 };
4484
/* AVClass of the H.263 encoder; exposes h263_options. */
4485 static const AVClass h263_class = {
4486     .class_name = "H.263 encoder",
4487     .item_name  = av_default_item_name,
4488     .option     = h263_options,
4489     .version    = LIBAVUTIL_VERSION_INT,
4490 };
4491
/* Codec descriptor of the "h263" encoder. It uses the shared ff_MPV_encode_*
 * init/encode/close callbacks with an MpegEncContext as private data, and
 * lists AV_PIX_FMT_YUV420P as its only pixel format. */
4492 AVCodec ff_h263_encoder = {
4493     .name           = "h263",
4494     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4495     .type           = AVMEDIA_TYPE_VIDEO,
4496     .id             = AV_CODEC_ID_H263,
4497     .priv_data_size = sizeof(MpegEncContext),
4498     .init           = ff_MPV_encode_init,
4499     .encode2        = ff_MPV_encode_picture,
4500     .close          = ff_MPV_encode_end,
4501     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4502     .priv_class     = &h263_class,
4503 };
4504
/* Private options of the H.263+ encoder: four 0/1 integer options followed
 * by the entries from FF_MPV_COMMON_OPTS; the table ends with { NULL }. */
4505 static const AVOption h263p_options[] = {
4506     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4507     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4508     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4509     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4510     FF_MPV_COMMON_OPTS
4511     { NULL },
4512 };
/* AVClass of the H.263+ encoder; exposes h263p_options. */
4513 static const AVClass h263p_class = {
4514     .class_name = "H.263p encoder",
4515     .item_name  = av_default_item_name,
4516     .option     = h263p_options,
4517     .version    = LIBAVUTIL_VERSION_INT,
4518 };
4519
/* Codec descriptor of the "h263p" encoder. Same shared ff_MPV_encode_*
 * callbacks as the other encoders in this file; additionally sets
 * CODEC_CAP_SLICE_THREADS in .capabilities. */
4520 AVCodec ff_h263p_encoder = {
4521     .name           = "h263p",
4522     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4523     .type           = AVMEDIA_TYPE_VIDEO,
4524     .id             = AV_CODEC_ID_H263P,
4525     .priv_data_size = sizeof(MpegEncContext),
4526     .init           = ff_MPV_encode_init,
4527     .encode2        = ff_MPV_encode_picture,
4528     .close          = ff_MPV_encode_end,
4529     .capabilities   = CODEC_CAP_SLICE_THREADS,
4530     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4531     .priv_class     = &h263p_class,
4532 };
4533
/* NOTE(review): FF_MPV_GENERIC_CLASS is defined outside this chunk; it
 * presumably declares msmpeg4v2_class, which is referenced below. */
4534 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4535
/* Codec descriptor of the "msmpeg4v2" encoder, built on the shared
 * ff_MPV_encode_* callbacks; AV_PIX_FMT_YUV420P is the only pixel format. */
4536 AVCodec ff_msmpeg4v2_encoder = {
4537     .name           = "msmpeg4v2",
4538     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4539     .type           = AVMEDIA_TYPE_VIDEO,
4540     .id             = AV_CODEC_ID_MSMPEG4V2,
4541     .priv_data_size = sizeof(MpegEncContext),
4542     .init           = ff_MPV_encode_init,
4543     .encode2        = ff_MPV_encode_picture,
4544     .close          = ff_MPV_encode_end,
4545     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4546     .priv_class     = &msmpeg4v2_class,
4547 };
4548
/* NOTE(review): FF_MPV_GENERIC_CLASS is defined outside this chunk; it
 * presumably declares msmpeg4v3_class, which is referenced below. */
4549 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4550
/* Codec descriptor of the MS-MPEG4 version 3 encoder; note that its
 * registered name is "msmpeg4". Built on the shared ff_MPV_encode_*
 * callbacks; AV_PIX_FMT_YUV420P is the only pixel format. */
4551 AVCodec ff_msmpeg4v3_encoder = {
4552     .name           = "msmpeg4",
4553     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4554     .type           = AVMEDIA_TYPE_VIDEO,
4555     .id             = AV_CODEC_ID_MSMPEG4V3,
4556     .priv_data_size = sizeof(MpegEncContext),
4557     .init           = ff_MPV_encode_init,
4558     .encode2        = ff_MPV_encode_picture,
4559     .close          = ff_MPV_encode_end,
4560     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4561     .priv_class     = &msmpeg4v3_class,
4562 };
4563
/* NOTE(review): FF_MPV_GENERIC_CLASS is defined outside this chunk; it
 * presumably declares wmv1_class, which is referenced below. */
4564 FF_MPV_GENERIC_CLASS(wmv1)
4565
/* Codec descriptor of the "wmv1" (Windows Media Video 7) encoder, built on
 * the shared ff_MPV_encode_* callbacks; AV_PIX_FMT_YUV420P is the only
 * pixel format. */
4566 AVCodec ff_wmv1_encoder = {
4567     .name           = "wmv1",
4568     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4569     .type           = AVMEDIA_TYPE_VIDEO,
4570     .id             = AV_CODEC_ID_WMV1,
4571     .priv_data_size = sizeof(MpegEncContext),
4572     .init           = ff_MPV_encode_init,
4573     .encode2        = ff_MPV_encode_picture,
4574     .close          = ff_MPV_encode_end,
4575     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4576     .priv_class     = &wmv1_class,
4577 };