]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
Merge commit 'a9aee08d900f686e966c64afec5d88a7d9d130a3'
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "dsputil.h"
41 #include "idctdsp.h"
42 #include "mpeg12.h"
43 #include "mpegvideo.h"
44 #include "h261.h"
45 #include "h263.h"
46 #include "mjpegenc_common.h"
47 #include "mathops.h"
48 #include "mpegutils.h"
49 #include "mjpegenc.h"
50 #include "msmpeg4.h"
51 #include "qpeldsp.h"
52 #include "faandct.h"
53 #include "thread.h"
54 #include "aandcttab.h"
55 #include "flv.h"
56 #include "mpeg4video.h"
57 #include "internal.h"
58 #include "bytestream.h"
59 #include <limits.h>
60 #include "sp5x.h"
61
62 static int encode_picture(MpegEncContext *s, int picture_number);
63 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
64 static int sse_mb(MpegEncContext *s);
65 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
66 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
67
68 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
69 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
70
71 const AVOption ff_mpv_generic_options[] = {
72     FF_MPV_COMMON_OPTS
73     { NULL },
74 };
75
76 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
77                        uint16_t (*qmat16)[2][64],
78                        const uint16_t *quant_matrix,
79                        int bias, int qmin, int qmax, int intra)
80 {
81     FDCTDSPContext *fdsp = &s->fdsp;
82     int qscale;
83     int shift = 0;
84
85     for (qscale = qmin; qscale <= qmax; qscale++) {
86         int i;
87         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
88             fdsp->fdct == ff_jpeg_fdct_islow_10 ||
89             fdsp->fdct == ff_faandct) {
90             for (i = 0; i < 64; i++) {
91                 const int j = s->idsp.idct_permutation[i];
92                 /* 16 <= qscale * quant_matrix[i] <= 7905
93                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
94                  *             19952 <=              x  <= 249205026
95                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
96                  *           3444240 >= (1 << 36) / (x) >= 275 */
97
98                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
99                                         (qscale * quant_matrix[j]));
100             }
101         } else if (fdsp->fdct == ff_fdct_ifast) {
102             for (i = 0; i < 64; i++) {
103                 const int j = s->idsp.idct_permutation[i];
104                 /* 16 <= qscale * quant_matrix[i] <= 7905
105                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
106                  *             19952 <=              x  <= 249205026
107                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
108                  *           3444240 >= (1 << 36) / (x) >= 275 */
109
110                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
111                                         (ff_aanscales[i] * (int64_t)qscale * quant_matrix[j]));
112             }
113         } else {
114             for (i = 0; i < 64; i++) {
115                 const int j = s->idsp.idct_permutation[i];
116                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
117                  * Assume x = qscale * quant_matrix[i]
118                  * So             16 <=              x  <= 7905
119                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
120                  * so          32768 >= (1 << 19) / (x) >= 67 */
121                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
122                                         (qscale * quant_matrix[j]));
123                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
124                 //                    (qscale * quant_matrix[i]);
125                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
126                                        (qscale * quant_matrix[j]);
127
128                 if (qmat16[qscale][0][i] == 0 ||
129                     qmat16[qscale][0][i] == 128 * 256)
130                     qmat16[qscale][0][i] = 128 * 256 - 1;
131                 qmat16[qscale][1][i] =
132                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
133                                 qmat16[qscale][0][i]);
134             }
135         }
136
137         for (i = intra; i < 64; i++) {
138             int64_t max = 8191;
139             if (fdsp->fdct == ff_fdct_ifast) {
140                 max = (8191LL * ff_aanscales[i]) >> 14;
141             }
142             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
143                 shift++;
144             }
145         }
146     }
147     if (shift) {
148         av_log(NULL, AV_LOG_INFO,
149                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
150                QMAT_SHIFT - shift);
151     }
152 }
153
154 static inline void update_qscale(MpegEncContext *s)
155 {
156     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
157                 (FF_LAMBDA_SHIFT + 7);
158     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
159
160     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
161                  FF_LAMBDA_SHIFT;
162 }
163
164 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
165 {
166     int i;
167
168     if (matrix) {
169         put_bits(pb, 1, 1);
170         for (i = 0; i < 64; i++) {
171             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
172         }
173     } else
174         put_bits(pb, 1, 0);
175 }
176
177 /**
178  * init s->current_picture.qscale_table from s->lambda_table
179  */
180 void ff_init_qscale_tab(MpegEncContext *s)
181 {
182     int8_t * const qscale_table = s->current_picture.qscale_table;
183     int i;
184
185     for (i = 0; i < s->mb_num; i++) {
186         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
187         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
188         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
189                                                   s->avctx->qmax);
190     }
191 }
192
193 static void update_duplicate_context_after_me(MpegEncContext *dst,
194                                               MpegEncContext *src)
195 {
196 #define COPY(a) dst->a= src->a
197     COPY(pict_type);
198     COPY(current_picture);
199     COPY(f_code);
200     COPY(b_code);
201     COPY(qscale);
202     COPY(lambda);
203     COPY(lambda2);
204     COPY(picture_in_gop_number);
205     COPY(gop_picture_number);
206     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
207     COPY(progressive_frame);    // FIXME don't set in encode_header
208     COPY(partitioned_frame);    // FIXME don't set in encode_header
209 #undef COPY
210 }
211
212 /**
213  * Set the given MpegEncContext to defaults for encoding.
214  * the changed fields will not depend upon the prior state of the MpegEncContext.
215  */
216 static void MPV_encode_defaults(MpegEncContext *s)
217 {
218     int i;
219     ff_MPV_common_defaults(s);
220
221     for (i = -16; i < 16; i++) {
222         default_fcode_tab[i + MAX_MV] = 1;
223     }
224     s->me.mv_penalty = default_mv_penalty;
225     s->fcode_tab     = default_fcode_tab;
226
227     s->input_picture_number  = 0;
228     s->picture_in_gop_number = 0;
229 }
230
231 av_cold int ff_dct_encode_init(MpegEncContext *s) {
232     if (ARCH_X86)
233         ff_dct_encode_init_x86(s);
234
235     if (CONFIG_H263_ENCODER)
236         ff_h263dsp_init(&s->h263dsp);
237     if (!s->dct_quantize)
238         s->dct_quantize = ff_dct_quantize_c;
239     if (!s->denoise_dct)
240         s->denoise_dct  = denoise_dct_c;
241     s->fast_dct_quantize = s->dct_quantize;
242     if (s->avctx->trellis)
243         s->dct_quantize  = dct_quantize_trellis_c;
244
245     return 0;
246 }
247
248 /* init video encoder */
249 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
250 {
251     MpegEncContext *s = avctx->priv_data;
252     int i, ret, format_supported;
253
254     MPV_encode_defaults(s);
255
256     switch (avctx->codec_id) {
257     case AV_CODEC_ID_MPEG2VIDEO:
258         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
259             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
260             av_log(avctx, AV_LOG_ERROR,
261                    "only YUV420 and YUV422 are supported\n");
262             return -1;
263         }
264         break;
265     case AV_CODEC_ID_MJPEG:
266     case AV_CODEC_ID_AMV:
267         format_supported = 0;
268         /* JPEG color space */
269         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
270             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
271             avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
272             (avctx->color_range == AVCOL_RANGE_JPEG &&
273              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
274               avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
275               avctx->pix_fmt == AV_PIX_FMT_YUV444P)))
276             format_supported = 1;
277         /* MPEG color space */
278         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
279                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
280                   avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
281                   avctx->pix_fmt == AV_PIX_FMT_YUV444P))
282             format_supported = 1;
283
284         if (!format_supported) {
285             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
286             return -1;
287         }
288         break;
289     default:
290         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
291             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
292             return -1;
293         }
294     }
295
296     switch (avctx->pix_fmt) {
297     case AV_PIX_FMT_YUVJ444P:
298     case AV_PIX_FMT_YUV444P:
299         s->chroma_format = CHROMA_444;
300         break;
301     case AV_PIX_FMT_YUVJ422P:
302     case AV_PIX_FMT_YUV422P:
303         s->chroma_format = CHROMA_422;
304         break;
305     case AV_PIX_FMT_YUVJ420P:
306     case AV_PIX_FMT_YUV420P:
307     default:
308         s->chroma_format = CHROMA_420;
309         break;
310     }
311
312     s->bit_rate = avctx->bit_rate;
313     s->width    = avctx->width;
314     s->height   = avctx->height;
315     if (avctx->gop_size > 600 &&
316         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
317         av_log(avctx, AV_LOG_WARNING,
318                "keyframe interval too large!, reducing it from %d to %d\n",
319                avctx->gop_size, 600);
320         avctx->gop_size = 600;
321     }
322     s->gop_size     = avctx->gop_size;
323     s->avctx        = avctx;
324     s->flags        = avctx->flags;
325     s->flags2       = avctx->flags2;
326     if (avctx->max_b_frames > MAX_B_FRAMES) {
327         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
328                "is %d.\n", MAX_B_FRAMES);
329         avctx->max_b_frames = MAX_B_FRAMES;
330     }
331     s->max_b_frames = avctx->max_b_frames;
332     s->codec_id     = avctx->codec->id;
333     s->strict_std_compliance = avctx->strict_std_compliance;
334     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
335     s->mpeg_quant         = avctx->mpeg_quant;
336     s->rtp_mode           = !!avctx->rtp_payload_size;
337     s->intra_dc_precision = avctx->intra_dc_precision;
338     s->user_specified_pts = AV_NOPTS_VALUE;
339
340     if (s->gop_size <= 1) {
341         s->intra_only = 1;
342         s->gop_size   = 12;
343     } else {
344         s->intra_only = 0;
345     }
346
347     s->me_method = avctx->me_method;
348
349     /* Fixed QSCALE */
350     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
351
352     s->adaptive_quant = (s->avctx->lumi_masking ||
353                          s->avctx->dark_masking ||
354                          s->avctx->temporal_cplx_masking ||
355                          s->avctx->spatial_cplx_masking  ||
356                          s->avctx->p_masking      ||
357                          s->avctx->border_masking ||
358                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
359                         !s->fixed_qscale;
360
361     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
362
363     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
364         switch(avctx->codec_id) {
365         case AV_CODEC_ID_MPEG1VIDEO:
366         case AV_CODEC_ID_MPEG2VIDEO:
367             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112L / 15000000 * 16384;
368             break;
369         case AV_CODEC_ID_MPEG4:
370         case AV_CODEC_ID_MSMPEG4V1:
371         case AV_CODEC_ID_MSMPEG4V2:
372         case AV_CODEC_ID_MSMPEG4V3:
373             if       (avctx->rc_max_rate >= 15000000) {
374                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000L) * (760-320) / (38400000 - 15000000);
375             } else if(avctx->rc_max_rate >=  2000000) {
376                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000L) * (320- 80) / (15000000 -  2000000);
377             } else if(avctx->rc_max_rate >=   384000) {
378                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000L) * ( 80- 40) / ( 2000000 -   384000);
379             } else
380                 avctx->rc_buffer_size = 40;
381             avctx->rc_buffer_size *= 16384;
382             break;
383         }
384         if (avctx->rc_buffer_size) {
385             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
386         }
387     }
388
389     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
390         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
391         if (avctx->rc_max_rate && !avctx->rc_buffer_size)
392             return -1;
393     }
394
395     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
396         av_log(avctx, AV_LOG_INFO,
397                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
398     }
399
400     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
401         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
402         return -1;
403     }
404
405     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
406         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
407         return -1;
408     }
409
410     if (avctx->rc_max_rate &&
411         avctx->rc_max_rate == avctx->bit_rate &&
412         avctx->rc_max_rate != avctx->rc_min_rate) {
413         av_log(avctx, AV_LOG_INFO,
414                "impossible bitrate constraints, this will fail\n");
415     }
416
417     if (avctx->rc_buffer_size &&
418         avctx->bit_rate * (int64_t)avctx->time_base.num >
419             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
420         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
421         return -1;
422     }
423
424     if (!s->fixed_qscale &&
425         avctx->bit_rate * av_q2d(avctx->time_base) >
426             avctx->bit_rate_tolerance) {
427         av_log(avctx, AV_LOG_WARNING,
428                "bitrate tolerance %d too small for bitrate %d, overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
429         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
430     }
431
432     if (s->avctx->rc_max_rate &&
433         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
434         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
435          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
436         90000LL * (avctx->rc_buffer_size - 1) >
437             s->avctx->rc_max_rate * 0xFFFFLL) {
438         av_log(avctx, AV_LOG_INFO,
439                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
440                "specified vbv buffer is too large for the given bitrate!\n");
441     }
442
443     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
444         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
445         s->codec_id != AV_CODEC_ID_FLV1) {
446         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
447         return -1;
448     }
449
450     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
451         av_log(avctx, AV_LOG_ERROR,
452                "OBMC is only supported with simple mb decision\n");
453         return -1;
454     }
455
456     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
457         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
458         return -1;
459     }
460
461     if (s->max_b_frames                    &&
462         s->codec_id != AV_CODEC_ID_MPEG4      &&
463         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
464         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
465         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
466         return -1;
467     }
468     if (s->max_b_frames < 0) {
469         av_log(avctx, AV_LOG_ERROR,
470                "max b frames must be 0 or positive for mpegvideo based encoders\n");
471         return -1;
472     }
473
474     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
475          s->codec_id == AV_CODEC_ID_H263  ||
476          s->codec_id == AV_CODEC_ID_H263P) &&
477         (avctx->sample_aspect_ratio.num > 255 ||
478          avctx->sample_aspect_ratio.den > 255)) {
479         av_log(avctx, AV_LOG_WARNING,
480                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
481                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
482         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
483                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
484     }
485
486     if ((s->codec_id == AV_CODEC_ID_H263  ||
487          s->codec_id == AV_CODEC_ID_H263P) &&
488         (avctx->width  > 2048 ||
489          avctx->height > 1152 )) {
490         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
491         return -1;
492     }
493     if ((s->codec_id == AV_CODEC_ID_H263  ||
494          s->codec_id == AV_CODEC_ID_H263P) &&
495         ((avctx->width &3) ||
496          (avctx->height&3) )) {
497         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
498         return -1;
499     }
500
501     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
502         (avctx->width  > 4095 ||
503          avctx->height > 4095 )) {
504         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
505         return -1;
506     }
507
508     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
509         (avctx->width  > 16383 ||
510          avctx->height > 16383 )) {
511         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
512         return -1;
513     }
514
515     if (s->codec_id == AV_CODEC_ID_RV10 &&
516         (avctx->width &15 ||
517          avctx->height&15 )) {
518         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
519         return AVERROR(EINVAL);
520     }
521
522     if (s->codec_id == AV_CODEC_ID_RV20 &&
523         (avctx->width &3 ||
524          avctx->height&3 )) {
525         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
526         return AVERROR(EINVAL);
527     }
528
529     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
530          s->codec_id == AV_CODEC_ID_WMV2) &&
531          avctx->width & 1) {
532          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
533          return -1;
534     }
535
536     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
537         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
538         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
539         return -1;
540     }
541
542     // FIXME mpeg2 uses that too
543     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
544                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
545         av_log(avctx, AV_LOG_ERROR,
546                "mpeg2 style quantization not supported by codec\n");
547         return -1;
548     }
549
550     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
551         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
552         return -1;
553     }
554
555     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
556         s->avctx->mb_decision != FF_MB_DECISION_RD) {
557         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
558         return -1;
559     }
560
561     if (s->avctx->scenechange_threshold < 1000000000 &&
562         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
563         av_log(avctx, AV_LOG_ERROR,
564                "closed gop with scene change detection are not supported yet, "
565                "set threshold to 1000000000\n");
566         return -1;
567     }
568
569     if (s->flags & CODEC_FLAG_LOW_DELAY) {
570         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
571             av_log(avctx, AV_LOG_ERROR,
572                   "low delay forcing is only available for mpeg2\n");
573             return -1;
574         }
575         if (s->max_b_frames != 0) {
576             av_log(avctx, AV_LOG_ERROR,
577                    "b frames cannot be used with low delay\n");
578             return -1;
579         }
580     }
581
582     if (s->q_scale_type == 1) {
583         if (avctx->qmax > 12) {
584             av_log(avctx, AV_LOG_ERROR,
585                    "non linear quant only supports qmax <= 12 currently\n");
586             return -1;
587         }
588     }
589
590     if (s->avctx->thread_count > 1         &&
591         s->codec_id != AV_CODEC_ID_MPEG4      &&
592         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
593         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
594         s->codec_id != AV_CODEC_ID_MJPEG      &&
595         (s->codec_id != AV_CODEC_ID_H263P)) {
596         av_log(avctx, AV_LOG_ERROR,
597                "multi threaded encoding not supported by codec\n");
598         return -1;
599     }
600
601     if (s->avctx->thread_count < 1) {
602         av_log(avctx, AV_LOG_ERROR,
603                "automatic thread number detection not supported by codec, "
604                "patch welcome\n");
605         return -1;
606     }
607
608     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
609         s->rtp_mode = 1;
610
611     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
612         s->h263_slice_structured = 1;
613
614     if (!avctx->time_base.den || !avctx->time_base.num) {
615         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
616         return -1;
617     }
618
619     i = (INT_MAX / 2 + 128) >> 8;
620     if (avctx->mb_threshold >= i) {
621         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
622                i - 1);
623         return -1;
624     }
625
626     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
627         av_log(avctx, AV_LOG_INFO,
628                "notice: b_frame_strategy only affects the first pass\n");
629         avctx->b_frame_strategy = 0;
630     }
631
632     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
633     if (i > 1) {
634         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
635         avctx->time_base.den /= i;
636         avctx->time_base.num /= i;
637         //return -1;
638     }
639
640     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
641         // (a + x * 3 / 8) / x
642         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
643         s->inter_quant_bias = 0;
644     } else {
645         s->intra_quant_bias = 0;
646         // (a - x / 4) / x
647         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
648     }
649
650     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
651         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
652         return AVERROR(EINVAL);
653     }
654
655     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
656         s->intra_quant_bias = avctx->intra_quant_bias;
657     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
658         s->inter_quant_bias = avctx->inter_quant_bias;
659
660     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
661
662     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
663         s->avctx->time_base.den > (1 << 16) - 1) {
664         av_log(avctx, AV_LOG_ERROR,
665                "timebase %d/%d not supported by MPEG 4 standard, "
666                "the maximum admitted value for the timebase denominator "
667                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
668                (1 << 16) - 1);
669         return -1;
670     }
671     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
672
673     switch (avctx->codec->id) {
674     case AV_CODEC_ID_MPEG1VIDEO:
675         s->out_format = FMT_MPEG1;
676         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
677         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
678         break;
679     case AV_CODEC_ID_MPEG2VIDEO:
680         s->out_format = FMT_MPEG1;
681         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
682         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
683         s->rtp_mode   = 1;
684         break;
685     case AV_CODEC_ID_MJPEG:
686     case AV_CODEC_ID_AMV:
687         s->out_format = FMT_MJPEG;
688         s->intra_only = 1; /* force intra only for jpeg */
689         if (!CONFIG_MJPEG_ENCODER ||
690             ff_mjpeg_encode_init(s) < 0)
691             return -1;
692         avctx->delay = 0;
693         s->low_delay = 1;
694         break;
695     case AV_CODEC_ID_H261:
696         if (!CONFIG_H261_ENCODER)
697             return -1;
698         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
699             av_log(avctx, AV_LOG_ERROR,
700                    "The specified picture size of %dx%d is not valid for the "
701                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
702                     s->width, s->height);
703             return -1;
704         }
705         s->out_format = FMT_H261;
706         avctx->delay  = 0;
707         s->low_delay  = 1;
708         break;
709     case AV_CODEC_ID_H263:
710         if (!CONFIG_H263_ENCODER)
711             return -1;
712         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
713                              s->width, s->height) == 8) {
714             av_log(avctx, AV_LOG_ERROR,
715                    "The specified picture size of %dx%d is not valid for "
716                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
717                    "352x288, 704x576, and 1408x1152. "
718                    "Try H.263+.\n", s->width, s->height);
719             return -1;
720         }
721         s->out_format = FMT_H263;
722         avctx->delay  = 0;
723         s->low_delay  = 1;
724         break;
725     case AV_CODEC_ID_H263P:
726         s->out_format = FMT_H263;
727         s->h263_plus  = 1;
728         /* Fx */
729         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
730         s->modified_quant  = s->h263_aic;
731         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
732         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
733
734         /* /Fx */
735         /* These are just to be sure */
736         avctx->delay = 0;
737         s->low_delay = 1;
738         break;
739     case AV_CODEC_ID_FLV1:
740         s->out_format      = FMT_H263;
741         s->h263_flv        = 2; /* format = 1; 11-bit codes */
742         s->unrestricted_mv = 1;
743         s->rtp_mode  = 0; /* don't allow GOB */
744         avctx->delay = 0;
745         s->low_delay = 1;
746         break;
747     case AV_CODEC_ID_RV10:
748         s->out_format = FMT_H263;
749         avctx->delay  = 0;
750         s->low_delay  = 1;
751         break;
752     case AV_CODEC_ID_RV20:
753         s->out_format      = FMT_H263;
754         avctx->delay       = 0;
755         s->low_delay       = 1;
756         s->modified_quant  = 1;
757         s->h263_aic        = 1;
758         s->h263_plus       = 1;
759         s->loop_filter     = 1;
760         s->unrestricted_mv = 0;
761         break;
762     case AV_CODEC_ID_MPEG4:
763         s->out_format      = FMT_H263;
764         s->h263_pred       = 1;
765         s->unrestricted_mv = 1;
766         s->low_delay       = s->max_b_frames ? 0 : 1;
767         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
768         break;
769     case AV_CODEC_ID_MSMPEG4V2:
770         s->out_format      = FMT_H263;
771         s->h263_pred       = 1;
772         s->unrestricted_mv = 1;
773         s->msmpeg4_version = 2;
774         avctx->delay       = 0;
775         s->low_delay       = 1;
776         break;
777     case AV_CODEC_ID_MSMPEG4V3:
778         s->out_format        = FMT_H263;
779         s->h263_pred         = 1;
780         s->unrestricted_mv   = 1;
781         s->msmpeg4_version   = 3;
782         s->flipflop_rounding = 1;
783         avctx->delay         = 0;
784         s->low_delay         = 1;
785         break;
786     case AV_CODEC_ID_WMV1:
787         s->out_format        = FMT_H263;
788         s->h263_pred         = 1;
789         s->unrestricted_mv   = 1;
790         s->msmpeg4_version   = 4;
791         s->flipflop_rounding = 1;
792         avctx->delay         = 0;
793         s->low_delay         = 1;
794         break;
795     case AV_CODEC_ID_WMV2:
796         s->out_format        = FMT_H263;
797         s->h263_pred         = 1;
798         s->unrestricted_mv   = 1;
799         s->msmpeg4_version   = 5;
800         s->flipflop_rounding = 1;
801         avctx->delay         = 0;
802         s->low_delay         = 1;
803         break;
804     default:
805         return -1;
806     }
807
808     avctx->has_b_frames = !s->low_delay;
809
810     s->encoding = 1;
811
812     s->progressive_frame    =
813     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
814                                                 CODEC_FLAG_INTERLACED_ME) ||
815                                 s->alternate_scan);
816
817     /* init */
818     if (ff_MPV_common_init(s) < 0)
819         return -1;
820
821     ff_fdctdsp_init(&s->fdsp, avctx);
822     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
823     ff_qpeldsp_init(&s->qdsp);
824
825     s->avctx->coded_frame = s->current_picture.f;
826
827     if (s->msmpeg4_version) {
828         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
829                           2 * 2 * (MAX_LEVEL + 1) *
830                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
831     }
832     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
833
834     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
835     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
836     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
837     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
838     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
839     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
840     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
841                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
842     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
843                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
844
845     if (s->avctx->noise_reduction) {
846         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
847                           2 * 64 * sizeof(uint16_t), fail);
848     }
849
850     ff_dct_encode_init(s);
851
852     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
853         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
854
855     s->quant_precision = 5;
856
857     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
858     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
859
860     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
861         ff_h261_encode_init(s);
862     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
863         ff_h263_encode_init(s);
864     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
865         ff_msmpeg4_encode_init(s);
866     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
867         && s->out_format == FMT_MPEG1)
868         ff_mpeg1_encode_init(s);
869
870     /* init q matrix */
871     for (i = 0; i < 64; i++) {
872         int j = s->idsp.idct_permutation[i];
873         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
874             s->mpeg_quant) {
875             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
876             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
877         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
878             s->intra_matrix[j] =
879             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
880         } else {
881             /* mpeg1/2 */
882             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
883             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
884         }
885         if (s->avctx->intra_matrix)
886             s->intra_matrix[j] = s->avctx->intra_matrix[i];
887         if (s->avctx->inter_matrix)
888             s->inter_matrix[j] = s->avctx->inter_matrix[i];
889     }
890
891     /* precompute matrix */
892     /* for mjpeg, we do include qscale in the matrix */
893     if (s->out_format != FMT_MJPEG) {
894         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
895                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
896                           31, 1);
897         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
898                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
899                           31, 0);
900     }
901
902     if (ff_rate_control_init(s) < 0)
903         return -1;
904
905 #if FF_API_ERROR_RATE
906     FF_DISABLE_DEPRECATION_WARNINGS
907     if (avctx->error_rate)
908         s->error_rate = avctx->error_rate;
909     FF_ENABLE_DEPRECATION_WARNINGS;
910 #endif
911
912 #if FF_API_NORMALIZE_AQP
913     FF_DISABLE_DEPRECATION_WARNINGS
914     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
915         s->mpv_flags |= FF_MPV_FLAG_NAQ;
916     FF_ENABLE_DEPRECATION_WARNINGS;
917 #endif
918
919 #if FF_API_MV0
920     FF_DISABLE_DEPRECATION_WARNINGS
921     if (avctx->flags & CODEC_FLAG_MV0)
922         s->mpv_flags |= FF_MPV_FLAG_MV0;
923     FF_ENABLE_DEPRECATION_WARNINGS
924 #endif
925
926     if (avctx->b_frame_strategy == 2) {
927         for (i = 0; i < s->max_b_frames + 2; i++) {
928             s->tmp_frames[i] = av_frame_alloc();
929             if (!s->tmp_frames[i])
930                 return AVERROR(ENOMEM);
931
932             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
933             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
934             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
935
936             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
937             if (ret < 0)
938                 return ret;
939         }
940     }
941
942     return 0;
943 fail:
944     ff_MPV_encode_end(avctx);
945     return AVERROR_UNKNOWN;
946 }
947
948 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
949 {
950     MpegEncContext *s = avctx->priv_data;
951     int i;
952
953     ff_rate_control_uninit(s);
954
955     ff_MPV_common_end(s);
956     if (CONFIG_MJPEG_ENCODER &&
957         s->out_format == FMT_MJPEG)
958         ff_mjpeg_encode_close(s);
959
960     av_freep(&avctx->extradata);
961
962     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
963         av_frame_free(&s->tmp_frames[i]);
964
965     ff_free_picture_tables(&s->new_picture);
966     ff_mpeg_unref_picture(s, &s->new_picture);
967
968     av_freep(&s->avctx->stats_out);
969     av_freep(&s->ac_stats);
970
971     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
972     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
973     s->q_chroma_intra_matrix=   NULL;
974     s->q_chroma_intra_matrix16= NULL;
975     av_freep(&s->q_intra_matrix);
976     av_freep(&s->q_inter_matrix);
977     av_freep(&s->q_intra_matrix16);
978     av_freep(&s->q_inter_matrix16);
979     av_freep(&s->input_picture);
980     av_freep(&s->reordered_input_picture);
981     av_freep(&s->dct_offset);
982
983     return 0;
984 }
985
/**
 * Sum of absolute errors of a 16x16 block against a constant value.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared against
 * @param stride distance in bytes between two consecutive rows of src
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
999
1000 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1001                            uint8_t *ref, int stride)
1002 {
1003     int x, y, w, h;
1004     int acc = 0;
1005
1006     w = s->width  & ~15;
1007     h = s->height & ~15;
1008
1009     for (y = 0; y < h; y += 16) {
1010         for (x = 0; x < w; x += 16) {
1011             int offset = x + y * stride;
1012             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
1013                                      16);
1014             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1015             int sae  = get_sae(src + offset, mean, stride);
1016
1017             acc += sae + 500 < sad;
1018         }
1019     }
1020     return acc;
1021 }
1022
1023
/**
 * Append one user frame (or NULL) to the encoder's input picture queue.
 *
 * For a non-NULL pic_arg this validates or guesses the pts, obtains an
 * internal Picture that either references the caller's buffers ("direct")
 * or receives a copy of the three planes, copies the frame properties and
 * stamps the display picture number and pts. In every case the
 * input_picture queue is then shifted down by one entry and the new
 * picture (NULL when pic_arg is NULL) is stored at index encoding_delay.
 *
 * @param s       encoder context
 * @param pic_arg frame supplied by the caller, may be NULL
 * @return 0 on success, a negative value on failure (AVERROR(EINVAL) when
 *         the pts is not greater than the previous user-specified pts)
 */
1024 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1025 {
1026     Picture *pic = NULL;
1027     int64_t pts;
1028     int i, display_picture_number = 0, ret;
         /* queue slot for the new picture: max_b_frames if non-zero,
          * otherwise 0 in low-delay mode and 1 if not */
1029     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
1030                                                  (s->low_delay ? 0 : 1);
1031     int direct = 1;
1032
1033     if (pic_arg) {
1034         pts = pic_arg->pts;
1035         display_picture_number = s->input_picture_number++;
1036
1037         if (pts != AV_NOPTS_VALUE) {
1038             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1039                 int64_t last = s->user_specified_pts;
1040
                     /* timestamps must be strictly increasing */
1041                 if (pts <= last) {
1042                     av_log(s->avctx, AV_LOG_ERROR,
1043                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1044                            pts, last);
1045                     return AVERROR(EINVAL);
1046                 }
1047
                     /* remember the pts distance between the first two frames */
1048                 if (!s->low_delay && display_picture_number == 1)
1049                     s->dts_delta = pts - last;
1050             }
1051             s->user_specified_pts = pts;
1052         } else {
1053             if (s->user_specified_pts != AV_NOPTS_VALUE) {
                     /* no pts given: continue from the last known one */
1054                 s->user_specified_pts =
1055                 pts = s->user_specified_pts + 1;
1056                 av_log(s->avctx, AV_LOG_INFO,
1057                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1058                        pts);
1059             } else {
                     /* no pts seen so far: fall back to the frame counter */
1060                 pts = display_picture_number;
1061             }
1062         }
1063     }
1064
1065     if (pic_arg) {
             /* The caller's buffers are only referenced directly when the
              * frame has buf[0] set, its line sizes equal the encoder's,
              * both dimensions are multiples of 16, and both data[0] and
              * the line size are STRIDE_ALIGN aligned. */
1066         if (!pic_arg->buf[0])
1067             direct = 0;
1068         if (pic_arg->linesize[0] != s->linesize)
1069             direct = 0;
1070         if (pic_arg->linesize[1] != s->uvlinesize)
1071             direct = 0;
1072         if (pic_arg->linesize[2] != s->uvlinesize)
1073             direct = 0;
1074         if ((s->width & 15) || (s->height & 15))
1075             direct = 0;
1076         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1077             direct = 0;
1078         if (s->linesize & (STRIDE_ALIGN-1))
1079             direct = 0;
1080
1081         av_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1082                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1083
1084         if (direct) {
                 /* direct path: take a new reference to the caller's frame */
1085             i = ff_find_unused_picture(s, 1);
1086             if (i < 0)
1087                 return i;
1088
1089             pic = &s->picture[i];
1090             pic->reference = 3;
1091
1092             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1093                 return ret;
1094             if (ff_alloc_picture(s, pic, 1) < 0) {
1095                 return -1;
1096             }
1097         } else {
                 /* copy path: allocate an internal picture and fill it */
1098             i = ff_find_unused_picture(s, 0);
1099             if (i < 0)
1100                 return i;
1101
1102             pic = &s->picture[i];
1103             pic->reference = 3;
1104
1105             if (ff_alloc_picture(s, pic, 0) < 0) {
1106                 return -1;
1107             }
1108
1109             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1110                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1111                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1112                 // the input planes already sit at INPLACE_OFFSET inside this picture: nothing to copy
1113             } else {
1114                 int h_chroma_shift, v_chroma_shift;
1115                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1116                                                  &h_chroma_shift,
1117                                                  &v_chroma_shift);
1118
                     /* i is reused here as the plane index (0 = luma) */
1119                 for (i = 0; i < 3; i++) {
1120                     int src_stride = pic_arg->linesize[i];
1121                     int dst_stride = i ? s->uvlinesize : s->linesize;
1122                     int h_shift = i ? h_chroma_shift : 0;
1123                     int v_shift = i ? v_chroma_shift : 0;
1124                     int w = s->width  >> h_shift;
1125                     int h = s->height >> v_shift;
1126                     uint8_t *src = pic_arg->data[i];
1127                     uint8_t *dst = pic->f->data[i];
                         /* number of luma rows passed to draw_edges below */
1128                     int vpad = 16;
1129
1130                     if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1131                         && !s->progressive_sequence)
1132                         vpad = 32;
1133
                         /* write at the in-place offset unless rc_buffer_size is set */
1134                     if (!s->avctx->rc_buffer_size)
1135                         dst += INPLACE_OFFSET;
1136
                         /* equal strides: copy the plane in one go, else row by row */
1137                     if (src_stride == dst_stride)
1138                         memcpy(dst, src, src_stride * h);
1139                     else {
1140                         int h2 = h;
1141                         uint8_t *dst2 = dst;
1142                         while (h2--) {
1143                             memcpy(dst2, src, w);
1144                             dst2 += dst_stride;
1145                             src += src_stride;
1146                         }
1147                     }
                         /* extend the bottom edge when the dimensions are not
                          * multiples of 16 (width) / vpad (height) */
1148                     if ((s->width & 15) || (s->height & (vpad-1))) {
1149                         s->mpvencdsp.draw_edges(dst, dst_stride,
1150                                                 w, h,
1151                                                 16>>h_shift,
1152                                                 vpad>>v_shift,
1153                                                 EDGE_BOTTOM);
1154                     }
1155                 }
1156             }
1157         }
1158         ret = av_frame_copy_props(pic->f, pic_arg);
1159         if (ret < 0)
1160             return ret;
1161
1162         pic->f->display_picture_number = display_picture_number;
1163         pic->f->pts = pts; // we set this here to avoid modifying pic_arg
1164     }
1165
1166     /* shift buffer entries */
1167     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1168         s->input_picture[i - 1] = s->input_picture[i];
1169
         /* pic is NULL here when no frame was supplied */
1170     s->input_picture[encoding_delay] = (Picture*) pic;
1171
1172     return 0;
1173 }
1174
1175 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1176 {
1177     int x, y, plane;
1178     int score = 0;
1179     int64_t score64 = 0;
1180
1181     for (plane = 0; plane < 3; plane++) {
1182         const int stride = p->f->linesize[plane];
1183         const int bw = plane ? 1 : 2;
1184         for (y = 0; y < s->mb_height * bw; y++) {
1185             for (x = 0; x < s->mb_width * bw; x++) {
1186                 int off = p->shared ? 0 : 16;
1187                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1188                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1189                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1190
1191                 switch (FFABS(s->avctx->frame_skip_exp)) {
1192                 case 0: score    =  FFMAX(score, v);          break;
1193                 case 1: score   += FFABS(v);                  break;
1194                 case 2: score64 += v * (int64_t)v;                       break;
1195                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1196                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1197                 }
1198             }
1199         }
1200     }
1201     emms_c();
1202
1203     if (score)
1204         score64 = score;
1205     if (s->avctx->frame_skip_exp < 0)
1206         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1207                       -1.0/s->avctx->frame_skip_exp);
1208
1209     if (score64 < s->avctx->frame_skip_threshold)
1210         return 1;
1211     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1212         return 1;
1213     return 0;
1214 }
1215
1216 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1217 {
1218     AVPacket pkt = { 0 };
1219     int ret, got_output;
1220
1221     av_init_packet(&pkt);
1222     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1223     if (ret < 0)
1224         return ret;
1225
1226     ret = pkt.size;
1227     av_free_packet(&pkt);
1228     return ret;
1229 }
1230
1231 static int estimate_best_b_count(MpegEncContext *s)
1232 {
1233     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1234     AVCodecContext *c = avcodec_alloc_context3(NULL);
1235     const int scale = s->avctx->brd_scale;
1236     int i, j, out_size, p_lambda, b_lambda, lambda2;
1237     int64_t best_rd  = INT64_MAX;
1238     int best_b_count = -1;
1239
1240     av_assert0(scale >= 0 && scale <= 3);
1241
1242     //emms_c();
1243     //s->next_picture_ptr->quality;
1244     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1245     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1246     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1247     if (!b_lambda) // FIXME we should do this somewhere else
1248         b_lambda = p_lambda;
1249     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1250                FF_LAMBDA_SHIFT;
1251
1252     c->width        = s->width  >> scale;
1253     c->height       = s->height >> scale;
1254     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1255     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1256     c->mb_decision  = s->avctx->mb_decision;
1257     c->me_cmp       = s->avctx->me_cmp;
1258     c->mb_cmp       = s->avctx->mb_cmp;
1259     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1260     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1261     c->time_base    = s->avctx->time_base;
1262     c->max_b_frames = s->max_b_frames;
1263
1264     if (avcodec_open2(c, codec, NULL) < 0)
1265         return -1;
1266
1267     for (i = 0; i < s->max_b_frames + 2; i++) {
1268         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1269                                                 s->next_picture_ptr;
1270         uint8_t *data[4];
1271
1272         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1273             pre_input = *pre_input_ptr;
1274             memcpy(data, pre_input_ptr->f->data, sizeof(data));
1275
1276             if (!pre_input.shared && i) {
1277                 data[0] += INPLACE_OFFSET;
1278                 data[1] += INPLACE_OFFSET;
1279                 data[2] += INPLACE_OFFSET;
1280             }
1281
1282             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1283                                        s->tmp_frames[i]->linesize[0],
1284                                        data[0],
1285                                        pre_input.f->linesize[0],
1286                                        c->width, c->height);
1287             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1288                                        s->tmp_frames[i]->linesize[1],
1289                                        data[1],
1290                                        pre_input.f->linesize[1],
1291                                        c->width >> 1, c->height >> 1);
1292             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1293                                        s->tmp_frames[i]->linesize[2],
1294                                        data[2],
1295                                        pre_input.f->linesize[2],
1296                                        c->width >> 1, c->height >> 1);
1297         }
1298     }
1299
1300     for (j = 0; j < s->max_b_frames + 1; j++) {
1301         int64_t rd = 0;
1302
1303         if (!s->input_picture[j])
1304             break;
1305
1306         c->error[0] = c->error[1] = c->error[2] = 0;
1307
1308         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1309         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1310
1311         out_size = encode_frame(c, s->tmp_frames[0]);
1312
1313         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1314
1315         for (i = 0; i < s->max_b_frames + 1; i++) {
1316             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1317
1318             s->tmp_frames[i + 1]->pict_type = is_p ?
1319                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1320             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1321
1322             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1323
1324             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1325         }
1326
1327         /* get the delayed frames */
1328         while (out_size) {
1329             out_size = encode_frame(c, NULL);
1330             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1331         }
1332
1333         rd += c->error[0] + c->error[1] + c->error[2];
1334
1335         if (rd < best_rd) {
1336             best_rd = rd;
1337             best_b_count = j;
1338         }
1339     }
1340
1341     avcodec_close(c);
1342     av_freep(&c);
1343
1344     return best_b_count;
1345 }
1346
1347 static int select_input_picture(MpegEncContext *s)
1348 {
1349     int i, ret;
1350
1351     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1352         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1353     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1354
1355     /* set next picture type & ordering */
1356     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1357         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1358             if (s->picture_in_gop_number < s->gop_size &&
1359                 s->next_picture_ptr &&
1360                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1361                 // FIXME check that te gop check above is +-1 correct
1362                 av_frame_unref(s->input_picture[0]->f);
1363
1364                 ff_vbv_update(s, 0);
1365
1366                 goto no_output_pic;
1367             }
1368         }
1369
1370         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1371             s->next_picture_ptr == NULL || s->intra_only) {
1372             s->reordered_input_picture[0] = s->input_picture[0];
1373             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1374             s->reordered_input_picture[0]->f->coded_picture_number =
1375                 s->coded_picture_number++;
1376         } else {
1377             int b_frames;
1378
1379             if (s->flags & CODEC_FLAG_PASS2) {
1380                 for (i = 0; i < s->max_b_frames + 1; i++) {
1381                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1382
1383                     if (pict_num >= s->rc_context.num_entries)
1384                         break;
1385                     if (!s->input_picture[i]) {
1386                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1387                         break;
1388                     }
1389
1390                     s->input_picture[i]->f->pict_type =
1391                         s->rc_context.entry[pict_num].new_pict_type;
1392                 }
1393             }
1394
1395             if (s->avctx->b_frame_strategy == 0) {
1396                 b_frames = s->max_b_frames;
1397                 while (b_frames && !s->input_picture[b_frames])
1398                     b_frames--;
1399             } else if (s->avctx->b_frame_strategy == 1) {
1400                 for (i = 1; i < s->max_b_frames + 1; i++) {
1401                     if (s->input_picture[i] &&
1402                         s->input_picture[i]->b_frame_score == 0) {
1403                         s->input_picture[i]->b_frame_score =
1404                             get_intra_count(s,
1405                                             s->input_picture[i    ]->f->data[0],
1406                                             s->input_picture[i - 1]->f->data[0],
1407                                             s->linesize) + 1;
1408                     }
1409                 }
1410                 for (i = 0; i < s->max_b_frames + 1; i++) {
1411                     if (s->input_picture[i] == NULL ||
1412                         s->input_picture[i]->b_frame_score - 1 >
1413                             s->mb_num / s->avctx->b_sensitivity)
1414                         break;
1415                 }
1416
1417                 b_frames = FFMAX(0, i - 1);
1418
1419                 /* reset scores */
1420                 for (i = 0; i < b_frames + 1; i++) {
1421                     s->input_picture[i]->b_frame_score = 0;
1422                 }
1423             } else if (s->avctx->b_frame_strategy == 2) {
1424                 b_frames = estimate_best_b_count(s);
1425             } else {
1426                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1427                 b_frames = 0;
1428             }
1429
1430             emms_c();
1431
1432             for (i = b_frames - 1; i >= 0; i--) {
1433                 int type = s->input_picture[i]->f->pict_type;
1434                 if (type && type != AV_PICTURE_TYPE_B)
1435                     b_frames = i;
1436             }
1437             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1438                 b_frames == s->max_b_frames) {
1439                 av_log(s->avctx, AV_LOG_ERROR,
1440                        "warning, too many b frames in a row\n");
1441             }
1442
1443             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1444                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1445                     s->gop_size > s->picture_in_gop_number) {
1446                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1447                 } else {
1448                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1449                         b_frames = 0;
1450                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1451                 }
1452             }
1453
1454             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1455                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1456                 b_frames--;
1457
1458             s->reordered_input_picture[0] = s->input_picture[b_frames];
1459             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1460                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1461             s->reordered_input_picture[0]->f->coded_picture_number =
1462                 s->coded_picture_number++;
1463             for (i = 0; i < b_frames; i++) {
1464                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1465                 s->reordered_input_picture[i + 1]->f->pict_type =
1466                     AV_PICTURE_TYPE_B;
1467                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1468                     s->coded_picture_number++;
1469             }
1470         }
1471     }
1472 no_output_pic:
1473     if (s->reordered_input_picture[0]) {
1474         s->reordered_input_picture[0]->reference =
1475            s->reordered_input_picture[0]->f->pict_type !=
1476                AV_PICTURE_TYPE_B ? 3 : 0;
1477
1478         ff_mpeg_unref_picture(s, &s->new_picture);
1479         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1480             return ret;
1481
1482         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1483             // input is a shared pix, so we can't modifiy it -> alloc a new
1484             // one & ensure that the shared one is reuseable
1485
1486             Picture *pic;
1487             int i = ff_find_unused_picture(s, 0);
1488             if (i < 0)
1489                 return i;
1490             pic = &s->picture[i];
1491
1492             pic->reference = s->reordered_input_picture[0]->reference;
1493             if (ff_alloc_picture(s, pic, 0) < 0) {
1494                 return -1;
1495             }
1496
1497             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1498             if (ret < 0)
1499                 return ret;
1500
1501             /* mark us unused / free shared pic */
1502             av_frame_unref(s->reordered_input_picture[0]->f);
1503             s->reordered_input_picture[0]->shared = 0;
1504
1505             s->current_picture_ptr = pic;
1506         } else {
1507             // input is not a shared pix -> reuse buffer for current_pix
1508             s->current_picture_ptr = s->reordered_input_picture[0];
1509             for (i = 0; i < 4; i++) {
1510                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1511             }
1512         }
1513         ff_mpeg_unref_picture(s, &s->current_picture);
1514         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1515                                        s->current_picture_ptr)) < 0)
1516             return ret;
1517
1518         s->picture_number = s->new_picture.f->display_picture_number;
1519     } else {
1520         ff_mpeg_unref_picture(s, &s->new_picture);
1521     }
1522     return 0;
1523 }
1524
1525 static void frame_end(MpegEncContext *s)
1526 {
1527     if (s->unrestricted_mv &&
1528         s->current_picture.reference &&
1529         !s->intra_only) {
1530         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1531         int hshift = desc->log2_chroma_w;
1532         int vshift = desc->log2_chroma_h;
1533         s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
1534                                 s->current_picture.f->linesize[0],
1535                                 s->h_edge_pos, s->v_edge_pos,
1536                                 EDGE_WIDTH, EDGE_WIDTH,
1537                                 EDGE_TOP | EDGE_BOTTOM);
1538         s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
1539                                 s->current_picture.f->linesize[1],
1540                                 s->h_edge_pos >> hshift,
1541                                 s->v_edge_pos >> vshift,
1542                                 EDGE_WIDTH >> hshift,
1543                                 EDGE_WIDTH >> vshift,
1544                                 EDGE_TOP | EDGE_BOTTOM);
1545         s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
1546                                 s->current_picture.f->linesize[2],
1547                                 s->h_edge_pos >> hshift,
1548                                 s->v_edge_pos >> vshift,
1549                                 EDGE_WIDTH >> hshift,
1550                                 EDGE_WIDTH >> vshift,
1551                                 EDGE_TOP | EDGE_BOTTOM);
1552     }
1553
1554     emms_c();
1555
1556     s->last_pict_type                 = s->pict_type;
1557     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1558     if (s->pict_type!= AV_PICTURE_TYPE_B)
1559         s->last_non_b_pict_type = s->pict_type;
1560
1561     s->avctx->coded_frame = s->current_picture_ptr->f;
1562
1563 }
1564
1565 static void update_noise_reduction(MpegEncContext *s)
1566 {
1567     int intra, i;
1568
1569     for (intra = 0; intra < 2; intra++) {
1570         if (s->dct_count[intra] > (1 << 16)) {
1571             for (i = 0; i < 64; i++) {
1572                 s->dct_error_sum[intra][i] >>= 1;
1573             }
1574             s->dct_count[intra] >>= 1;
1575         }
1576
1577         for (i = 0; i < 64; i++) {
1578             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1579                                        s->dct_count[intra] +
1580                                        s->dct_error_sum[intra][i] / 2) /
1581                                       (s->dct_error_sum[intra][i] + 1);
1582         }
1583     }
1584 }
1585
1586 static int frame_start(MpegEncContext *s)
1587 {
1588     int ret;
1589
1590     /* mark & release old frames */
1591     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1592         s->last_picture_ptr != s->next_picture_ptr &&
1593         s->last_picture_ptr->f->buf[0]) {
1594         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1595     }
1596
1597     s->current_picture_ptr->f->pict_type = s->pict_type;
1598     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1599
1600     ff_mpeg_unref_picture(s, &s->current_picture);
1601     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1602                                    s->current_picture_ptr)) < 0)
1603         return ret;
1604
1605     if (s->pict_type != AV_PICTURE_TYPE_B) {
1606         s->last_picture_ptr = s->next_picture_ptr;
1607         if (!s->droppable)
1608             s->next_picture_ptr = s->current_picture_ptr;
1609     }
1610
1611     if (s->last_picture_ptr) {
1612         ff_mpeg_unref_picture(s, &s->last_picture);
1613         if (s->last_picture_ptr->f->buf[0] &&
1614             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1615                                        s->last_picture_ptr)) < 0)
1616             return ret;
1617     }
1618     if (s->next_picture_ptr) {
1619         ff_mpeg_unref_picture(s, &s->next_picture);
1620         if (s->next_picture_ptr->f->buf[0] &&
1621             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1622                                        s->next_picture_ptr)) < 0)
1623             return ret;
1624     }
1625
1626     if (s->picture_structure!= PICT_FRAME) {
1627         int i;
1628         for (i = 0; i < 4; i++) {
1629             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1630                 s->current_picture.f->data[i] +=
1631                     s->current_picture.f->linesize[i];
1632             }
1633             s->current_picture.f->linesize[i] *= 2;
1634             s->last_picture.f->linesize[i]    *= 2;
1635             s->next_picture.f->linesize[i]    *= 2;
1636         }
1637     }
1638
1639     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1640         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1641         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1642     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1643         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1644         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1645     } else {
1646         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1647         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1648     }
1649
1650     if (s->dct_error_sum) {
1651         av_assert2(s->avctx->noise_reduction && s->encoding);
1652         update_noise_reduction(s);
1653     }
1654
1655     return 0;
1656 }
1657
1658 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1659                           const AVFrame *pic_arg, int *got_packet)
1660 {
1661     MpegEncContext *s = avctx->priv_data;
1662     int i, stuffing_count, ret;
1663     int context_count = s->slice_context_count;
1664
1665     s->picture_in_gop_number++;
1666
1667     if (load_input_picture(s, pic_arg) < 0)
1668         return -1;
1669
1670     if (select_input_picture(s) < 0) {
1671         return -1;
1672     }
1673
1674     /* output? */
1675     if (s->new_picture.f->data[0]) {
1676         if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1677             return ret;
1678         if (s->mb_info) {
1679             s->mb_info_ptr = av_packet_new_side_data(pkt,
1680                                  AV_PKT_DATA_H263_MB_INFO,
1681                                  s->mb_width*s->mb_height*12);
1682             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1683         }
1684
1685         for (i = 0; i < context_count; i++) {
1686             int start_y = s->thread_context[i]->start_mb_y;
1687             int   end_y = s->thread_context[i]->  end_mb_y;
1688             int h       = s->mb_height;
1689             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1690             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1691
1692             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1693         }
1694
1695         s->pict_type = s->new_picture.f->pict_type;
1696         //emms_c();
1697         ret = frame_start(s);
1698         if (ret < 0)
1699             return ret;
1700 vbv_retry:
1701         if (encode_picture(s, s->picture_number) < 0)
1702             return -1;
1703
1704         avctx->header_bits = s->header_bits;
1705         avctx->mv_bits     = s->mv_bits;
1706         avctx->misc_bits   = s->misc_bits;
1707         avctx->i_tex_bits  = s->i_tex_bits;
1708         avctx->p_tex_bits  = s->p_tex_bits;
1709         avctx->i_count     = s->i_count;
1710         // FIXME f/b_count in avctx
1711         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1712         avctx->skip_count  = s->skip_count;
1713
1714         frame_end(s);
1715
1716         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1717             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1718
1719         if (avctx->rc_buffer_size) {
1720             RateControlContext *rcc = &s->rc_context;
1721             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1722
1723             if (put_bits_count(&s->pb) > max_size &&
1724                 s->lambda < s->avctx->lmax) {
1725                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1726                                        (s->qscale + 1) / s->qscale);
1727                 if (s->adaptive_quant) {
1728                     int i;
1729                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1730                         s->lambda_table[i] =
1731                             FFMAX(s->lambda_table[i] + 1,
1732                                   s->lambda_table[i] * (s->qscale + 1) /
1733                                   s->qscale);
1734                 }
1735                 s->mb_skipped = 0;        // done in frame_start()
1736                 // done in encode_picture() so we must undo it
1737                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1738                     if (s->flipflop_rounding          ||
1739                         s->codec_id == AV_CODEC_ID_H263P ||
1740                         s->codec_id == AV_CODEC_ID_MPEG4)
1741                         s->no_rounding ^= 1;
1742                 }
1743                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1744                     s->time_base       = s->last_time_base;
1745                     s->last_non_b_time = s->time - s->pp_time;
1746                 }
1747                 for (i = 0; i < context_count; i++) {
1748                     PutBitContext *pb = &s->thread_context[i]->pb;
1749                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1750                 }
1751                 goto vbv_retry;
1752             }
1753
1754             av_assert0(s->avctx->rc_max_rate);
1755         }
1756
1757         if (s->flags & CODEC_FLAG_PASS1)
1758             ff_write_pass1_stats(s);
1759
1760         for (i = 0; i < 4; i++) {
1761             s->current_picture_ptr->f->error[i] =
1762             s->current_picture.f->error[i] =
1763                 s->current_picture.error[i];
1764             avctx->error[i] += s->current_picture_ptr->f->error[i];
1765         }
1766
1767         if (s->flags & CODEC_FLAG_PASS1)
1768             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1769                    avctx->i_tex_bits + avctx->p_tex_bits ==
1770                        put_bits_count(&s->pb));
1771         flush_put_bits(&s->pb);
1772         s->frame_bits  = put_bits_count(&s->pb);
1773
1774         stuffing_count = ff_vbv_update(s, s->frame_bits);
1775         s->stuffing_bits = 8*stuffing_count;
1776         if (stuffing_count) {
1777             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1778                     stuffing_count + 50) {
1779                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1780                 return -1;
1781             }
1782
1783             switch (s->codec_id) {
1784             case AV_CODEC_ID_MPEG1VIDEO:
1785             case AV_CODEC_ID_MPEG2VIDEO:
1786                 while (stuffing_count--) {
1787                     put_bits(&s->pb, 8, 0);
1788                 }
1789             break;
1790             case AV_CODEC_ID_MPEG4:
1791                 put_bits(&s->pb, 16, 0);
1792                 put_bits(&s->pb, 16, 0x1C3);
1793                 stuffing_count -= 4;
1794                 while (stuffing_count--) {
1795                     put_bits(&s->pb, 8, 0xFF);
1796                 }
1797             break;
1798             default:
1799                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1800             }
1801             flush_put_bits(&s->pb);
1802             s->frame_bits  = put_bits_count(&s->pb);
1803         }
1804
1805         /* update mpeg1/2 vbv_delay for CBR */
1806         if (s->avctx->rc_max_rate                          &&
1807             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1808             s->out_format == FMT_MPEG1                     &&
1809             90000LL * (avctx->rc_buffer_size - 1) <=
1810                 s->avctx->rc_max_rate * 0xFFFFLL) {
1811             int vbv_delay, min_delay;
1812             double inbits  = s->avctx->rc_max_rate *
1813                              av_q2d(s->avctx->time_base);
1814             int    minbits = s->frame_bits - 8 *
1815                              (s->vbv_delay_ptr - s->pb.buf - 1);
1816             double bits    = s->rc_context.buffer_index + minbits - inbits;
1817
1818             if (bits < 0)
1819                 av_log(s->avctx, AV_LOG_ERROR,
1820                        "Internal error, negative bits\n");
1821
1822             assert(s->repeat_first_field == 0);
1823
1824             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1825             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1826                         s->avctx->rc_max_rate;
1827
1828             vbv_delay = FFMAX(vbv_delay, min_delay);
1829
1830             av_assert0(vbv_delay < 0xFFFF);
1831
1832             s->vbv_delay_ptr[0] &= 0xF8;
1833             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1834             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1835             s->vbv_delay_ptr[2] &= 0x07;
1836             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1837             avctx->vbv_delay     = vbv_delay * 300;
1838         }
1839         s->total_bits     += s->frame_bits;
1840         avctx->frame_bits  = s->frame_bits;
1841
1842         pkt->pts = s->current_picture.f->pts;
1843         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1844             if (!s->current_picture.f->coded_picture_number)
1845                 pkt->dts = pkt->pts - s->dts_delta;
1846             else
1847                 pkt->dts = s->reordered_pts;
1848             s->reordered_pts = pkt->pts;
1849         } else
1850             pkt->dts = pkt->pts;
1851         if (s->current_picture.f->key_frame)
1852             pkt->flags |= AV_PKT_FLAG_KEY;
1853         if (s->mb_info)
1854             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1855     } else {
1856         s->frame_bits = 0;
1857     }
1858
1859     /* release non-reference frames */
1860     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1861         if (!s->picture[i].reference)
1862             ff_mpeg_unref_picture(s, &s->picture[i]);
1863     }
1864
1865     av_assert1((s->frame_bits & 7) == 0);
1866
1867     pkt->size = s->frame_bits / 8;
1868     *got_packet = !!pkt->size;
1869     return 0;
1870 }
1871
1872 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1873                                                 int n, int threshold)
1874 {
1875     static const char tab[64] = {
1876         3, 2, 2, 1, 1, 1, 1, 1,
1877         1, 1, 1, 1, 1, 1, 1, 1,
1878         1, 1, 1, 1, 1, 1, 1, 1,
1879         0, 0, 0, 0, 0, 0, 0, 0,
1880         0, 0, 0, 0, 0, 0, 0, 0,
1881         0, 0, 0, 0, 0, 0, 0, 0,
1882         0, 0, 0, 0, 0, 0, 0, 0,
1883         0, 0, 0, 0, 0, 0, 0, 0
1884     };
1885     int score = 0;
1886     int run = 0;
1887     int i;
1888     int16_t *block = s->block[n];
1889     const int last_index = s->block_last_index[n];
1890     int skip_dc;
1891
1892     if (threshold < 0) {
1893         skip_dc = 0;
1894         threshold = -threshold;
1895     } else
1896         skip_dc = 1;
1897
1898     /* Are all we could set to zero already zero? */
1899     if (last_index <= skip_dc - 1)
1900         return;
1901
1902     for (i = 0; i <= last_index; i++) {
1903         const int j = s->intra_scantable.permutated[i];
1904         const int level = FFABS(block[j]);
1905         if (level == 1) {
1906             if (skip_dc && i == 0)
1907                 continue;
1908             score += tab[run];
1909             run = 0;
1910         } else if (level > 1) {
1911             return;
1912         } else {
1913             run++;
1914         }
1915     }
1916     if (score >= threshold)
1917         return;
1918     for (i = skip_dc; i <= last_index; i++) {
1919         const int j = s->intra_scantable.permutated[i];
1920         block[j] = 0;
1921     }
1922     if (block[0])
1923         s->block_last_index[n] = 0;
1924     else
1925         s->block_last_index[n] = -1;
1926 }
1927
1928 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1929                                int last_index)
1930 {
1931     int i;
1932     const int maxlevel = s->max_qcoeff;
1933     const int minlevel = s->min_qcoeff;
1934     int overflow = 0;
1935
1936     if (s->mb_intra) {
1937         i = 1; // skip clipping of intra dc
1938     } else
1939         i = 0;
1940
1941     for (; i <= last_index; i++) {
1942         const int j = s->intra_scantable.permutated[i];
1943         int level = block[j];
1944
1945         if (level > maxlevel) {
1946             level = maxlevel;
1947             overflow++;
1948         } else if (level < minlevel) {
1949             level = minlevel;
1950             overflow++;
1951         }
1952
1953         block[j] = level;
1954     }
1955
1956     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1957         av_log(s->avctx, AV_LOG_INFO,
1958                "warning, clipping %d dct coefficients to %d..%d\n",
1959                overflow, minlevel, maxlevel);
1960 }
1961
/**
 * Compute a per-pixel weight for an 8x8 block from local pixel activity.
 *
 * For every pixel the sum and the sum of squares over its 3x3 neighbourhood
 * (cropped to the 8x8 block) are gathered, and the weight is
 * 36 * ff_sqrt(count * sqr - sum * sum) / count, i.e. 36 times the
 * neighbourhood's standard deviation, assuming ff_sqrt() is an integer
 * square root.
 *
 * @param weight output, 64 entries in row-major order
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance between two rows of ptr, in bytes
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = row > 0     ? row - 1 : 0;
        const int bottom = row + 2 < 8 ? row + 2 : 8;

        for (col = 0; col < 8; col++) {
            const int left  = col > 0     ? col - 1 : 0;
            const int right = col + 2 < 8 ? col + 2 : 8;
            int sum   = 0;
            int sqr   = 0;
            int count = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                const uint8_t *line = ptr + ny * stride;

                for (nx = left; nx < right; nx++) {
                    const int v = line[nx];

                    sum += v;
                    sqr += v * v;
                    count++;
                }
            }

            weight[col + 8 * row] =
                (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
1985
/**
 * Encode the macroblock at (s->mb_x, s->mb_y).
 *
 * Updates the quantizer, fetches either the source pixels (intra) or the
 * difference between source and motion-compensated prediction (inter) into
 * s->block[], runs DCT + quantization on every block that is not skipped,
 * optionally refines / eliminates coefficients, and finally calls the
 * codec-specific macroblock writer selected by s->codec_id.
 *
 * @param s               encoder context
 * @param motion_x        passed through to the codec-specific *_encode_mb()
 * @param motion_y        passed through to the codec-specific *_encode_mb()
 * @param mb_block_height chroma rows per macroblock, used for chroma addressing
 * @param mb_block_width  chroma columns per macroblock, used for chroma addressing
 * @param mb_block_count  number of 8x8 blocks handled per macroblock
 *                        (encode_mb() passes 6, 8 or 12)
 */
1986 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1987                                                 int motion_x, int motion_y,
1988                                                 int mb_block_height,
1989                                                 int mb_block_width,
1990                                                 int mb_block_count)
1991 {
1992     int16_t weight[12][64];
1993     int16_t orig[12][64];
1994     const int mb_x = s->mb_x;
1995     const int mb_y = s->mb_y;
1996     int i;
1997     int skip_dct[12];
1998     int dct_offset = s->linesize * 8; // default for progressive frames
1999     int uv_dct_offset = s->uvlinesize * 8;
2000     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2001     ptrdiff_t wrap_y, wrap_c;
2002
2003     for (i = 0; i < mb_block_count; i++)
2004         skip_dct[i] = s->skipdct;
2005
         /* Per-macroblock quantizer: lambda comes from lambda_table, and
          * dquant is the change relative to the qscale in effect on entry. */
2006     if (s->adaptive_quant) {
2007         const int last_qp = s->qscale;
2008         const int mb_xy = mb_x + mb_y * s->mb_stride;
2009
2010         s->lambda = s->lambda_table[mb_xy];
2011         update_qscale(s);
2012
2013         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2014             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2015             s->dquant = s->qscale - last_qp;
2016
2017             if (s->out_format == FMT_H263) {
                     /* dquant is limited to [-2, 2] for FMT_H263 output */
2018                 s->dquant = av_clip(s->dquant, -2, 2);
2019
2020                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2021                     if (!s->mb_intra) {
                             /* no quantizer change for B-frame macroblocks
                              * with an odd dquant or direct mode, nor for
                              * macroblocks using 8x8 motion vectors */
2022                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2023                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2024                                 s->dquant = 0;
2025                         }
2026                         if (s->mv_type == MV_TYPE_8X8)
2027                             s->dquant = 0;
2028                     }
2029                 }
2030             }
2031         }
2032         ff_set_qscale(s, last_qp + s->dquant);
2033     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2034         ff_set_qscale(s, s->qscale + s->dquant);
2035
         /* Locate the macroblock inside the source picture planes. */
2036     wrap_y = s->linesize;
2037     wrap_c = s->uvlinesize;
2038     ptr_y  = s->new_picture.f->data[0] +
2039              (mb_y * 16 * wrap_y)              + mb_x * 16;
2040     ptr_cb = s->new_picture.f->data[1] +
2041              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2042     ptr_cr = s->new_picture.f->data[2] +
2043              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2044
         /* The macroblock reaches past the right or bottom picture border:
          * build it in the edge emulation buffer (luma at ebuf, Cb and Cr
          * side by side 16 lines further down) and read from there.
          * This is not done for AMV. */
2045     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2046         uint8_t *ebuf = s->edge_emu_buffer + 36 * wrap_y;
2047         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2048         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2049         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2050                                  wrap_y, wrap_y,
2051                                  16, 16, mb_x * 16, mb_y * 16,
2052                                  s->width, s->height);
2053         ptr_y = ebuf;
2054         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2055                                  wrap_c, wrap_c,
2056                                  mb_block_width, mb_block_height,
2057                                  mb_x * mb_block_width, mb_y * mb_block_height,
2058                                  cw, ch);
2059         ptr_cb = ebuf + 16 * wrap_y;
2060         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2061                                  wrap_c, wrap_c,
2062                                  mb_block_width, mb_block_height,
2063                                  mb_x * mb_block_width, mb_y * mb_block_height,
2064                                  cw, ch);
2065         ptr_cr = ebuf + 16 * wrap_y + 16;
2066     }
2067
2068     if (s->mb_intra) {
             /* Intra: the blocks are filled with the source pixels. */
2069         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2070             int progressive_score, interlaced_score;
2071
                 /* Compare the cost of the two 16x8 halves (progressive)
                  * against the cost of the two fields (interlaced); the
                  * progressive score gets a bonus of 400. */
2072             s->interlaced_dct = 0;
2073             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
2074                                                     NULL, wrap_y, 8) +
2075                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2076                                                     NULL, wrap_y, 8) - 400;
2077
2078             if (progressive_score > 0) {
2079                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
2080                                                        NULL, wrap_y * 2, 8) +
2081                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
2082                                                        NULL, wrap_y * 2, 8);
2083                 if (progressive_score > interlaced_score) {
2084                     s->interlaced_dct = 1;
2085
                         /* field layout: the lower blocks start one line
                          * down and every block steps over two lines */
2086                     dct_offset = wrap_y;
2087                     uv_dct_offset = wrap_c;
2088                     wrap_y <<= 1;
2089                     if (s->chroma_format == CHROMA_422 ||
2090                         s->chroma_format == CHROMA_444)
2091                         wrap_c <<= 1;
2092                 }
2093             }
2094         }
2095
2096         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
2097         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
2098         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
2099         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
2100
2101         if (s->flags & CODEC_FLAG_GRAY) {
2102             skip_dct[4] = 1;
2103             skip_dct[5] = 1;
2104         } else {
2105             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2106             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2107             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2108                 s->dsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2109                 s->dsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2110             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2111                 s->dsp.get_pixels(s->block[6], ptr_cb + 8, wrap_c);
2112                 s->dsp.get_pixels(s->block[7], ptr_cr + 8, wrap_c);
2113                 s->dsp.get_pixels(s->block[8], ptr_cb + uv_dct_offset, wrap_c);
2114                 s->dsp.get_pixels(s->block[9], ptr_cr + uv_dct_offset, wrap_c);
2115                 s->dsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2116                 s->dsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2117             }
2118         }
2119     } else {
             /* Inter: build the prediction in s->dest[] and code the
              * difference between the source and that prediction. */
2120         op_pixels_func (*op_pix)[4];
2121         qpel_mc_func (*op_qpix)[16];
2122         uint8_t *dest_y, *dest_cb, *dest_cr;
2123
2124         dest_y  = s->dest[0];
2125         dest_cb = s->dest[1];
2126         dest_cr = s->dest[2];
2127
2128         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2129             op_pix  = s->hdsp.put_pixels_tab;
2130             op_qpix = s->qdsp.put_qpel_pixels_tab;
2131         } else {
2132             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2133             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2134         }
2135
             /* The forward prediction is written with "put"; a backward
              * prediction following it uses the "avg" functions instead. */
2136         if (s->mv_dir & MV_DIR_FORWARD) {
2137             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
2138                           s->last_picture.f->data,
2139                           op_pix, op_qpix);
2140             op_pix  = s->hdsp.avg_pixels_tab;
2141             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2142         }
2143         if (s->mv_dir & MV_DIR_BACKWARD) {
2144             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
2145                           s->next_picture.f->data,
2146                           op_pix, op_qpix);
2147         }
2148
2149         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2150             int progressive_score, interlaced_score;
2151
                 /* Same progressive/interlaced decision as in the intra
                  * path, but measured between prediction and source. */
2152             s->interlaced_dct = 0;
2153             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
2154                                                     ptr_y,              wrap_y,
2155                                                     8) +
2156                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
2157                                                     ptr_y + wrap_y * 8, wrap_y,
2158                                                     8) - 400;
2159
2160             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2161                 progressive_score -= 400;
2162
2163             if (progressive_score > 0) {
2164                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
2165                                                        ptr_y,
2166                                                        wrap_y * 2, 8) +
2167                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
2168                                                        ptr_y + wrap_y,
2169                                                        wrap_y * 2, 8);
2170
2171                 if (progressive_score > interlaced_score) {
2172                     s->interlaced_dct = 1;
2173
2174                     dct_offset = wrap_y;
2175                     uv_dct_offset = wrap_c;
2176                     wrap_y <<= 1;
                         /* NOTE(review): unlike the intra path, CHROMA_444 is
                          * not handled here - confirm this is intentional */
2177                     if (s->chroma_format == CHROMA_422)
2178                         wrap_c <<= 1;
2179                 }
2180             }
2181         }
2182
2183         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2184         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2185         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2186                            dest_y + dct_offset, wrap_y);
2187         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2188                            dest_y + dct_offset + 8, wrap_y);
2189
2190         if (s->flags & CODEC_FLAG_GRAY) {
2191             skip_dct[4] = 1;
2192             skip_dct[5] = 1;
2193         } else {
2194             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2195             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2196             if (!s->chroma_y_shift) { /* 422 */
2197                 s->dsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2198                                    dest_cb + uv_dct_offset, wrap_c);
2199                 s->dsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2200                                    dest_cr + uv_dct_offset, wrap_c);
2201             }
2202         }
2203         /* pre quantization */
             /* When the motion-compensated variance of the macroblock is
              * below 2 * qscale^2, every block whose SAD against the
              * prediction is below 20 * qscale is not transformed at all. */
2204         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2205                 2 * s->qscale * s->qscale) {
2206             // FIXME optimize
2207             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
2208                               wrap_y, 8) < 20 * s->qscale)
2209                 skip_dct[0] = 1;
2210             if (s->dsp.sad[1](NULL, ptr_y + 8,
2211                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2212                 skip_dct[1] = 1;
2213             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
2214                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
2215                 skip_dct[2] = 1;
2216             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
2217                               dest_y + dct_offset + 8,
2218                               wrap_y, 8) < 20 * s->qscale)
2219                 skip_dct[3] = 1;
2220             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
2221                               wrap_c, 8) < 20 * s->qscale)
2222                 skip_dct[4] = 1;
2223             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
2224                               wrap_c, 8) < 20 * s->qscale)
2225                 skip_dct[5] = 1;
2226             if (!s->chroma_y_shift) { /* 422 */
2227                 if (s->dsp.sad[1](NULL, ptr_cb + uv_dct_offset,
2228                                   dest_cb + uv_dct_offset,
2229                                   wrap_c, 8) < 20 * s->qscale)
2230                     skip_dct[6] = 1;
2231                 if (s->dsp.sad[1](NULL, ptr_cr + uv_dct_offset,
2232                                   dest_cr + uv_dct_offset,
2233                                   wrap_c, 8) < 20 * s->qscale)
2234                     skip_dct[7] = 1;
2235             }
2236         }
2237     }
2238
         /* Noise shaping needs a visual weight per coefficient position and
          * a copy of the blocks before quantization; both are consumed by
          * dct_quantize_refine() below. */
2239     if (s->quantizer_noise_shaping) {
2240         if (!skip_dct[0])
2241             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2242         if (!skip_dct[1])
2243             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2244         if (!skip_dct[2])
2245             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2246         if (!skip_dct[3])
2247             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2248         if (!skip_dct[4])
2249             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2250         if (!skip_dct[5])
2251             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2252         if (!s->chroma_y_shift) { /* 422 */
2253             if (!skip_dct[6])
2254                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2255                                   wrap_c);
2256             if (!skip_dct[7])
2257                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2258                                   wrap_c);
2259         }
2260         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2261     }
2262
2263     /* DCT & quantize */
2264     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2265     {
2266         for (i = 0; i < mb_block_count; i++) {
2267             if (!skip_dct[i]) {
2268                 int overflow;
2269                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2270                 // FIXME we could decide to change to quantizer instead of
2271                 // clipping
2272                 // JS: I don't think that would be a good idea it could lower
2273                 //     quality instead of improve it. Just INTRADC clipping
2274                 //     deserves changes in quantizer
2275                 if (overflow)
2276                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2277             } else
2278                 s->block_last_index[i] = -1;
2279         }
2280         if (s->quantizer_noise_shaping) {
2281             for (i = 0; i < mb_block_count; i++) {
2282                 if (!skip_dct[i]) {
2283                     s->block_last_index[i] =
2284                         dct_quantize_refine(s, s->block[i], weight[i],
2285                                             orig[i], i, s->qscale);
2286                 }
2287             }
2288         }
2289
             /* Coefficient elimination is only applied to inter macroblocks:
              * blocks 0..3 use the luma threshold, the rest the chroma one. */
2290         if (s->luma_elim_threshold && !s->mb_intra)
2291             for (i = 0; i < 4; i++)
2292                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2293         if (s->chroma_elim_threshold && !s->mb_intra)
2294             for (i = 4; i < mb_block_count; i++)
2295                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2296
2297         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2298             for (i = 0; i < mb_block_count; i++) {
2299                 if (s->block_last_index[i] == -1)
2300                     s->coded_score[i] = INT_MAX / 256;
2301             }
2302         }
2303     }
2304
         /* Gray-only intra coding: the chroma blocks carry nothing but a
          * fixed DC value derived from 1024 and the chroma DC scale. */
2305     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2306         s->block_last_index[4] =
2307         s->block_last_index[5] = 0;
2308         s->block[4][0] =
2309         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2310         if (!s->chroma_y_shift) { /* 422 / 444 */
2311             for (i=6; i<12; i++) {
2312                 s->block_last_index[i] = 0;
2313                 s->block[i][0] = s->block[4][0];
2314             }
2315         }
2316     }
2317
2318     // non c quantize code returns incorrect block_last_index FIXME
         /* With alternate scan and a non-C quantizer, recompute the last
          * index by searching backwards for the last non-zero coefficient. */
2319     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2320         for (i = 0; i < mb_block_count; i++) {
2321             int j;
2322             if (s->block_last_index[i] > 0) {
2323                 for (j = 63; j > 0; j--) {
2324                     if (s->block[i][s->intra_scantable.permutated[j]])
2325                         break;
2326                 }
2327                 s->block_last_index[i] = j;
2328             }
2329         }
2330     }
2331
2332     /* huffman encode */
2333     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2334     case AV_CODEC_ID_MPEG1VIDEO:
2335     case AV_CODEC_ID_MPEG2VIDEO:
2336         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2337             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2338         break;
2339     case AV_CODEC_ID_MPEG4:
2340         if (CONFIG_MPEG4_ENCODER)
2341             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2342         break;
2343     case AV_CODEC_ID_MSMPEG4V2:
2344     case AV_CODEC_ID_MSMPEG4V3:
2345     case AV_CODEC_ID_WMV1:
2346         if (CONFIG_MSMPEG4_ENCODER)
2347             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2348         break;
2349     case AV_CODEC_ID_WMV2:
2350         if (CONFIG_WMV2_ENCODER)
2351             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2352         break;
2353     case AV_CODEC_ID_H261:
2354         if (CONFIG_H261_ENCODER)
2355             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2356         break;
2357     case AV_CODEC_ID_H263:
2358     case AV_CODEC_ID_H263P:
2359     case AV_CODEC_ID_FLV1:
2360     case AV_CODEC_ID_RV10:
2361     case AV_CODEC_ID_RV20:
2362         if (CONFIG_H263_ENCODER)
2363             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2364         break;
2365     case AV_CODEC_ID_MJPEG:
2366     case AV_CODEC_ID_AMV:
2367         if (CONFIG_MJPEG_ENCODER)
2368             ff_mjpeg_encode_mb(s, s->block);
2369         break;
2370     default:
2371         av_assert1(0);
2372     }
2373 }
2374
2375 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2376 {
2377     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2378     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2379     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2380 }
2381
2382 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2383     int i;
2384
2385     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2386
2387     /* mpeg1 */
2388     d->mb_skip_run= s->mb_skip_run;
2389     for(i=0; i<3; i++)
2390         d->last_dc[i] = s->last_dc[i];
2391
2392     /* statistics */
2393     d->mv_bits= s->mv_bits;
2394     d->i_tex_bits= s->i_tex_bits;
2395     d->p_tex_bits= s->p_tex_bits;
2396     d->i_count= s->i_count;
2397     d->f_count= s->f_count;
2398     d->b_count= s->b_count;
2399     d->skip_count= s->skip_count;
2400     d->misc_bits= s->misc_bits;
2401     d->last_bits= 0;
2402
2403     d->mb_skipped= 0;
2404     d->qscale= s->qscale;
2405     d->dquant= s->dquant;
2406
2407     d->esc3_level_length= s->esc3_level_length;
2408 }
2409
2410 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2411     int i;
2412
2413     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2414     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2415
2416     /* mpeg1 */
2417     d->mb_skip_run= s->mb_skip_run;
2418     for(i=0; i<3; i++)
2419         d->last_dc[i] = s->last_dc[i];
2420
2421     /* statistics */
2422     d->mv_bits= s->mv_bits;
2423     d->i_tex_bits= s->i_tex_bits;
2424     d->p_tex_bits= s->p_tex_bits;
2425     d->i_count= s->i_count;
2426     d->f_count= s->f_count;
2427     d->b_count= s->b_count;
2428     d->skip_count= s->skip_count;
2429     d->misc_bits= s->misc_bits;
2430
2431     d->mb_intra= s->mb_intra;
2432     d->mb_skipped= s->mb_skipped;
2433     d->mv_type= s->mv_type;
2434     d->mv_dir= s->mv_dir;
2435     d->pb= s->pb;
2436     if(s->data_partitioning){
2437         d->pb2= s->pb2;
2438         d->tex_pb= s->tex_pb;
2439     }
2440     d->block= s->block;
2441     for(i=0; i<8; i++)
2442         d->block_last_index[i]= s->block_last_index[i];
2443     d->interlaced_dct= s->interlaced_dct;
2444     d->qscale= s->qscale;
2445
2446     d->esc3_level_length= s->esc3_level_length;
2447 }
2448
2449 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2450                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2451                            int *dmin, int *next_block, int motion_x, int motion_y)
2452 {
2453     int score;
2454     uint8_t *dest_backup[3];
2455
2456     copy_context_before_encode(s, backup, type);
2457
2458     s->block= s->blocks[*next_block];
2459     s->pb= pb[*next_block];
2460     if(s->data_partitioning){
2461         s->pb2   = pb2   [*next_block];
2462         s->tex_pb= tex_pb[*next_block];
2463     }
2464
2465     if(*next_block){
2466         memcpy(dest_backup, s->dest, sizeof(s->dest));
2467         s->dest[0] = s->rd_scratchpad;
2468         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2469         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2470         av_assert0(s->linesize >= 32); //FIXME
2471     }
2472
2473     encode_mb(s, motion_x, motion_y);
2474
2475     score= put_bits_count(&s->pb);
2476     if(s->data_partitioning){
2477         score+= put_bits_count(&s->pb2);
2478         score+= put_bits_count(&s->tex_pb);
2479     }
2480
2481     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2482         ff_MPV_decode_mb(s, s->block);
2483
2484         score *= s->lambda2;
2485         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2486     }
2487
2488     if(*next_block){
2489         memcpy(s->dest, dest_backup, sizeof(s->dest));
2490     }
2491
2492     if(score<*dmin){
2493         *dmin= score;
2494         *next_block^=1;
2495
2496         copy_context_after_encode(best, s, type);
2497     }
2498 }
2499
2500 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2501     uint32_t *sq = ff_square_tab + 256;
2502     int acc=0;
2503     int x,y;
2504
2505     if(w==16 && h==16)
2506         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2507     else if(w==8 && h==8)
2508         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2509
2510     for(y=0; y<h; y++){
2511         for(x=0; x<w; x++){
2512             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2513         }
2514     }
2515
2516     av_assert2(acc>=0);
2517
2518     return acc;
2519 }
2520
2521 static int sse_mb(MpegEncContext *s){
2522     int w= 16;
2523     int h= 16;
2524
2525     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2526     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2527
2528     if(w==16 && h==16)
2529       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2530         return  s->dsp.nsse[0](s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2531                +s->dsp.nsse[1](s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2532                +s->dsp.nsse[1](s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2533       }else{
2534         return  s->dsp.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2535                +s->dsp.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2536                +s->dsp.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2537       }
2538     else
2539         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2540                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2541                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2542 }
2543
2544 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2545     MpegEncContext *s= *(void**)arg;
2546
2547
2548     s->me.pre_pass=1;
2549     s->me.dia_size= s->avctx->pre_dia_size;
2550     s->first_slice_line=1;
2551     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2552         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2553             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2554         }
2555         s->first_slice_line=0;
2556     }
2557
2558     s->me.pre_pass=0;
2559
2560     return 0;
2561 }
2562
2563 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2564     MpegEncContext *s= *(void**)arg;
2565
2566     ff_check_alignment();
2567
2568     s->me.dia_size= s->avctx->dia_size;
2569     s->first_slice_line=1;
2570     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2571         s->mb_x=0; //for block init below
2572         ff_init_block_index(s);
2573         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2574             s->block_index[0]+=2;
2575             s->block_index[1]+=2;
2576             s->block_index[2]+=2;
2577             s->block_index[3]+=2;
2578
2579             /* compute motion vector & mb_type and store in context */
2580             if(s->pict_type==AV_PICTURE_TYPE_B)
2581                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2582             else
2583                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2584         }
2585         s->first_slice_line=0;
2586     }
2587     return 0;
2588 }
2589
2590 static int mb_var_thread(AVCodecContext *c, void *arg){
2591     MpegEncContext *s= *(void**)arg;
2592     int mb_x, mb_y;
2593
2594     ff_check_alignment();
2595
2596     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2597         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2598             int xx = mb_x * 16;
2599             int yy = mb_y * 16;
2600             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2601             int varc;
2602             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2603
2604             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2605                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2606
2607             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2608             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2609             s->me.mb_var_sum_temp    += varc;
2610         }
2611     }
2612     return 0;
2613 }
2614
2615 static void write_slice_end(MpegEncContext *s){
2616     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2617         if(s->partitioned_frame){
2618             ff_mpeg4_merge_partitions(s);
2619         }
2620
2621         ff_mpeg4_stuffing(&s->pb);
2622     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2623         ff_mjpeg_encode_stuffing(s);
2624     }
2625
2626     avpriv_align_put_bits(&s->pb);
2627     flush_put_bits(&s->pb);
2628
2629     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2630         s->misc_bits+= get_bits_diff(s);
2631 }
2632
2633 static void write_mb_info(MpegEncContext *s)
2634 {
2635     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2636     int offset = put_bits_count(&s->pb);
2637     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2638     int gobn = s->mb_y / s->gob_index;
2639     int pred_x, pred_y;
2640     if (CONFIG_H263_ENCODER)
2641         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2642     bytestream_put_le32(&ptr, offset);
2643     bytestream_put_byte(&ptr, s->qscale);
2644     bytestream_put_byte(&ptr, gobn);
2645     bytestream_put_le16(&ptr, mba);
2646     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2647     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2648     /* 4MV not implemented */
2649     bytestream_put_byte(&ptr, 0); /* hmv2 */
2650     bytestream_put_byte(&ptr, 0); /* vmv2 */
2651 }
2652
2653 static void update_mb_info(MpegEncContext *s, int startcode)
2654 {
2655     if (!s->mb_info)
2656         return;
2657     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2658         s->mb_info_size += 12;
2659         s->prev_mb_info = s->last_mb_info;
2660     }
2661     if (startcode) {
2662         s->prev_mb_info = put_bits_count(&s->pb)/8;
2663         /* This might have incremented mb_info_size above, and we return without
2664          * actually writing any info into that slot yet. But in that case,
2665          * this will be called again at the start of the after writing the
2666          * start code, actually writing the mb info. */
2667         return;
2668     }
2669
2670     s->last_mb_info = put_bits_count(&s->pb)/8;
2671     if (!s->mb_info_size)
2672         s->mb_info_size += 12;
2673     write_mb_info(s);
2674 }
2675
2676 static int encode_thread(AVCodecContext *c, void *arg){
2677     MpegEncContext *s= *(void**)arg;
2678     int mb_x, mb_y, pdif = 0;
2679     int chr_h= 16>>s->chroma_y_shift;
2680     int i, j;
2681     MpegEncContext best_s, backup_s;
2682     uint8_t bit_buf[2][MAX_MB_BYTES];
2683     uint8_t bit_buf2[2][MAX_MB_BYTES];
2684     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2685     PutBitContext pb[2], pb2[2], tex_pb[2];
2686
2687     ff_check_alignment();
2688
2689     for(i=0; i<2; i++){
2690         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2691         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2692         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2693     }
2694
2695     s->last_bits= put_bits_count(&s->pb);
2696     s->mv_bits=0;
2697     s->misc_bits=0;
2698     s->i_tex_bits=0;
2699     s->p_tex_bits=0;
2700     s->i_count=0;
2701     s->f_count=0;
2702     s->b_count=0;
2703     s->skip_count=0;
2704
2705     for(i=0; i<3; i++){
2706         /* init last dc values */
2707         /* note: quant matrix value (8) is implied here */
2708         s->last_dc[i] = 128 << s->intra_dc_precision;
2709
2710         s->current_picture.error[i] = 0;
2711     }
2712     if(s->codec_id==AV_CODEC_ID_AMV){
2713         s->last_dc[0] = 128*8/13;
2714         s->last_dc[1] = 128*8/14;
2715         s->last_dc[2] = 128*8/14;
2716     }
2717     s->mb_skip_run = 0;
2718     memset(s->last_mv, 0, sizeof(s->last_mv));
2719
2720     s->last_mv_dir = 0;
2721
2722     switch(s->codec_id){
2723     case AV_CODEC_ID_H263:
2724     case AV_CODEC_ID_H263P:
2725     case AV_CODEC_ID_FLV1:
2726         if (CONFIG_H263_ENCODER)
2727             s->gob_index = ff_h263_get_gob_height(s);
2728         break;
2729     case AV_CODEC_ID_MPEG4:
2730         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2731             ff_mpeg4_init_partitions(s);
2732         break;
2733     }
2734
2735     s->resync_mb_x=0;
2736     s->resync_mb_y=0;
2737     s->first_slice_line = 1;
2738     s->ptr_lastgob = s->pb.buf;
2739     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2740         s->mb_x=0;
2741         s->mb_y= mb_y;
2742
2743         ff_set_qscale(s, s->qscale);
2744         ff_init_block_index(s);
2745
2746         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2747             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2748             int mb_type= s->mb_type[xy];
2749 //            int d;
2750             int dmin= INT_MAX;
2751             int dir;
2752
2753             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2754                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2755                 return -1;
2756             }
2757             if(s->data_partitioning){
2758                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2759                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2760                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2761                     return -1;
2762                 }
2763             }
2764
2765             s->mb_x = mb_x;
2766             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2767             ff_update_block_index(s);
2768
2769             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2770                 ff_h261_reorder_mb_index(s);
2771                 xy= s->mb_y*s->mb_stride + s->mb_x;
2772                 mb_type= s->mb_type[xy];
2773             }
2774
2775             /* write gob / video packet header  */
2776             if(s->rtp_mode){
2777                 int current_packet_size, is_gob_start;
2778
2779                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2780
2781                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2782
2783                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2784
2785                 switch(s->codec_id){
2786                 case AV_CODEC_ID_H263:
2787                 case AV_CODEC_ID_H263P:
2788                     if(!s->h263_slice_structured)
2789                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2790                     break;
2791                 case AV_CODEC_ID_MPEG2VIDEO:
2792                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2793                 case AV_CODEC_ID_MPEG1VIDEO:
2794                     if(s->mb_skip_run) is_gob_start=0;
2795                     break;
2796                 case AV_CODEC_ID_MJPEG:
2797                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2798                     break;
2799                 }
2800
2801                 if(is_gob_start){
2802                     if(s->start_mb_y != mb_y || mb_x!=0){
2803                         write_slice_end(s);
2804
2805                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2806                             ff_mpeg4_init_partitions(s);
2807                         }
2808                     }
2809
2810                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2811                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2812
2813                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2814                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2815                         int d = 100 / s->error_rate;
2816                         if(r % d == 0){
2817                             current_packet_size=0;
2818                             s->pb.buf_ptr= s->ptr_lastgob;
2819                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2820                         }
2821                     }
2822
2823                     if (s->avctx->rtp_callback){
2824                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2825                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2826                     }
2827                     update_mb_info(s, 1);
2828
2829                     switch(s->codec_id){
2830                     case AV_CODEC_ID_MPEG4:
2831                         if (CONFIG_MPEG4_ENCODER) {
2832                             ff_mpeg4_encode_video_packet_header(s);
2833                             ff_mpeg4_clean_buffers(s);
2834                         }
2835                     break;
2836                     case AV_CODEC_ID_MPEG1VIDEO:
2837                     case AV_CODEC_ID_MPEG2VIDEO:
2838                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2839                             ff_mpeg1_encode_slice_header(s);
2840                             ff_mpeg1_clean_buffers(s);
2841                         }
2842                     break;
2843                     case AV_CODEC_ID_H263:
2844                     case AV_CODEC_ID_H263P:
2845                         if (CONFIG_H263_ENCODER)
2846                             ff_h263_encode_gob_header(s, mb_y);
2847                     break;
2848                     }
2849
2850                     if(s->flags&CODEC_FLAG_PASS1){
2851                         int bits= put_bits_count(&s->pb);
2852                         s->misc_bits+= bits - s->last_bits;
2853                         s->last_bits= bits;
2854                     }
2855
2856                     s->ptr_lastgob += current_packet_size;
2857                     s->first_slice_line=1;
2858                     s->resync_mb_x=mb_x;
2859                     s->resync_mb_y=mb_y;
2860                 }
2861             }
2862
2863             if(  (s->resync_mb_x   == s->mb_x)
2864                && s->resync_mb_y+1 == s->mb_y){
2865                 s->first_slice_line=0;
2866             }
2867
2868             s->mb_skipped=0;
2869             s->dquant=0; //only for QP_RD
2870
2871             update_mb_info(s, 0);
2872
2873             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2874                 int next_block=0;
2875                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2876
2877                 copy_context_before_encode(&backup_s, s, -1);
2878                 backup_s.pb= s->pb;
2879                 best_s.data_partitioning= s->data_partitioning;
2880                 best_s.partitioned_frame= s->partitioned_frame;
2881                 if(s->data_partitioning){
2882                     backup_s.pb2= s->pb2;
2883                     backup_s.tex_pb= s->tex_pb;
2884                 }
2885
2886                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2887                     s->mv_dir = MV_DIR_FORWARD;
2888                     s->mv_type = MV_TYPE_16X16;
2889                     s->mb_intra= 0;
2890                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2891                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2892                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2893                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2894                 }
2895                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2896                     s->mv_dir = MV_DIR_FORWARD;
2897                     s->mv_type = MV_TYPE_FIELD;
2898                     s->mb_intra= 0;
2899                     for(i=0; i<2; i++){
2900                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2901                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2902                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2903                     }
2904                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2905                                  &dmin, &next_block, 0, 0);
2906                 }
2907                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2908                     s->mv_dir = MV_DIR_FORWARD;
2909                     s->mv_type = MV_TYPE_16X16;
2910                     s->mb_intra= 0;
2911                     s->mv[0][0][0] = 0;
2912                     s->mv[0][0][1] = 0;
2913                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2914                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2915                 }
2916                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2917                     s->mv_dir = MV_DIR_FORWARD;
2918                     s->mv_type = MV_TYPE_8X8;
2919                     s->mb_intra= 0;
2920                     for(i=0; i<4; i++){
2921                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2922                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2923                     }
2924                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2925                                  &dmin, &next_block, 0, 0);
2926                 }
2927                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2928                     s->mv_dir = MV_DIR_FORWARD;
2929                     s->mv_type = MV_TYPE_16X16;
2930                     s->mb_intra= 0;
2931                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2932                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2933                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2934                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2935                 }
2936                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2937                     s->mv_dir = MV_DIR_BACKWARD;
2938                     s->mv_type = MV_TYPE_16X16;
2939                     s->mb_intra= 0;
2940                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2941                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2942                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2943                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2944                 }
2945                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2946                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2947                     s->mv_type = MV_TYPE_16X16;
2948                     s->mb_intra= 0;
2949                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2950                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2951                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2952                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2953                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2954                                  &dmin, &next_block, 0, 0);
2955                 }
2956                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2957                     s->mv_dir = MV_DIR_FORWARD;
2958                     s->mv_type = MV_TYPE_FIELD;
2959                     s->mb_intra= 0;
2960                     for(i=0; i<2; i++){
2961                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2962                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2963                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2964                     }
2965                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2966                                  &dmin, &next_block, 0, 0);
2967                 }
2968                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2969                     s->mv_dir = MV_DIR_BACKWARD;
2970                     s->mv_type = MV_TYPE_FIELD;
2971                     s->mb_intra= 0;
2972                     for(i=0; i<2; i++){
2973                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2974                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2975                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2976                     }
2977                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2978                                  &dmin, &next_block, 0, 0);
2979                 }
2980                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2981                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2982                     s->mv_type = MV_TYPE_FIELD;
2983                     s->mb_intra= 0;
2984                     for(dir=0; dir<2; dir++){
2985                         for(i=0; i<2; i++){
2986                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2987                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2988                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2989                         }
2990                     }
2991                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2992                                  &dmin, &next_block, 0, 0);
2993                 }
2994                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2995                     s->mv_dir = 0;
2996                     s->mv_type = MV_TYPE_16X16;
2997                     s->mb_intra= 1;
2998                     s->mv[0][0][0] = 0;
2999                     s->mv[0][0][1] = 0;
3000                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3001                                  &dmin, &next_block, 0, 0);
3002                     if(s->h263_pred || s->h263_aic){
3003                         if(best_s.mb_intra)
3004                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3005                         else
3006                             ff_clean_intra_table_entries(s); //old mode?
3007                     }
3008                 }
3009
3010                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3011                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3012                         const int last_qp= backup_s.qscale;
3013                         int qpi, qp, dc[6];
3014                         int16_t ac[6][16];
3015                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3016                         static const int dquant_tab[4]={-1,1,-2,2};
3017                         int storecoefs = s->mb_intra && s->dc_val[0];
3018
3019                         av_assert2(backup_s.dquant == 0);
3020
3021                         //FIXME intra
3022                         s->mv_dir= best_s.mv_dir;
3023                         s->mv_type = MV_TYPE_16X16;
3024                         s->mb_intra= best_s.mb_intra;
3025                         s->mv[0][0][0] = best_s.mv[0][0][0];
3026                         s->mv[0][0][1] = best_s.mv[0][0][1];
3027                         s->mv[1][0][0] = best_s.mv[1][0][0];
3028                         s->mv[1][0][1] = best_s.mv[1][0][1];
3029
3030                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3031                         for(; qpi<4; qpi++){
3032                             int dquant= dquant_tab[qpi];
3033                             qp= last_qp + dquant;
3034                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3035                                 continue;
3036                             backup_s.dquant= dquant;
3037                             if(storecoefs){
3038                                 for(i=0; i<6; i++){
3039                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3040                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3041                                 }
3042                             }
3043
3044                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3045                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3046                             if(best_s.qscale != qp){
3047                                 if(storecoefs){
3048                                     for(i=0; i<6; i++){
3049                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3050                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3051                                     }
3052                                 }
3053                             }
3054                         }
3055                     }
3056                 }
3057                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3058                     int mx= s->b_direct_mv_table[xy][0];
3059                     int my= s->b_direct_mv_table[xy][1];
3060
3061                     backup_s.dquant = 0;
3062                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3063                     s->mb_intra= 0;
3064                     ff_mpeg4_set_direct_mv(s, mx, my);
3065                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3066                                  &dmin, &next_block, mx, my);
3067                 }
3068                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3069                     backup_s.dquant = 0;
3070                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3071                     s->mb_intra= 0;
3072                     ff_mpeg4_set_direct_mv(s, 0, 0);
3073                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3074                                  &dmin, &next_block, 0, 0);
3075                 }
3076                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3077                     int coded=0;
3078                     for(i=0; i<6; i++)
3079                         coded |= s->block_last_index[i];
3080                     if(coded){
3081                         int mx,my;
3082                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3083                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3084                             mx=my=0; //FIXME find the one we actually used
3085                             ff_mpeg4_set_direct_mv(s, mx, my);
3086                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3087                             mx= s->mv[1][0][0];
3088                             my= s->mv[1][0][1];
3089                         }else{
3090                             mx= s->mv[0][0][0];
3091                             my= s->mv[0][0][1];
3092                         }
3093
3094                         s->mv_dir= best_s.mv_dir;
3095                         s->mv_type = best_s.mv_type;
3096                         s->mb_intra= 0;
3097 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3098                         s->mv[0][0][1] = best_s.mv[0][0][1];
3099                         s->mv[1][0][0] = best_s.mv[1][0][0];
3100                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3101                         backup_s.dquant= 0;
3102                         s->skipdct=1;
3103                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3104                                         &dmin, &next_block, mx, my);
3105                         s->skipdct=0;
3106                     }
3107                 }
3108
3109                 s->current_picture.qscale_table[xy] = best_s.qscale;
3110
3111                 copy_context_after_encode(s, &best_s, -1);
3112
3113                 pb_bits_count= put_bits_count(&s->pb);
3114                 flush_put_bits(&s->pb);
3115                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3116                 s->pb= backup_s.pb;
3117
3118                 if(s->data_partitioning){
3119                     pb2_bits_count= put_bits_count(&s->pb2);
3120                     flush_put_bits(&s->pb2);
3121                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3122                     s->pb2= backup_s.pb2;
3123
3124                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3125                     flush_put_bits(&s->tex_pb);
3126                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3127                     s->tex_pb= backup_s.tex_pb;
3128                 }
3129                 s->last_bits= put_bits_count(&s->pb);
3130
3131                 if (CONFIG_H263_ENCODER &&
3132                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3133                     ff_h263_update_motion_val(s);
3134
3135                 if(next_block==0){ //FIXME 16 vs linesize16
3136                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
3137                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3138                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3139                 }
3140
3141                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3142                     ff_MPV_decode_mb(s, s->block);
3143             } else {
3144                 int motion_x = 0, motion_y = 0;
3145                 s->mv_type=MV_TYPE_16X16;
3146                 // only one MB-Type possible
3147
3148                 switch(mb_type){
3149                 case CANDIDATE_MB_TYPE_INTRA:
3150                     s->mv_dir = 0;
3151                     s->mb_intra= 1;
3152                     motion_x= s->mv[0][0][0] = 0;
3153                     motion_y= s->mv[0][0][1] = 0;
3154                     break;
3155                 case CANDIDATE_MB_TYPE_INTER:
3156                     s->mv_dir = MV_DIR_FORWARD;
3157                     s->mb_intra= 0;
3158                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3159                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3160                     break;
3161                 case CANDIDATE_MB_TYPE_INTER_I:
3162                     s->mv_dir = MV_DIR_FORWARD;
3163                     s->mv_type = MV_TYPE_FIELD;
3164                     s->mb_intra= 0;
3165                     for(i=0; i<2; i++){
3166                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3167                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3168                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3169                     }
3170                     break;
3171                 case CANDIDATE_MB_TYPE_INTER4V:
3172                     s->mv_dir = MV_DIR_FORWARD;
3173                     s->mv_type = MV_TYPE_8X8;
3174                     s->mb_intra= 0;
3175                     for(i=0; i<4; i++){
3176                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3177                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3178                     }
3179                     break;
3180                 case CANDIDATE_MB_TYPE_DIRECT:
3181                     if (CONFIG_MPEG4_ENCODER) {
3182                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3183                         s->mb_intra= 0;
3184                         motion_x=s->b_direct_mv_table[xy][0];
3185                         motion_y=s->b_direct_mv_table[xy][1];
3186                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3187                     }
3188                     break;
3189                 case CANDIDATE_MB_TYPE_DIRECT0:
3190                     if (CONFIG_MPEG4_ENCODER) {
3191                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3192                         s->mb_intra= 0;
3193                         ff_mpeg4_set_direct_mv(s, 0, 0);
3194                     }
3195                     break;
3196                 case CANDIDATE_MB_TYPE_BIDIR:
3197                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3198                     s->mb_intra= 0;
3199                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3200                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3201                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3202                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3203                     break;
3204                 case CANDIDATE_MB_TYPE_BACKWARD:
3205                     s->mv_dir = MV_DIR_BACKWARD;
3206                     s->mb_intra= 0;
3207                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3208                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3209                     break;
3210                 case CANDIDATE_MB_TYPE_FORWARD:
3211                     s->mv_dir = MV_DIR_FORWARD;
3212                     s->mb_intra= 0;
3213                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3214                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3215                     break;
3216                 case CANDIDATE_MB_TYPE_FORWARD_I:
3217                     s->mv_dir = MV_DIR_FORWARD;
3218                     s->mv_type = MV_TYPE_FIELD;
3219                     s->mb_intra= 0;
3220                     for(i=0; i<2; i++){
3221                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3222                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3223                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3224                     }
3225                     break;
3226                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3227                     s->mv_dir = MV_DIR_BACKWARD;
3228                     s->mv_type = MV_TYPE_FIELD;
3229                     s->mb_intra= 0;
3230                     for(i=0; i<2; i++){
3231                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3232                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3233                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3234                     }
3235                     break;
3236                 case CANDIDATE_MB_TYPE_BIDIR_I:
3237                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3238                     s->mv_type = MV_TYPE_FIELD;
3239                     s->mb_intra= 0;
3240                     for(dir=0; dir<2; dir++){
3241                         for(i=0; i<2; i++){
3242                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3243                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3244                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3245                         }
3246                     }
3247                     break;
3248                 default:
3249                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3250                 }
3251
3252                 encode_mb(s, motion_x, motion_y);
3253
3254                 // RAL: Update last macroblock type
3255                 s->last_mv_dir = s->mv_dir;
3256
3257                 if (CONFIG_H263_ENCODER &&
3258                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3259                     ff_h263_update_motion_val(s);
3260
3261                 ff_MPV_decode_mb(s, s->block);
3262             }
3263
3264             /* clean the MV table in IPS frames for direct mode in B frames */
3265             if(s->mb_intra /* && I,P,S_TYPE */){
3266                 s->p_mv_table[xy][0]=0;
3267                 s->p_mv_table[xy][1]=0;
3268             }
3269
3270             if(s->flags&CODEC_FLAG_PSNR){
3271                 int w= 16;
3272                 int h= 16;
3273
3274                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3275                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3276
3277                 s->current_picture.error[0] += sse(
3278                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3279                     s->dest[0], w, h, s->linesize);
3280                 s->current_picture.error[1] += sse(
3281                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3282                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3283                 s->current_picture.error[2] += sse(
3284                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3285                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3286             }
3287             if(s->loop_filter){
3288                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3289                     ff_h263_loop_filter(s);
3290             }
3291             av_dlog(s->avctx, "MB %d %d bits\n",
3292                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3293         }
3294     }
3295
3296     //not beautiful here but we must write it before flushing so it has to be here
3297     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3298         ff_msmpeg4_encode_ext_header(s);
3299
3300     write_slice_end(s);
3301
3302     /* Send the last GOB if RTP */
3303     if (s->avctx->rtp_callback) {
3304         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3305         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3306         /* Call the RTP callback to send the last GOB */
3307         emms_c();
3308         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3309     }
3310
3311     return 0;
3312 }
3313
3314 #define MERGE(field) dst->field += src->field; src->field=0
3315 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3316     MERGE(me.scene_change_score);
3317     MERGE(me.mc_mb_var_sum_temp);
3318     MERGE(me.mb_var_sum_temp);
3319 }
3320
3321 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3322     int i;
3323
3324     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3325     MERGE(dct_count[1]);
3326     MERGE(mv_bits);
3327     MERGE(i_tex_bits);
3328     MERGE(p_tex_bits);
3329     MERGE(i_count);
3330     MERGE(f_count);
3331     MERGE(b_count);
3332     MERGE(skip_count);
3333     MERGE(misc_bits);
3334     MERGE(er.error_count);
3335     MERGE(padding_bug_score);
3336     MERGE(current_picture.error[0]);
3337     MERGE(current_picture.error[1]);
3338     MERGE(current_picture.error[2]);
3339
3340     if(dst->avctx->noise_reduction){
3341         for(i=0; i<64; i++){
3342             MERGE(dct_error_sum[0][i]);
3343             MERGE(dct_error_sum[1][i]);
3344         }
3345     }
3346
3347     assert(put_bits_count(&src->pb) % 8 ==0);
3348     assert(put_bits_count(&dst->pb) % 8 ==0);
3349     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3350     flush_put_bits(&dst->pb);
3351 }
3352
3353 static int estimate_qp(MpegEncContext *s, int dry_run){
3354     if (s->next_lambda){
3355         s->current_picture_ptr->f->quality =
3356         s->current_picture.f->quality = s->next_lambda;
3357         if(!dry_run) s->next_lambda= 0;
3358     } else if (!s->fixed_qscale) {
3359         s->current_picture_ptr->f->quality =
3360         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3361         if (s->current_picture.f->quality < 0)
3362             return -1;
3363     }
3364
3365     if(s->adaptive_quant){
3366         switch(s->codec_id){
3367         case AV_CODEC_ID_MPEG4:
3368             if (CONFIG_MPEG4_ENCODER)
3369                 ff_clean_mpeg4_qscales(s);
3370             break;
3371         case AV_CODEC_ID_H263:
3372         case AV_CODEC_ID_H263P:
3373         case AV_CODEC_ID_FLV1:
3374             if (CONFIG_H263_ENCODER)
3375                 ff_clean_h263_qscales(s);
3376             break;
3377         default:
3378             ff_init_qscale_tab(s);
3379         }
3380
3381         s->lambda= s->lambda_table[0];
3382         //FIXME broken
3383     }else
3384         s->lambda = s->current_picture.f->quality;
3385     update_qscale(s);
3386     return 0;
3387 }
3388
3389 /* must be called before writing the header */
3390 static void set_frame_distances(MpegEncContext * s){
3391     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3392     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3393
3394     if(s->pict_type==AV_PICTURE_TYPE_B){
3395         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3396         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3397     }else{
3398         s->pp_time= s->time - s->last_non_b_time;
3399         s->last_non_b_time= s->time;
3400         assert(s->picture_number==0 || s->pp_time > 0);
3401     }
3402 }
3403
3404 static int encode_picture(MpegEncContext *s, int picture_number)
3405 {
3406     int i, ret;
3407     int bits;
3408     int context_count = s->slice_context_count;
3409
3410     s->picture_number = picture_number;
3411
3412     /* Reset the average MB variance */
3413     s->me.mb_var_sum_temp    =
3414     s->me.mc_mb_var_sum_temp = 0;
3415
3416     /* we need to initialize some time vars before we can encode b-frames */
3417     // RAL: Condition added for MPEG1VIDEO
3418     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3419         set_frame_distances(s);
3420     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3421         ff_set_mpeg4_time(s);
3422
3423     s->me.scene_change_score=0;
3424
3425 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3426
3427     if(s->pict_type==AV_PICTURE_TYPE_I){
3428         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3429         else                        s->no_rounding=0;
3430     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3431         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3432             s->no_rounding ^= 1;
3433     }
3434
3435     if(s->flags & CODEC_FLAG_PASS2){
3436         if (estimate_qp(s,1) < 0)
3437             return -1;
3438         ff_get_2pass_fcode(s);
3439     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3440         if(s->pict_type==AV_PICTURE_TYPE_B)
3441             s->lambda= s->last_lambda_for[s->pict_type];
3442         else
3443             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3444         update_qscale(s);
3445     }
3446
3447     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3448         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3449         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3450         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3451         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3452     }
3453
3454     s->mb_intra=0; //for the rate distortion & bit compare functions
3455     for(i=1; i<context_count; i++){
3456         ret = ff_update_duplicate_context(s->thread_context[i], s);
3457         if (ret < 0)
3458             return ret;
3459     }
3460
3461     if(ff_init_me(s)<0)
3462         return -1;
3463
3464     /* Estimate motion for every MB */
3465     if(s->pict_type != AV_PICTURE_TYPE_I){
3466         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3467         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3468         if (s->pict_type != AV_PICTURE_TYPE_B) {
3469             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3470                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3471             }
3472         }
3473
3474         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3475     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3476         /* I-Frame */
3477         for(i=0; i<s->mb_stride*s->mb_height; i++)
3478             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3479
3480         if(!s->fixed_qscale){
3481             /* finding spatial complexity for I-frame rate control */
3482             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3483         }
3484     }
3485     for(i=1; i<context_count; i++){
3486         merge_context_after_me(s, s->thread_context[i]);
3487     }
3488     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3489     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3490     emms_c();
3491
3492     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3493         s->pict_type= AV_PICTURE_TYPE_I;
3494         for(i=0; i<s->mb_stride*s->mb_height; i++)
3495             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3496         if(s->msmpeg4_version >= 3)
3497             s->no_rounding=1;
3498         av_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3499                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3500     }
3501
3502     if(!s->umvplus){
3503         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3504             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3505
3506             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3507                 int a,b;
3508                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3509                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3510                 s->f_code= FFMAX3(s->f_code, a, b);
3511             }
3512
3513             ff_fix_long_p_mvs(s);
3514             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3515             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3516                 int j;
3517                 for(i=0; i<2; i++){
3518                     for(j=0; j<2; j++)
3519                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3520                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3521                 }
3522             }
3523         }
3524
3525         if(s->pict_type==AV_PICTURE_TYPE_B){
3526             int a, b;
3527
3528             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3529             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3530             s->f_code = FFMAX(a, b);
3531
3532             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3533             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3534             s->b_code = FFMAX(a, b);
3535
3536             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3537             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3538             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3539             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3540             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3541                 int dir, j;
3542                 for(dir=0; dir<2; dir++){
3543                     for(i=0; i<2; i++){
3544                         for(j=0; j<2; j++){
3545                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3546                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3547                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3548                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3549                         }
3550                     }
3551                 }
3552             }
3553         }
3554     }
3555
3556     if (estimate_qp(s, 0) < 0)
3557         return -1;
3558
3559     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3560         s->qscale= 3; //reduce clipping problems
3561
3562     if (s->out_format == FMT_MJPEG) {
3563         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3564         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3565
3566         if (s->avctx->intra_matrix) {
3567             chroma_matrix =
3568             luma_matrix = s->avctx->intra_matrix;
3569         }
3570         if (s->avctx->chroma_intra_matrix)
3571             chroma_matrix = s->avctx->chroma_intra_matrix;
3572
3573         /* for mjpeg, we do include qscale in the matrix */
3574         for(i=1;i<64;i++){
3575             int j = s->idsp.idct_permutation[i];
3576
3577             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3578             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3579         }
3580         s->y_dc_scale_table=
3581         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3582         s->chroma_intra_matrix[0] =
3583         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3584         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3585                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3586         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3587                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3588         s->qscale= 8;
3589     }
3590     if(s->codec_id == AV_CODEC_ID_AMV){
3591         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3592         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3593         for(i=1;i<64;i++){
3594             int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
3595
3596             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3597             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3598         }
3599         s->y_dc_scale_table= y;
3600         s->c_dc_scale_table= c;
3601         s->intra_matrix[0] = 13;
3602         s->chroma_intra_matrix[0] = 14;
3603         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3604                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3605         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3606                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3607         s->qscale= 8;
3608     }
3609
3610     //FIXME var duplication
3611     s->current_picture_ptr->f->key_frame =
3612     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3613     s->current_picture_ptr->f->pict_type =
3614     s->current_picture.f->pict_type = s->pict_type;
3615
3616     if (s->current_picture.f->key_frame)
3617         s->picture_in_gop_number=0;
3618
3619     s->mb_x = s->mb_y = 0;
3620     s->last_bits= put_bits_count(&s->pb);
3621     switch(s->out_format) {
3622     case FMT_MJPEG:
3623         if (CONFIG_MJPEG_ENCODER)
3624             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3625                                            s->intra_matrix, s->chroma_intra_matrix);
3626         break;
3627     case FMT_H261:
3628         if (CONFIG_H261_ENCODER)
3629             ff_h261_encode_picture_header(s, picture_number);
3630         break;
3631     case FMT_H263:
3632         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3633             ff_wmv2_encode_picture_header(s, picture_number);
3634         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3635             ff_msmpeg4_encode_picture_header(s, picture_number);
3636         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3637             ff_mpeg4_encode_picture_header(s, picture_number);
3638         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3639             ff_rv10_encode_picture_header(s, picture_number);
3640         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3641             ff_rv20_encode_picture_header(s, picture_number);
3642         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3643             ff_flv_encode_picture_header(s, picture_number);
3644         else if (CONFIG_H263_ENCODER)
3645             ff_h263_encode_picture_header(s, picture_number);
3646         break;
3647     case FMT_MPEG1:
3648         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3649             ff_mpeg1_encode_picture_header(s, picture_number);
3650         break;
3651     default:
3652         av_assert0(0);
3653     }
3654     bits= put_bits_count(&s->pb);
3655     s->header_bits= bits - s->last_bits;
3656
3657     for(i=1; i<context_count; i++){
3658         update_duplicate_context_after_me(s->thread_context[i], s);
3659     }
3660     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3661     for(i=1; i<context_count; i++){
3662         merge_context_after_encode(s, s->thread_context[i]);
3663     }
3664     emms_c();
3665     return 0;
3666 }
3667
3668 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3669     const int intra= s->mb_intra;
3670     int i;
3671
3672     s->dct_count[intra]++;
3673
3674     for(i=0; i<64; i++){
3675         int level= block[i];
3676
3677         if(level){
3678             if(level>0){
3679                 s->dct_error_sum[intra][i] += level;
3680                 level -= s->dct_offset[intra][i];
3681                 if(level<0) level=0;
3682             }else{
3683                 s->dct_error_sum[intra][i] -= level;
3684                 level += s->dct_offset[intra][i];
3685                 if(level>0) level=0;
3686             }
3687             block[i]= level;
3688         }
3689     }
3690 }
3691
3692 static int dct_quantize_trellis_c(MpegEncContext *s,
3693                                   int16_t *block, int n,
3694                                   int qscale, int *overflow){
3695     const int *qmat;
3696     const uint8_t *scantable= s->intra_scantable.scantable;
3697     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3698     int max=0;
3699     unsigned int threshold1, threshold2;
3700     int bias=0;
3701     int run_tab[65];
3702     int level_tab[65];
3703     int score_tab[65];
3704     int survivor[65];
3705     int survivor_count;
3706     int last_run=0;
3707     int last_level=0;
3708     int last_score= 0;
3709     int last_i;
3710     int coeff[2][64];
3711     int coeff_count[64];
3712     int qmul, qadd, start_i, last_non_zero, i, dc;
3713     const int esc_length= s->ac_esc_length;
3714     uint8_t * length;
3715     uint8_t * last_length;
3716     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3717
3718     s->fdsp.fdct(block);
3719
3720     if(s->dct_error_sum)
3721         s->denoise_dct(s, block);
3722     qmul= qscale*16;
3723     qadd= ((qscale-1)|1)*8;
3724
3725     if (s->mb_intra) {
3726         int q;
3727         if (!s->h263_aic) {
3728             if (n < 4)
3729                 q = s->y_dc_scale;
3730             else
3731                 q = s->c_dc_scale;
3732             q = q << 3;
3733         } else{
3734             /* For AIC we skip quant/dequant of INTRADC */
3735             q = 1 << 3;
3736             qadd=0;
3737         }
3738
3739         /* note: block[0] is assumed to be positive */
3740         block[0] = (block[0] + (q >> 1)) / q;
3741         start_i = 1;
3742         last_non_zero = 0;
3743         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3744         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3745             bias= 1<<(QMAT_SHIFT-1);
3746         length     = s->intra_ac_vlc_length;
3747         last_length= s->intra_ac_vlc_last_length;
3748     } else {
3749         start_i = 0;
3750         last_non_zero = -1;
3751         qmat = s->q_inter_matrix[qscale];
3752         length     = s->inter_ac_vlc_length;
3753         last_length= s->inter_ac_vlc_last_length;
3754     }
3755     last_i= start_i;
3756
3757     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3758     threshold2= (threshold1<<1);
3759
3760     for(i=63; i>=start_i; i--) {
3761         const int j = scantable[i];
3762         int level = block[j] * qmat[j];
3763
3764         if(((unsigned)(level+threshold1))>threshold2){
3765             last_non_zero = i;
3766             break;
3767         }
3768     }
3769
3770     for(i=start_i; i<=last_non_zero; i++) {
3771         const int j = scantable[i];
3772         int level = block[j] * qmat[j];
3773
3774 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3775 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3776         if(((unsigned)(level+threshold1))>threshold2){
3777             if(level>0){
3778                 level= (bias + level)>>QMAT_SHIFT;
3779                 coeff[0][i]= level;
3780                 coeff[1][i]= level-1;
3781 //                coeff[2][k]= level-2;
3782             }else{
3783                 level= (bias - level)>>QMAT_SHIFT;
3784                 coeff[0][i]= -level;
3785                 coeff[1][i]= -level+1;
3786 //                coeff[2][k]= -level+2;
3787             }
3788             coeff_count[i]= FFMIN(level, 2);
3789             av_assert2(coeff_count[i]);
3790             max |=level;
3791         }else{
3792             coeff[0][i]= (level>>31)|1;
3793             coeff_count[i]= 1;
3794         }
3795     }
3796
3797     *overflow= s->max_qcoeff < max; //overflow might have happened
3798
3799     if(last_non_zero < start_i){
3800         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3801         return last_non_zero;
3802     }
3803
3804     score_tab[start_i]= 0;
3805     survivor[0]= start_i;
3806     survivor_count= 1;
3807
3808     for(i=start_i; i<=last_non_zero; i++){
3809         int level_index, j, zero_distortion;
3810         int dct_coeff= FFABS(block[ scantable[i] ]);
3811         int best_score=256*256*256*120;
3812
3813         if (s->fdsp.fdct == ff_fdct_ifast)
3814             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3815         zero_distortion= dct_coeff*dct_coeff;
3816
3817         for(level_index=0; level_index < coeff_count[i]; level_index++){
3818             int distortion;
3819             int level= coeff[level_index][i];
3820             const int alevel= FFABS(level);
3821             int unquant_coeff;
3822
3823             av_assert2(level);
3824
3825             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3826                 unquant_coeff= alevel*qmul + qadd;
3827             }else{ //MPEG1
3828                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3829                 if(s->mb_intra){
3830                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3831                         unquant_coeff =   (unquant_coeff - 1) | 1;
3832                 }else{
3833                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3834                         unquant_coeff =   (unquant_coeff - 1) | 1;
3835                 }
3836                 unquant_coeff<<= 3;
3837             }
3838
3839             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3840             level+=64;
3841             if((level&(~127)) == 0){
3842                 for(j=survivor_count-1; j>=0; j--){
3843                     int run= i - survivor[j];
3844                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3845                     score += score_tab[i-run];
3846
3847                     if(score < best_score){
3848                         best_score= score;
3849                         run_tab[i+1]= run;
3850                         level_tab[i+1]= level-64;
3851                     }
3852                 }
3853
3854                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3855                     for(j=survivor_count-1; j>=0; j--){
3856                         int run= i - survivor[j];
3857                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3858                         score += score_tab[i-run];
3859                         if(score < last_score){
3860                             last_score= score;
3861                             last_run= run;
3862                             last_level= level-64;
3863                             last_i= i+1;
3864                         }
3865                     }
3866                 }
3867             }else{
3868                 distortion += esc_length*lambda;
3869                 for(j=survivor_count-1; j>=0; j--){
3870                     int run= i - survivor[j];
3871                     int score= distortion + score_tab[i-run];
3872
3873                     if(score < best_score){
3874                         best_score= score;
3875                         run_tab[i+1]= run;
3876                         level_tab[i+1]= level-64;
3877                     }
3878                 }
3879
3880                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3881                   for(j=survivor_count-1; j>=0; j--){
3882                         int run= i - survivor[j];
3883                         int score= distortion + score_tab[i-run];
3884                         if(score < last_score){
3885                             last_score= score;
3886                             last_run= run;
3887                             last_level= level-64;
3888                             last_i= i+1;
3889                         }
3890                     }
3891                 }
3892             }
3893         }
3894
3895         score_tab[i+1]= best_score;
3896
3897         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
3898         if(last_non_zero <= 27){
3899             for(; survivor_count; survivor_count--){
3900                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3901                     break;
3902             }
3903         }else{
3904             for(; survivor_count; survivor_count--){
3905                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3906                     break;
3907             }
3908         }
3909
3910         survivor[ survivor_count++ ]= i+1;
3911     }
3912
3913     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
3914         last_score= 256*256*256*120;
3915         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3916             int score= score_tab[i];
3917             if(i) score += lambda*2; //FIXME exacter?
3918
3919             if(score < last_score){
3920                 last_score= score;
3921                 last_i= i;
3922                 last_level= level_tab[i];
3923                 last_run= run_tab[i];
3924             }
3925         }
3926     }
3927
3928     s->coded_score[n] = last_score;
3929
3930     dc= FFABS(block[0]);
3931     last_non_zero= last_i - 1;
3932     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3933
3934     if(last_non_zero < start_i)
3935         return last_non_zero;
3936
3937     if(last_non_zero == 0 && start_i == 0){
3938         int best_level= 0;
3939         int best_score= dc * dc;
3940
3941         for(i=0; i<coeff_count[0]; i++){
3942             int level= coeff[i][0];
3943             int alevel= FFABS(level);
3944             int unquant_coeff, score, distortion;
3945
3946             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3947                     unquant_coeff= (alevel*qmul + qadd)>>3;
3948             }else{ //MPEG1
3949                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3950                     unquant_coeff =   (unquant_coeff - 1) | 1;
3951             }
3952             unquant_coeff = (unquant_coeff + 4) >> 3;
3953             unquant_coeff<<= 3 + 3;
3954
3955             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3956             level+=64;
3957             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3958             else                    score= distortion + esc_length*lambda;
3959
3960             if(score < best_score){
3961                 best_score= score;
3962                 best_level= level - 64;
3963             }
3964         }
3965         block[0]= best_level;
3966         s->coded_score[n] = best_score - dc*dc;
3967         if(best_level == 0) return -1;
3968         else                return last_non_zero;
3969     }
3970
3971     i= last_i;
3972     av_assert2(last_level);
3973
3974     block[ perm_scantable[last_non_zero] ]= last_level;
3975     i -= last_run + 1;
3976
3977     for(; i>start_i; i -= run_tab[i] + 1){
3978         block[ perm_scantable[i-1] ]= level_tab[i];
3979     }
3980
3981     return last_non_zero;
3982 }
3983
3984 //#define REFINE_STATS 1
3985 static int16_t basis[64][64];
3986
3987 static void build_basis(uint8_t *perm){
3988     int i, j, x, y;
3989     emms_c();
3990     for(i=0; i<8; i++){
3991         for(j=0; j<8; j++){
3992             for(y=0; y<8; y++){
3993                 for(x=0; x<8; x++){
3994                     double s= 0.25*(1<<BASIS_SHIFT);
3995                     int index= 8*i + j;
3996                     int perm_index= perm[index];
3997                     if(i==0) s*= sqrt(0.5);
3998                     if(j==0) s*= sqrt(0.5);
3999                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4000                 }
4001             }
4002         }
4003     }
4004 }
4005
4006 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4007                         int16_t *block, int16_t *weight, int16_t *orig,
4008                         int n, int qscale){
4009     int16_t rem[64];
4010     LOCAL_ALIGNED_16(int16_t, d1, [64]);
4011     const uint8_t *scantable= s->intra_scantable.scantable;
4012     const uint8_t *perm_scantable= s->intra_scantable.permutated;
4013 //    unsigned int threshold1, threshold2;
4014 //    int bias=0;
4015     int run_tab[65];
4016     int prev_run=0;
4017     int prev_level=0;
4018     int qmul, qadd, start_i, last_non_zero, i, dc;
4019     uint8_t * length;
4020     uint8_t * last_length;
4021     int lambda;
4022     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4023 #ifdef REFINE_STATS
4024 static int count=0;
4025 static int after_last=0;
4026 static int to_zero=0;
4027 static int from_zero=0;
4028 static int raise=0;
4029 static int lower=0;
4030 static int messed_sign=0;
4031 #endif
4032
4033     if(basis[0][0] == 0)
4034         build_basis(s->idsp.idct_permutation);
4035
4036     qmul= qscale*2;
4037     qadd= (qscale-1)|1;
4038     if (s->mb_intra) {
4039         if (!s->h263_aic) {
4040             if (n < 4)
4041                 q = s->y_dc_scale;
4042             else
4043                 q = s->c_dc_scale;
4044         } else{
4045             /* For AIC we skip quant/dequant of INTRADC */
4046             q = 1;
4047             qadd=0;
4048         }
4049         q <<= RECON_SHIFT-3;
4050         /* note: block[0] is assumed to be positive */
4051         dc= block[0]*q;
4052 //        block[0] = (block[0] + (q >> 1)) / q;
4053         start_i = 1;
4054 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4055 //            bias= 1<<(QMAT_SHIFT-1);
4056         length     = s->intra_ac_vlc_length;
4057         last_length= s->intra_ac_vlc_last_length;
4058     } else {
4059         dc= 0;
4060         start_i = 0;
4061         length     = s->inter_ac_vlc_length;
4062         last_length= s->inter_ac_vlc_last_length;
4063     }
4064     last_non_zero = s->block_last_index[n];
4065
4066 #ifdef REFINE_STATS
4067 {START_TIMER
4068 #endif
4069     dc += (1<<(RECON_SHIFT-1));
4070     for(i=0; i<64; i++){
4071         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
4072     }
4073 #ifdef REFINE_STATS
4074 STOP_TIMER("memset rem[]")}
4075 #endif
4076     sum=0;
4077     for(i=0; i<64; i++){
4078         int one= 36;
4079         int qns=4;
4080         int w;
4081
4082         w= FFABS(weight[i]) + qns*one;
4083         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4084
4085         weight[i] = w;
4086 //        w=weight[i] = (63*qns + (w/2)) / w;
4087
4088         av_assert2(w>0);
4089         av_assert2(w<(1<<6));
4090         sum += w*w;
4091     }
4092     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4093 #ifdef REFINE_STATS
4094 {START_TIMER
4095 #endif
4096     run=0;
4097     rle_index=0;
4098     for(i=start_i; i<=last_non_zero; i++){
4099         int j= perm_scantable[i];
4100         const int level= block[j];
4101         int coeff;
4102
4103         if(level){
4104             if(level<0) coeff= qmul*level - qadd;
4105             else        coeff= qmul*level + qadd;
4106             run_tab[rle_index++]=run;
4107             run=0;
4108
4109             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4110         }else{
4111             run++;
4112         }
4113     }
4114 #ifdef REFINE_STATS
4115 if(last_non_zero>0){
4116 STOP_TIMER("init rem[]")
4117 }
4118 }
4119
4120 {START_TIMER
4121 #endif
4122     for(;;){
4123         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4124         int best_coeff=0;
4125         int best_change=0;
4126         int run2, best_unquant_change=0, analyze_gradient;
4127 #ifdef REFINE_STATS
4128 {START_TIMER
4129 #endif
4130         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4131
4132         if(analyze_gradient){
4133 #ifdef REFINE_STATS
4134 {START_TIMER
4135 #endif
4136             for(i=0; i<64; i++){
4137                 int w= weight[i];
4138
4139                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4140             }
4141 #ifdef REFINE_STATS
4142 STOP_TIMER("rem*w*w")}
4143 {START_TIMER
4144 #endif
4145             s->fdsp.fdct(d1);
4146 #ifdef REFINE_STATS
4147 STOP_TIMER("dct")}
4148 #endif
4149         }
4150
4151         if(start_i){
4152             const int level= block[0];
4153             int change, old_coeff;
4154
4155             av_assert2(s->mb_intra);
4156
4157             old_coeff= q*level;
4158
4159             for(change=-1; change<=1; change+=2){
4160                 int new_level= level + change;
4161                 int score, new_coeff;
4162
4163                 new_coeff= q*new_level;
4164                 if(new_coeff >= 2048 || new_coeff < 0)
4165                     continue;
4166
4167                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4168                                                   new_coeff - old_coeff);
4169                 if(score<best_score){
4170                     best_score= score;
4171                     best_coeff= 0;
4172                     best_change= change;
4173                     best_unquant_change= new_coeff - old_coeff;
4174                 }
4175             }
4176         }
4177
4178         run=0;
4179         rle_index=0;
4180         run2= run_tab[rle_index++];
4181         prev_level=0;
4182         prev_run=0;
4183
4184         for(i=start_i; i<64; i++){
4185             int j= perm_scantable[i];
4186             const int level= block[j];
4187             int change, old_coeff;
4188
4189             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4190                 break;
4191
4192             if(level){
4193                 if(level<0) old_coeff= qmul*level - qadd;
4194                 else        old_coeff= qmul*level + qadd;
4195                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4196             }else{
4197                 old_coeff=0;
4198                 run2--;
4199                 av_assert2(run2>=0 || i >= last_non_zero );
4200             }
4201
4202             for(change=-1; change<=1; change+=2){
4203                 int new_level= level + change;
4204                 int score, new_coeff, unquant_change;
4205
4206                 score=0;
4207                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4208                    continue;
4209
4210                 if(new_level){
4211                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4212                     else            new_coeff= qmul*new_level + qadd;
4213                     if(new_coeff >= 2048 || new_coeff <= -2048)
4214                         continue;
4215                     //FIXME check for overflow
4216
4217                     if(level){
4218                         if(level < 63 && level > -63){
4219                             if(i < last_non_zero)
4220                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4221                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4222                             else
4223                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4224                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4225                         }
4226                     }else{
4227                         av_assert2(FFABS(new_level)==1);
4228
4229                         if(analyze_gradient){
4230                             int g= d1[ scantable[i] ];
4231                             if(g && (g^new_level) >= 0)
4232                                 continue;
4233                         }
4234
4235                         if(i < last_non_zero){
4236                             int next_i= i + run2 + 1;
4237                             int next_level= block[ perm_scantable[next_i] ] + 64;
4238
4239                             if(next_level&(~127))
4240                                 next_level= 0;
4241
4242                             if(next_i < last_non_zero)
4243                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4244                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4245                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4246                             else
4247                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4248                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4249                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4250                         }else{
4251                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4252                             if(prev_level){
4253                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4254                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4255                             }
4256                         }
4257                     }
4258                 }else{
4259                     new_coeff=0;
4260                     av_assert2(FFABS(level)==1);
4261
4262                     if(i < last_non_zero){
4263                         int next_i= i + run2 + 1;
4264                         int next_level= block[ perm_scantable[next_i] ] + 64;
4265
4266                         if(next_level&(~127))
4267                             next_level= 0;
4268
4269                         if(next_i < last_non_zero)
4270                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4271                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4272                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4273                         else
4274                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4275                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4276                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4277                     }else{
4278                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4279                         if(prev_level){
4280                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4281                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4282                         }
4283                     }
4284                 }
4285
4286                 score *= lambda;
4287
4288                 unquant_change= new_coeff - old_coeff;
4289                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4290
4291                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4292                                                    unquant_change);
4293                 if(score<best_score){
4294                     best_score= score;
4295                     best_coeff= i;
4296                     best_change= change;
4297                     best_unquant_change= unquant_change;
4298                 }
4299             }
4300             if(level){
4301                 prev_level= level + 64;
4302                 if(prev_level&(~127))
4303                     prev_level= 0;
4304                 prev_run= run;
4305                 run=0;
4306             }else{
4307                 run++;
4308             }
4309         }
4310 #ifdef REFINE_STATS
4311 STOP_TIMER("iterative step")}
4312 #endif
4313
4314         if(best_change){
4315             int j= perm_scantable[ best_coeff ];
4316
4317             block[j] += best_change;
4318
4319             if(best_coeff > last_non_zero){
4320                 last_non_zero= best_coeff;
4321                 av_assert2(block[j]);
4322 #ifdef REFINE_STATS
4323 after_last++;
4324 #endif
4325             }else{
4326 #ifdef REFINE_STATS
4327 if(block[j]){
4328     if(block[j] - best_change){
4329         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4330             raise++;
4331         }else{
4332             lower++;
4333         }
4334     }else{
4335         from_zero++;
4336     }
4337 }else{
4338     to_zero++;
4339 }
4340 #endif
4341                 for(; last_non_zero>=start_i; last_non_zero--){
4342                     if(block[perm_scantable[last_non_zero]])
4343                         break;
4344                 }
4345             }
4346 #ifdef REFINE_STATS
4347 count++;
4348 if(256*256*256*64 % count == 0){
4349     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4350 }
4351 #endif
4352             run=0;
4353             rle_index=0;
4354             for(i=start_i; i<=last_non_zero; i++){
4355                 int j= perm_scantable[i];
4356                 const int level= block[j];
4357
4358                  if(level){
4359                      run_tab[rle_index++]=run;
4360                      run=0;
4361                  }else{
4362                      run++;
4363                  }
4364             }
4365
4366             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4367         }else{
4368             break;
4369         }
4370     }
4371 #ifdef REFINE_STATS
4372 if(last_non_zero>0){
4373 STOP_TIMER("iterative search")
4374 }
4375 }
4376 #endif
4377
4378     return last_non_zero;
4379 }
4380
4381 int ff_dct_quantize_c(MpegEncContext *s,
4382                         int16_t *block, int n,
4383                         int qscale, int *overflow)
4384 {
4385     int i, j, level, last_non_zero, q, start_i;
4386     const int *qmat;
4387     const uint8_t *scantable= s->intra_scantable.scantable;
4388     int bias;
4389     int max=0;
4390     unsigned int threshold1, threshold2;
4391
4392     s->fdsp.fdct(block);
4393
4394     if(s->dct_error_sum)
4395         s->denoise_dct(s, block);
4396
4397     if (s->mb_intra) {
4398         if (!s->h263_aic) {
4399             if (n < 4)
4400                 q = s->y_dc_scale;
4401             else
4402                 q = s->c_dc_scale;
4403             q = q << 3;
4404         } else
4405             /* For AIC we skip quant/dequant of INTRADC */
4406             q = 1 << 3;
4407
4408         /* note: block[0] is assumed to be positive */
4409         block[0] = (block[0] + (q >> 1)) / q;
4410         start_i = 1;
4411         last_non_zero = 0;
4412         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4413         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4414     } else {
4415         start_i = 0;
4416         last_non_zero = -1;
4417         qmat = s->q_inter_matrix[qscale];
4418         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4419     }
4420     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4421     threshold2= (threshold1<<1);
4422     for(i=63;i>=start_i;i--) {
4423         j = scantable[i];
4424         level = block[j] * qmat[j];
4425
4426         if(((unsigned)(level+threshold1))>threshold2){
4427             last_non_zero = i;
4428             break;
4429         }else{
4430             block[j]=0;
4431         }
4432     }
4433     for(i=start_i; i<=last_non_zero; i++) {
4434         j = scantable[i];
4435         level = block[j] * qmat[j];
4436
4437 //        if(   bias+level >= (1<<QMAT_SHIFT)
4438 //           || bias-level >= (1<<QMAT_SHIFT)){
4439         if(((unsigned)(level+threshold1))>threshold2){
4440             if(level>0){
4441                 level= (bias + level)>>QMAT_SHIFT;
4442                 block[j]= level;
4443             }else{
4444                 level= (bias - level)>>QMAT_SHIFT;
4445                 block[j]= -level;
4446             }
4447             max |=level;
4448         }else{
4449             block[j]=0;
4450         }
4451     }
4452     *overflow= s->max_qcoeff < max; //overflow might have happened
4453
4454     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4455     if (s->idsp.idct_permutation_type != FF_NO_IDCT_PERM)
4456         ff_block_permute(block, s->idsp.idct_permutation,
4457                          scantable, last_non_zero);
4458
4459     return last_non_zero;
4460 }
4461
/* Byte offset of a field inside MpegEncContext, for use in AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Option flags shared by the encoder option tables below. The replacement
 * list is parenthesised so that it stays a single operand wherever the macro
 * is expanded. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4464 static const AVOption h263_options[] = {
4465     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4466     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4467     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4468     FF_MPV_COMMON_OPTS
4469     { NULL },
4470 };
4471
4472 static const AVClass h263_class = {
4473     .class_name = "H.263 encoder",
4474     .item_name  = av_default_item_name,
4475     .option     = h263_options,
4476     .version    = LIBAVUTIL_VERSION_INT,
4477 };
4478
4479 AVCodec ff_h263_encoder = {
4480     .name           = "h263",
4481     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4482     .type           = AVMEDIA_TYPE_VIDEO,
4483     .id             = AV_CODEC_ID_H263,
4484     .priv_data_size = sizeof(MpegEncContext),
4485     .init           = ff_MPV_encode_init,
4486     .encode2        = ff_MPV_encode_picture,
4487     .close          = ff_MPV_encode_end,
4488     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4489     .priv_class     = &h263_class,
4490 };
4491
4492 static const AVOption h263p_options[] = {
4493     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4494     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4495     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4496     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4497     FF_MPV_COMMON_OPTS
4498     { NULL },
4499 };
4500 static const AVClass h263p_class = {
4501     .class_name = "H.263p encoder",
4502     .item_name  = av_default_item_name,
4503     .option     = h263p_options,
4504     .version    = LIBAVUTIL_VERSION_INT,
4505 };
4506
4507 AVCodec ff_h263p_encoder = {
4508     .name           = "h263p",
4509     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4510     .type           = AVMEDIA_TYPE_VIDEO,
4511     .id             = AV_CODEC_ID_H263P,
4512     .priv_data_size = sizeof(MpegEncContext),
4513     .init           = ff_MPV_encode_init,
4514     .encode2        = ff_MPV_encode_picture,
4515     .close          = ff_MPV_encode_end,
4516     .capabilities   = CODEC_CAP_SLICE_THREADS,
4517     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4518     .priv_class     = &h263p_class,
4519 };
4520
4521 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4522
4523 AVCodec ff_msmpeg4v2_encoder = {
4524     .name           = "msmpeg4v2",
4525     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4526     .type           = AVMEDIA_TYPE_VIDEO,
4527     .id             = AV_CODEC_ID_MSMPEG4V2,
4528     .priv_data_size = sizeof(MpegEncContext),
4529     .init           = ff_MPV_encode_init,
4530     .encode2        = ff_MPV_encode_picture,
4531     .close          = ff_MPV_encode_end,
4532     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4533     .priv_class     = &msmpeg4v2_class,
4534 };
4535
4536 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4537
4538 AVCodec ff_msmpeg4v3_encoder = {
4539     .name           = "msmpeg4",
4540     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4541     .type           = AVMEDIA_TYPE_VIDEO,
4542     .id             = AV_CODEC_ID_MSMPEG4V3,
4543     .priv_data_size = sizeof(MpegEncContext),
4544     .init           = ff_MPV_encode_init,
4545     .encode2        = ff_MPV_encode_picture,
4546     .close          = ff_MPV_encode_end,
4547     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4548     .priv_class     = &msmpeg4v3_class,
4549 };
4550
4551 FF_MPV_GENERIC_CLASS(wmv1)
4552
4553 AVCodec ff_wmv1_encoder = {
4554     .name           = "wmv1",
4555     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4556     .type           = AVMEDIA_TYPE_VIDEO,
4557     .id             = AV_CODEC_ID_WMV1,
4558     .priv_data_size = sizeof(MpegEncContext),
4559     .init           = ff_MPV_encode_init,
4560     .encode2        = ff_MPV_encode_picture,
4561     .close          = ff_MPV_encode_end,
4562     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4563     .priv_class     = &wmv1_class,
4564 };