]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
Merge commit 'adcb8392c9b185fd8a91a95fa256d15ab1432a30'
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "dsputil.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "h261.h"
44 #include "h263.h"
45 #include "mjpegenc_common.h"
46 #include "mathops.h"
47 #include "mpegutils.h"
48 #include "mjpegenc.h"
49 #include "msmpeg4.h"
50 #include "qpeldsp.h"
51 #include "faandct.h"
52 #include "thread.h"
53 #include "aandcttab.h"
54 #include "flv.h"
55 #include "mpeg4video.h"
56 #include "internal.h"
57 #include "bytestream.h"
58 #include <limits.h>
59 #include "sp5x.h"
60
61 static int encode_picture(MpegEncContext *s, int picture_number);
62 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
63 static int sse_mb(MpegEncContext *s);
64 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
65 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
66
67 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
68 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
69
70 const AVOption ff_mpv_generic_options[] = {
71     FF_MPV_COMMON_OPTS
72     { NULL },
73 };
74
75 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
76                        uint16_t (*qmat16)[2][64],
77                        const uint16_t *quant_matrix,
78                        int bias, int qmin, int qmax, int intra)
79 {
80     DSPContext *dsp = &s->dsp;
81     int qscale;
82     int shift = 0;
83
84     for (qscale = qmin; qscale <= qmax; qscale++) {
85         int i;
86         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
87             dsp->fdct == ff_jpeg_fdct_islow_10 ||
88             dsp->fdct == ff_faandct) {
89             for (i = 0; i < 64; i++) {
90                 const int j = dsp->idct_permutation[i];
91                 /* 16 <= qscale * quant_matrix[i] <= 7905
92                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
93                  *             19952 <=              x  <= 249205026
94                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
95                  *           3444240 >= (1 << 36) / (x) >= 275 */
96
97                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
98                                         (qscale * quant_matrix[j]));
99             }
100         } else if (dsp->fdct == ff_fdct_ifast) {
101             for (i = 0; i < 64; i++) {
102                 const int j = dsp->idct_permutation[i];
103                 /* 16 <= qscale * quant_matrix[i] <= 7905
104                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
105                  *             19952 <=              x  <= 249205026
106                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
107                  *           3444240 >= (1 << 36) / (x) >= 275 */
108
109                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
110                                         (ff_aanscales[i] * (int64_t)qscale * quant_matrix[j]));
111             }
112         } else {
113             for (i = 0; i < 64; i++) {
114                 const int j = dsp->idct_permutation[i];
115                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
116                  * Assume x = qscale * quant_matrix[i]
117                  * So             16 <=              x  <= 7905
118                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
119                  * so          32768 >= (1 << 19) / (x) >= 67 */
120                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
121                                         (qscale * quant_matrix[j]));
122                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
123                 //                    (qscale * quant_matrix[i]);
124                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
125                                        (qscale * quant_matrix[j]);
126
127                 if (qmat16[qscale][0][i] == 0 ||
128                     qmat16[qscale][0][i] == 128 * 256)
129                     qmat16[qscale][0][i] = 128 * 256 - 1;
130                 qmat16[qscale][1][i] =
131                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
132                                 qmat16[qscale][0][i]);
133             }
134         }
135
136         for (i = intra; i < 64; i++) {
137             int64_t max = 8191;
138             if (dsp->fdct == ff_fdct_ifast) {
139                 max = (8191LL * ff_aanscales[i]) >> 14;
140             }
141             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
142                 shift++;
143             }
144         }
145     }
146     if (shift) {
147         av_log(NULL, AV_LOG_INFO,
148                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
149                QMAT_SHIFT - shift);
150     }
151 }
152
153 static inline void update_qscale(MpegEncContext *s)
154 {
155     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
156                 (FF_LAMBDA_SHIFT + 7);
157     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
158
159     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
160                  FF_LAMBDA_SHIFT;
161 }
162
163 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
164 {
165     int i;
166
167     if (matrix) {
168         put_bits(pb, 1, 1);
169         for (i = 0; i < 64; i++) {
170             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
171         }
172     } else
173         put_bits(pb, 1, 0);
174 }
175
176 /**
177  * init s->current_picture.qscale_table from s->lambda_table
178  */
179 void ff_init_qscale_tab(MpegEncContext *s)
180 {
181     int8_t * const qscale_table = s->current_picture.qscale_table;
182     int i;
183
184     for (i = 0; i < s->mb_num; i++) {
185         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
186         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
187         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
188                                                   s->avctx->qmax);
189     }
190 }
191
192 static void update_duplicate_context_after_me(MpegEncContext *dst,
193                                               MpegEncContext *src)
194 {
195 #define COPY(a) dst->a= src->a
196     COPY(pict_type);
197     COPY(current_picture);
198     COPY(f_code);
199     COPY(b_code);
200     COPY(qscale);
201     COPY(lambda);
202     COPY(lambda2);
203     COPY(picture_in_gop_number);
204     COPY(gop_picture_number);
205     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
206     COPY(progressive_frame);    // FIXME don't set in encode_header
207     COPY(partitioned_frame);    // FIXME don't set in encode_header
208 #undef COPY
209 }
210
211 /**
212  * Set the given MpegEncContext to defaults for encoding.
213  * the changed fields will not depend upon the prior state of the MpegEncContext.
214  */
215 static void MPV_encode_defaults(MpegEncContext *s)
216 {
217     int i;
218     ff_MPV_common_defaults(s);
219
220     for (i = -16; i < 16; i++) {
221         default_fcode_tab[i + MAX_MV] = 1;
222     }
223     s->me.mv_penalty = default_mv_penalty;
224     s->fcode_tab     = default_fcode_tab;
225
226     s->input_picture_number  = 0;
227     s->picture_in_gop_number = 0;
228 }
229
230 av_cold int ff_dct_encode_init(MpegEncContext *s) {
231     if (ARCH_X86)
232         ff_dct_encode_init_x86(s);
233
234     if (CONFIG_H263_ENCODER)
235         ff_h263dsp_init(&s->h263dsp);
236     if (!s->dct_quantize)
237         s->dct_quantize = ff_dct_quantize_c;
238     if (!s->denoise_dct)
239         s->denoise_dct  = denoise_dct_c;
240     s->fast_dct_quantize = s->dct_quantize;
241     if (s->avctx->trellis)
242         s->dct_quantize  = dct_quantize_trellis_c;
243
244     return 0;
245 }
246
247 /* init video encoder */
248 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
249 {
250     MpegEncContext *s = avctx->priv_data;
251     int i, ret, format_supported;
252
253     MPV_encode_defaults(s);
254
255     switch (avctx->codec_id) {
256     case AV_CODEC_ID_MPEG2VIDEO:
257         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
258             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
259             av_log(avctx, AV_LOG_ERROR,
260                    "only YUV420 and YUV422 are supported\n");
261             return -1;
262         }
263         break;
264     case AV_CODEC_ID_MJPEG:
265     case AV_CODEC_ID_AMV:
266         format_supported = 0;
267         /* JPEG color space */
268         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
269             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
270             avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
271             (avctx->color_range == AVCOL_RANGE_JPEG &&
272              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
273               avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
274               avctx->pix_fmt == AV_PIX_FMT_YUV444P)))
275             format_supported = 1;
276         /* MPEG color space */
277         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
278                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
279                   avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
280                   avctx->pix_fmt == AV_PIX_FMT_YUV444P))
281             format_supported = 1;
282
283         if (!format_supported) {
284             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
285             return -1;
286         }
287         break;
288     default:
289         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
290             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
291             return -1;
292         }
293     }
294
295     switch (avctx->pix_fmt) {
296     case AV_PIX_FMT_YUVJ444P:
297     case AV_PIX_FMT_YUV444P:
298         s->chroma_format = CHROMA_444;
299         break;
300     case AV_PIX_FMT_YUVJ422P:
301     case AV_PIX_FMT_YUV422P:
302         s->chroma_format = CHROMA_422;
303         break;
304     case AV_PIX_FMT_YUVJ420P:
305     case AV_PIX_FMT_YUV420P:
306     default:
307         s->chroma_format = CHROMA_420;
308         break;
309     }
310
311     s->bit_rate = avctx->bit_rate;
312     s->width    = avctx->width;
313     s->height   = avctx->height;
314     if (avctx->gop_size > 600 &&
315         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
316         av_log(avctx, AV_LOG_WARNING,
317                "keyframe interval too large!, reducing it from %d to %d\n",
318                avctx->gop_size, 600);
319         avctx->gop_size = 600;
320     }
321     s->gop_size     = avctx->gop_size;
322     s->avctx        = avctx;
323     s->flags        = avctx->flags;
324     s->flags2       = avctx->flags2;
325     if (avctx->max_b_frames > MAX_B_FRAMES) {
326         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
327                "is %d.\n", MAX_B_FRAMES);
328         avctx->max_b_frames = MAX_B_FRAMES;
329     }
330     s->max_b_frames = avctx->max_b_frames;
331     s->codec_id     = avctx->codec->id;
332     s->strict_std_compliance = avctx->strict_std_compliance;
333     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
334     s->mpeg_quant         = avctx->mpeg_quant;
335     s->rtp_mode           = !!avctx->rtp_payload_size;
336     s->intra_dc_precision = avctx->intra_dc_precision;
337     s->user_specified_pts = AV_NOPTS_VALUE;
338
339     if (s->gop_size <= 1) {
340         s->intra_only = 1;
341         s->gop_size   = 12;
342     } else {
343         s->intra_only = 0;
344     }
345
346     s->me_method = avctx->me_method;
347
348     /* Fixed QSCALE */
349     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
350
351     s->adaptive_quant = (s->avctx->lumi_masking ||
352                          s->avctx->dark_masking ||
353                          s->avctx->temporal_cplx_masking ||
354                          s->avctx->spatial_cplx_masking  ||
355                          s->avctx->p_masking      ||
356                          s->avctx->border_masking ||
357                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
358                         !s->fixed_qscale;
359
360     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
361
362     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
363         switch(avctx->codec_id) {
364         case AV_CODEC_ID_MPEG1VIDEO:
365         case AV_CODEC_ID_MPEG2VIDEO:
366             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112L / 15000000 * 16384;
367             break;
368         case AV_CODEC_ID_MPEG4:
369         case AV_CODEC_ID_MSMPEG4V1:
370         case AV_CODEC_ID_MSMPEG4V2:
371         case AV_CODEC_ID_MSMPEG4V3:
372             if       (avctx->rc_max_rate >= 15000000) {
373                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000L) * (760-320) / (38400000 - 15000000);
374             } else if(avctx->rc_max_rate >=  2000000) {
375                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000L) * (320- 80) / (15000000 -  2000000);
376             } else if(avctx->rc_max_rate >=   384000) {
377                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000L) * ( 80- 40) / ( 2000000 -   384000);
378             } else
379                 avctx->rc_buffer_size = 40;
380             avctx->rc_buffer_size *= 16384;
381             break;
382         }
383         if (avctx->rc_buffer_size) {
384             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
385         }
386     }
387
388     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
389         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
390         if (avctx->rc_max_rate && !avctx->rc_buffer_size)
391             return -1;
392     }
393
394     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
395         av_log(avctx, AV_LOG_INFO,
396                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
397     }
398
399     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
400         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
401         return -1;
402     }
403
404     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
405         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
406         return -1;
407     }
408
409     if (avctx->rc_max_rate &&
410         avctx->rc_max_rate == avctx->bit_rate &&
411         avctx->rc_max_rate != avctx->rc_min_rate) {
412         av_log(avctx, AV_LOG_INFO,
413                "impossible bitrate constraints, this will fail\n");
414     }
415
416     if (avctx->rc_buffer_size &&
417         avctx->bit_rate * (int64_t)avctx->time_base.num >
418             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
419         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
420         return -1;
421     }
422
423     if (!s->fixed_qscale &&
424         avctx->bit_rate * av_q2d(avctx->time_base) >
425             avctx->bit_rate_tolerance) {
426         av_log(avctx, AV_LOG_WARNING,
427                "bitrate tolerance %d too small for bitrate %d, overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
428         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
429     }
430
431     if (s->avctx->rc_max_rate &&
432         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
433         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
434          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
435         90000LL * (avctx->rc_buffer_size - 1) >
436             s->avctx->rc_max_rate * 0xFFFFLL) {
437         av_log(avctx, AV_LOG_INFO,
438                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
439                "specified vbv buffer is too large for the given bitrate!\n");
440     }
441
442     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
443         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
444         s->codec_id != AV_CODEC_ID_FLV1) {
445         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
446         return -1;
447     }
448
449     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
450         av_log(avctx, AV_LOG_ERROR,
451                "OBMC is only supported with simple mb decision\n");
452         return -1;
453     }
454
455     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
456         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
457         return -1;
458     }
459
460     if (s->max_b_frames                    &&
461         s->codec_id != AV_CODEC_ID_MPEG4      &&
462         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
463         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
464         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
465         return -1;
466     }
467     if (s->max_b_frames < 0) {
468         av_log(avctx, AV_LOG_ERROR,
469                "max b frames must be 0 or positive for mpegvideo based encoders\n");
470         return -1;
471     }
472
473     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
474          s->codec_id == AV_CODEC_ID_H263  ||
475          s->codec_id == AV_CODEC_ID_H263P) &&
476         (avctx->sample_aspect_ratio.num > 255 ||
477          avctx->sample_aspect_ratio.den > 255)) {
478         av_log(avctx, AV_LOG_WARNING,
479                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
480                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
481         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
482                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
483     }
484
485     if ((s->codec_id == AV_CODEC_ID_H263  ||
486          s->codec_id == AV_CODEC_ID_H263P) &&
487         (avctx->width  > 2048 ||
488          avctx->height > 1152 )) {
489         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
490         return -1;
491     }
492     if ((s->codec_id == AV_CODEC_ID_H263  ||
493          s->codec_id == AV_CODEC_ID_H263P) &&
494         ((avctx->width &3) ||
495          (avctx->height&3) )) {
496         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
497         return -1;
498     }
499
500     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
501         (avctx->width  > 4095 ||
502          avctx->height > 4095 )) {
503         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
504         return -1;
505     }
506
507     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
508         (avctx->width  > 16383 ||
509          avctx->height > 16383 )) {
510         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
511         return -1;
512     }
513
514     if (s->codec_id == AV_CODEC_ID_RV10 &&
515         (avctx->width &15 ||
516          avctx->height&15 )) {
517         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
518         return AVERROR(EINVAL);
519     }
520
521     if (s->codec_id == AV_CODEC_ID_RV20 &&
522         (avctx->width &3 ||
523          avctx->height&3 )) {
524         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
525         return AVERROR(EINVAL);
526     }
527
528     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
529          s->codec_id == AV_CODEC_ID_WMV2) &&
530          avctx->width & 1) {
531          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
532          return -1;
533     }
534
535     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
536         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
537         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
538         return -1;
539     }
540
541     // FIXME mpeg2 uses that too
542     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
543                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
544         av_log(avctx, AV_LOG_ERROR,
545                "mpeg2 style quantization not supported by codec\n");
546         return -1;
547     }
548
549     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
550         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
551         return -1;
552     }
553
554     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
555         s->avctx->mb_decision != FF_MB_DECISION_RD) {
556         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
557         return -1;
558     }
559
560     if (s->avctx->scenechange_threshold < 1000000000 &&
561         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
562         av_log(avctx, AV_LOG_ERROR,
563                "closed gop with scene change detection are not supported yet, "
564                "set threshold to 1000000000\n");
565         return -1;
566     }
567
568     if (s->flags & CODEC_FLAG_LOW_DELAY) {
569         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
570             av_log(avctx, AV_LOG_ERROR,
571                   "low delay forcing is only available for mpeg2\n");
572             return -1;
573         }
574         if (s->max_b_frames != 0) {
575             av_log(avctx, AV_LOG_ERROR,
576                    "b frames cannot be used with low delay\n");
577             return -1;
578         }
579     }
580
581     if (s->q_scale_type == 1) {
582         if (avctx->qmax > 12) {
583             av_log(avctx, AV_LOG_ERROR,
584                    "non linear quant only supports qmax <= 12 currently\n");
585             return -1;
586         }
587     }
588
589     if (s->avctx->thread_count > 1         &&
590         s->codec_id != AV_CODEC_ID_MPEG4      &&
591         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
592         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
593         s->codec_id != AV_CODEC_ID_MJPEG      &&
594         (s->codec_id != AV_CODEC_ID_H263P)) {
595         av_log(avctx, AV_LOG_ERROR,
596                "multi threaded encoding not supported by codec\n");
597         return -1;
598     }
599
600     if (s->avctx->thread_count < 1) {
601         av_log(avctx, AV_LOG_ERROR,
602                "automatic thread number detection not supported by codec, "
603                "patch welcome\n");
604         return -1;
605     }
606
607     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
608         s->rtp_mode = 1;
609
610     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
611         s->h263_slice_structured = 1;
612
613     if (!avctx->time_base.den || !avctx->time_base.num) {
614         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
615         return -1;
616     }
617
618     i = (INT_MAX / 2 + 128) >> 8;
619     if (avctx->mb_threshold >= i) {
620         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
621                i - 1);
622         return -1;
623     }
624
625     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
626         av_log(avctx, AV_LOG_INFO,
627                "notice: b_frame_strategy only affects the first pass\n");
628         avctx->b_frame_strategy = 0;
629     }
630
631     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
632     if (i > 1) {
633         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
634         avctx->time_base.den /= i;
635         avctx->time_base.num /= i;
636         //return -1;
637     }
638
639     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
640         // (a + x * 3 / 8) / x
641         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
642         s->inter_quant_bias = 0;
643     } else {
644         s->intra_quant_bias = 0;
645         // (a - x / 4) / x
646         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
647     }
648
649     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
650         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
651         return AVERROR(EINVAL);
652     }
653
654     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
655         s->intra_quant_bias = avctx->intra_quant_bias;
656     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
657         s->inter_quant_bias = avctx->inter_quant_bias;
658
659     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
660
661     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
662         s->avctx->time_base.den > (1 << 16) - 1) {
663         av_log(avctx, AV_LOG_ERROR,
664                "timebase %d/%d not supported by MPEG 4 standard, "
665                "the maximum admitted value for the timebase denominator "
666                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
667                (1 << 16) - 1);
668         return -1;
669     }
670     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
671
672     switch (avctx->codec->id) {
673     case AV_CODEC_ID_MPEG1VIDEO:
674         s->out_format = FMT_MPEG1;
675         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
676         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
677         break;
678     case AV_CODEC_ID_MPEG2VIDEO:
679         s->out_format = FMT_MPEG1;
680         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
681         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
682         s->rtp_mode   = 1;
683         break;
684     case AV_CODEC_ID_MJPEG:
685     case AV_CODEC_ID_AMV:
686         s->out_format = FMT_MJPEG;
687         s->intra_only = 1; /* force intra only for jpeg */
688         if (!CONFIG_MJPEG_ENCODER ||
689             ff_mjpeg_encode_init(s) < 0)
690             return -1;
691         avctx->delay = 0;
692         s->low_delay = 1;
693         break;
694     case AV_CODEC_ID_H261:
695         if (!CONFIG_H261_ENCODER)
696             return -1;
697         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
698             av_log(avctx, AV_LOG_ERROR,
699                    "The specified picture size of %dx%d is not valid for the "
700                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
701                     s->width, s->height);
702             return -1;
703         }
704         s->out_format = FMT_H261;
705         avctx->delay  = 0;
706         s->low_delay  = 1;
707         break;
708     case AV_CODEC_ID_H263:
709         if (!CONFIG_H263_ENCODER)
710             return -1;
711         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
712                              s->width, s->height) == 8) {
713             av_log(avctx, AV_LOG_ERROR,
714                    "The specified picture size of %dx%d is not valid for "
715                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
716                    "352x288, 704x576, and 1408x1152. "
717                    "Try H.263+.\n", s->width, s->height);
718             return -1;
719         }
720         s->out_format = FMT_H263;
721         avctx->delay  = 0;
722         s->low_delay  = 1;
723         break;
724     case AV_CODEC_ID_H263P:
725         s->out_format = FMT_H263;
726         s->h263_plus  = 1;
727         /* Fx */
728         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
729         s->modified_quant  = s->h263_aic;
730         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
731         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
732
733         /* /Fx */
734         /* These are just to be sure */
735         avctx->delay = 0;
736         s->low_delay = 1;
737         break;
738     case AV_CODEC_ID_FLV1:
739         s->out_format      = FMT_H263;
740         s->h263_flv        = 2; /* format = 1; 11-bit codes */
741         s->unrestricted_mv = 1;
742         s->rtp_mode  = 0; /* don't allow GOB */
743         avctx->delay = 0;
744         s->low_delay = 1;
745         break;
746     case AV_CODEC_ID_RV10:
747         s->out_format = FMT_H263;
748         avctx->delay  = 0;
749         s->low_delay  = 1;
750         break;
751     case AV_CODEC_ID_RV20:
752         s->out_format      = FMT_H263;
753         avctx->delay       = 0;
754         s->low_delay       = 1;
755         s->modified_quant  = 1;
756         s->h263_aic        = 1;
757         s->h263_plus       = 1;
758         s->loop_filter     = 1;
759         s->unrestricted_mv = 0;
760         break;
761     case AV_CODEC_ID_MPEG4:
762         s->out_format      = FMT_H263;
763         s->h263_pred       = 1;
764         s->unrestricted_mv = 1;
765         s->low_delay       = s->max_b_frames ? 0 : 1;
766         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
767         break;
768     case AV_CODEC_ID_MSMPEG4V2:
769         s->out_format      = FMT_H263;
770         s->h263_pred       = 1;
771         s->unrestricted_mv = 1;
772         s->msmpeg4_version = 2;
773         avctx->delay       = 0;
774         s->low_delay       = 1;
775         break;
776     case AV_CODEC_ID_MSMPEG4V3:
777         s->out_format        = FMT_H263;
778         s->h263_pred         = 1;
779         s->unrestricted_mv   = 1;
780         s->msmpeg4_version   = 3;
781         s->flipflop_rounding = 1;
782         avctx->delay         = 0;
783         s->low_delay         = 1;
784         break;
785     case AV_CODEC_ID_WMV1:
786         s->out_format        = FMT_H263;
787         s->h263_pred         = 1;
788         s->unrestricted_mv   = 1;
789         s->msmpeg4_version   = 4;
790         s->flipflop_rounding = 1;
791         avctx->delay         = 0;
792         s->low_delay         = 1;
793         break;
794     case AV_CODEC_ID_WMV2:
795         s->out_format        = FMT_H263;
796         s->h263_pred         = 1;
797         s->unrestricted_mv   = 1;
798         s->msmpeg4_version   = 5;
799         s->flipflop_rounding = 1;
800         avctx->delay         = 0;
801         s->low_delay         = 1;
802         break;
803     default:
804         return -1;
805     }
806
807     avctx->has_b_frames = !s->low_delay;
808
809     s->encoding = 1;
810
811     s->progressive_frame    =
812     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
813                                                 CODEC_FLAG_INTERLACED_ME) ||
814                                 s->alternate_scan);
815
816     /* init */
817     if (ff_MPV_common_init(s) < 0)
818         return -1;
819
820     ff_qpeldsp_init(&s->qdsp);
821
822     s->avctx->coded_frame = s->current_picture.f;
823
824     if (s->msmpeg4_version) {
825         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
826                           2 * 2 * (MAX_LEVEL + 1) *
827                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
828     }
829     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
830
831     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
832     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
833     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
834     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
835     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
836     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
837     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
838                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
839     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
840                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
841
842     if (s->avctx->noise_reduction) {
843         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
844                           2 * 64 * sizeof(uint16_t), fail);
845     }
846
847     ff_dct_encode_init(s);
848
849     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
850         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
851
852     s->quant_precision = 5;
853
854     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
855     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
856
857     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
858         ff_h261_encode_init(s);
859     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
860         ff_h263_encode_init(s);
861     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
862         ff_msmpeg4_encode_init(s);
863     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
864         && s->out_format == FMT_MPEG1)
865         ff_mpeg1_encode_init(s);
866
867     /* init q matrix */
868     for (i = 0; i < 64; i++) {
869         int j = s->dsp.idct_permutation[i];
870         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
871             s->mpeg_quant) {
872             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
873             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
874         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
875             s->intra_matrix[j] =
876             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
877         } else {
878             /* mpeg1/2 */
879             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
880             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
881         }
882         if (s->avctx->intra_matrix)
883             s->intra_matrix[j] = s->avctx->intra_matrix[i];
884         if (s->avctx->inter_matrix)
885             s->inter_matrix[j] = s->avctx->inter_matrix[i];
886     }
887
888     /* precompute matrix */
889     /* for mjpeg, we do include qscale in the matrix */
890     if (s->out_format != FMT_MJPEG) {
891         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
892                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
893                           31, 1);
894         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
895                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
896                           31, 0);
897     }
898
899     if (ff_rate_control_init(s) < 0)
900         return -1;
901
902 #if FF_API_ERROR_RATE
903     FF_DISABLE_DEPRECATION_WARNINGS
904     if (avctx->error_rate)
905         s->error_rate = avctx->error_rate;
906     FF_ENABLE_DEPRECATION_WARNINGS;
907 #endif
908
909 #if FF_API_NORMALIZE_AQP
910     FF_DISABLE_DEPRECATION_WARNINGS
911     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
912         s->mpv_flags |= FF_MPV_FLAG_NAQ;
913     FF_ENABLE_DEPRECATION_WARNINGS;
914 #endif
915
916 #if FF_API_MV0
917     FF_DISABLE_DEPRECATION_WARNINGS
918     if (avctx->flags & CODEC_FLAG_MV0)
919         s->mpv_flags |= FF_MPV_FLAG_MV0;
920     FF_ENABLE_DEPRECATION_WARNINGS
921 #endif
922
923     if (avctx->b_frame_strategy == 2) {
924         for (i = 0; i < s->max_b_frames + 2; i++) {
925             s->tmp_frames[i] = av_frame_alloc();
926             if (!s->tmp_frames[i])
927                 return AVERROR(ENOMEM);
928
929             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
930             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
931             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
932
933             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
934             if (ret < 0)
935                 return ret;
936         }
937     }
938
939     return 0;
940 fail:
941     ff_MPV_encode_end(avctx);
942     return AVERROR_UNKNOWN;
943 }
944
945 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
946 {
947     MpegEncContext *s = avctx->priv_data;
948     int i;
949
950     ff_rate_control_uninit(s);
951
952     ff_MPV_common_end(s);
953     if (CONFIG_MJPEG_ENCODER &&
954         s->out_format == FMT_MJPEG)
955         ff_mjpeg_encode_close(s);
956
957     av_freep(&avctx->extradata);
958
959     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
960         av_frame_free(&s->tmp_frames[i]);
961
962     ff_free_picture_tables(&s->new_picture);
963     ff_mpeg_unref_picture(s, &s->new_picture);
964
965     av_freep(&s->avctx->stats_out);
966     av_freep(&s->ac_stats);
967
968     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
969     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
970     s->q_chroma_intra_matrix=   NULL;
971     s->q_chroma_intra_matrix16= NULL;
972     av_freep(&s->q_intra_matrix);
973     av_freep(&s->q_inter_matrix);
974     av_freep(&s->q_intra_matrix16);
975     av_freep(&s->q_inter_matrix16);
976     av_freep(&s->input_picture);
977     av_freep(&s->reordered_input_picture);
978     av_freep(&s->dct_offset);
979
980     return 0;
981 }
982
/**
 * Sum of absolute errors of a 16x16 block against a constant value.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared with
 * @param stride distance in bytes between vertically adjacent samples
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int row, col;
    int total = 0;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
996
997 static int get_intra_count(MpegEncContext *s, uint8_t *src,
998                            uint8_t *ref, int stride)
999 {
1000     int x, y, w, h;
1001     int acc = 0;
1002
1003     w = s->width  & ~15;
1004     h = s->height & ~15;
1005
1006     for (y = 0; y < h; y += 16) {
1007         for (x = 0; x < w; x += 16) {
1008             int offset = x + y * stride;
1009             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
1010                                      16);
1011             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
1012             int sae  = get_sae(src + offset, mean, stride);
1013
1014             acc += sae + 500 < sad;
1015         }
1016     }
1017     return acc;
1018 }
1019
1020
1021 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1022 {
1023     Picture *pic = NULL;
1024     int64_t pts;
1025     int i, display_picture_number = 0, ret;
1026     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
1027                                                  (s->low_delay ? 0 : 1);
1028     int direct = 1;
1029
1030     if (pic_arg) {
1031         pts = pic_arg->pts;
1032         display_picture_number = s->input_picture_number++;
1033
1034         if (pts != AV_NOPTS_VALUE) {
1035             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1036                 int64_t last = s->user_specified_pts;
1037
1038                 if (pts <= last) {
1039                     av_log(s->avctx, AV_LOG_ERROR,
1040                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1041                            pts, last);
1042                     return AVERROR(EINVAL);
1043                 }
1044
1045                 if (!s->low_delay && display_picture_number == 1)
1046                     s->dts_delta = pts - last;
1047             }
1048             s->user_specified_pts = pts;
1049         } else {
1050             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1051                 s->user_specified_pts =
1052                 pts = s->user_specified_pts + 1;
1053                 av_log(s->avctx, AV_LOG_INFO,
1054                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1055                        pts);
1056             } else {
1057                 pts = display_picture_number;
1058             }
1059         }
1060     }
1061
1062     if (pic_arg) {
1063         if (!pic_arg->buf[0])
1064             direct = 0;
1065         if (pic_arg->linesize[0] != s->linesize)
1066             direct = 0;
1067         if (pic_arg->linesize[1] != s->uvlinesize)
1068             direct = 0;
1069         if (pic_arg->linesize[2] != s->uvlinesize)
1070             direct = 0;
1071         if ((s->width & 15) || (s->height & 15))
1072             direct = 0;
1073         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1074             direct = 0;
1075         if (s->linesize & (STRIDE_ALIGN-1))
1076             direct = 0;
1077
1078         av_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1079                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1080
1081         if (direct) {
1082             i = ff_find_unused_picture(s, 1);
1083             if (i < 0)
1084                 return i;
1085
1086             pic = &s->picture[i];
1087             pic->reference = 3;
1088
1089             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1090                 return ret;
1091             if (ff_alloc_picture(s, pic, 1) < 0) {
1092                 return -1;
1093             }
1094         } else {
1095             i = ff_find_unused_picture(s, 0);
1096             if (i < 0)
1097                 return i;
1098
1099             pic = &s->picture[i];
1100             pic->reference = 3;
1101
1102             if (ff_alloc_picture(s, pic, 0) < 0) {
1103                 return -1;
1104             }
1105
1106             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1107                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1108                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1109                 // empty
1110             } else {
1111                 int h_chroma_shift, v_chroma_shift;
1112                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1113                                                  &h_chroma_shift,
1114                                                  &v_chroma_shift);
1115
1116                 for (i = 0; i < 3; i++) {
1117                     int src_stride = pic_arg->linesize[i];
1118                     int dst_stride = i ? s->uvlinesize : s->linesize;
1119                     int h_shift = i ? h_chroma_shift : 0;
1120                     int v_shift = i ? v_chroma_shift : 0;
1121                     int w = s->width  >> h_shift;
1122                     int h = s->height >> v_shift;
1123                     uint8_t *src = pic_arg->data[i];
1124                     uint8_t *dst = pic->f->data[i];
1125                     int vpad = 16;
1126
1127                     if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1128                         && !s->progressive_sequence)
1129                         vpad = 32;
1130
1131                     if (!s->avctx->rc_buffer_size)
1132                         dst += INPLACE_OFFSET;
1133
1134                     if (src_stride == dst_stride)
1135                         memcpy(dst, src, src_stride * h);
1136                     else {
1137                         int h2 = h;
1138                         uint8_t *dst2 = dst;
1139                         while (h2--) {
1140                             memcpy(dst2, src, w);
1141                             dst2 += dst_stride;
1142                             src += src_stride;
1143                         }
1144                     }
1145                     if ((s->width & 15) || (s->height & (vpad-1))) {
1146                         s->dsp.draw_edges(dst, dst_stride,
1147                                           w, h,
1148                                           16>>h_shift,
1149                                           vpad>>v_shift,
1150                                           EDGE_BOTTOM);
1151                     }
1152                 }
1153             }
1154         }
1155         ret = av_frame_copy_props(pic->f, pic_arg);
1156         if (ret < 0)
1157             return ret;
1158
1159         pic->f->display_picture_number = display_picture_number;
1160         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1161     }
1162
1163     /* shift buffer entries */
1164     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1165         s->input_picture[i - 1] = s->input_picture[i];
1166
1167     s->input_picture[encoding_delay] = (Picture*) pic;
1168
1169     return 0;
1170 }
1171
1172 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1173 {
1174     int x, y, plane;
1175     int score = 0;
1176     int64_t score64 = 0;
1177
1178     for (plane = 0; plane < 3; plane++) {
1179         const int stride = p->f->linesize[plane];
1180         const int bw = plane ? 1 : 2;
1181         for (y = 0; y < s->mb_height * bw; y++) {
1182             for (x = 0; x < s->mb_width * bw; x++) {
1183                 int off = p->shared ? 0 : 16;
1184                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1185                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1186                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1187
1188                 switch (FFABS(s->avctx->frame_skip_exp)) {
1189                 case 0: score    =  FFMAX(score, v);          break;
1190                 case 1: score   += FFABS(v);                  break;
1191                 case 2: score64 += v * (int64_t)v;                       break;
1192                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1193                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1194                 }
1195             }
1196         }
1197     }
1198     emms_c();
1199
1200     if (score)
1201         score64 = score;
1202     if (s->avctx->frame_skip_exp < 0)
1203         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1204                       -1.0/s->avctx->frame_skip_exp);
1205
1206     if (score64 < s->avctx->frame_skip_threshold)
1207         return 1;
1208     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1209         return 1;
1210     return 0;
1211 }
1212
1213 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1214 {
1215     AVPacket pkt = { 0 };
1216     int ret, got_output;
1217
1218     av_init_packet(&pkt);
1219     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1220     if (ret < 0)
1221         return ret;
1222
1223     ret = pkt.size;
1224     av_free_packet(&pkt);
1225     return ret;
1226 }
1227
1228 static int estimate_best_b_count(MpegEncContext *s)
1229 {
1230     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1231     AVCodecContext *c = avcodec_alloc_context3(NULL);
1232     const int scale = s->avctx->brd_scale;
1233     int i, j, out_size, p_lambda, b_lambda, lambda2;
1234     int64_t best_rd  = INT64_MAX;
1235     int best_b_count = -1;
1236
1237     av_assert0(scale >= 0 && scale <= 3);
1238
1239     //emms_c();
1240     //s->next_picture_ptr->quality;
1241     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1242     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1243     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1244     if (!b_lambda) // FIXME we should do this somewhere else
1245         b_lambda = p_lambda;
1246     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1247                FF_LAMBDA_SHIFT;
1248
1249     c->width        = s->width  >> scale;
1250     c->height       = s->height >> scale;
1251     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1252     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1253     c->mb_decision  = s->avctx->mb_decision;
1254     c->me_cmp       = s->avctx->me_cmp;
1255     c->mb_cmp       = s->avctx->mb_cmp;
1256     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1257     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1258     c->time_base    = s->avctx->time_base;
1259     c->max_b_frames = s->max_b_frames;
1260
1261     if (avcodec_open2(c, codec, NULL) < 0)
1262         return -1;
1263
1264     for (i = 0; i < s->max_b_frames + 2; i++) {
1265         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1266                                                 s->next_picture_ptr;
1267
1268         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1269             pre_input = *pre_input_ptr;
1270
1271             if (!pre_input.shared && i) {
1272                 pre_input.f->data[0] += INPLACE_OFFSET;
1273                 pre_input.f->data[1] += INPLACE_OFFSET;
1274                 pre_input.f->data[2] += INPLACE_OFFSET;
1275             }
1276
1277             s->dsp.shrink[scale](s->tmp_frames[i]->data[0], s->tmp_frames[i]->linesize[0],
1278                                  pre_input.f->data[0], pre_input.f->linesize[0],
1279                                  c->width,      c->height);
1280             s->dsp.shrink[scale](s->tmp_frames[i]->data[1], s->tmp_frames[i]->linesize[1],
1281                                  pre_input.f->data[1], pre_input.f->linesize[1],
1282                                  c->width >> 1, c->height >> 1);
1283             s->dsp.shrink[scale](s->tmp_frames[i]->data[2], s->tmp_frames[i]->linesize[2],
1284                                  pre_input.f->data[2], pre_input.f->linesize[2],
1285                                  c->width >> 1, c->height >> 1);
1286         }
1287     }
1288
1289     for (j = 0; j < s->max_b_frames + 1; j++) {
1290         int64_t rd = 0;
1291
1292         if (!s->input_picture[j])
1293             break;
1294
1295         c->error[0] = c->error[1] = c->error[2] = 0;
1296
1297         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1298         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1299
1300         out_size = encode_frame(c, s->tmp_frames[0]);
1301
1302         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1303
1304         for (i = 0; i < s->max_b_frames + 1; i++) {
1305             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1306
1307             s->tmp_frames[i + 1]->pict_type = is_p ?
1308                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1309             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1310
1311             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1312
1313             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1314         }
1315
1316         /* get the delayed frames */
1317         while (out_size) {
1318             out_size = encode_frame(c, NULL);
1319             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1320         }
1321
1322         rd += c->error[0] + c->error[1] + c->error[2];
1323
1324         if (rd < best_rd) {
1325             best_rd = rd;
1326             best_b_count = j;
1327         }
1328     }
1329
1330     avcodec_close(c);
1331     av_freep(&c);
1332
1333     return best_b_count;
1334 }
1335
1336 static int select_input_picture(MpegEncContext *s)
1337 {
1338     int i, ret;
1339
1340     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1341         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1342     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1343
1344     /* set next picture type & ordering */
1345     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1346         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1347             if (s->picture_in_gop_number < s->gop_size &&
1348                 s->next_picture_ptr &&
1349                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1350                 // FIXME check that te gop check above is +-1 correct
1351                 av_frame_unref(s->input_picture[0]->f);
1352
1353                 ff_vbv_update(s, 0);
1354
1355                 goto no_output_pic;
1356             }
1357         }
1358
1359         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1360             s->next_picture_ptr == NULL || s->intra_only) {
1361             s->reordered_input_picture[0] = s->input_picture[0];
1362             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1363             s->reordered_input_picture[0]->f->coded_picture_number =
1364                 s->coded_picture_number++;
1365         } else {
1366             int b_frames;
1367
1368             if (s->flags & CODEC_FLAG_PASS2) {
1369                 for (i = 0; i < s->max_b_frames + 1; i++) {
1370                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1371
1372                     if (pict_num >= s->rc_context.num_entries)
1373                         break;
1374                     if (!s->input_picture[i]) {
1375                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1376                         break;
1377                     }
1378
1379                     s->input_picture[i]->f->pict_type =
1380                         s->rc_context.entry[pict_num].new_pict_type;
1381                 }
1382             }
1383
1384             if (s->avctx->b_frame_strategy == 0) {
1385                 b_frames = s->max_b_frames;
1386                 while (b_frames && !s->input_picture[b_frames])
1387                     b_frames--;
1388             } else if (s->avctx->b_frame_strategy == 1) {
1389                 for (i = 1; i < s->max_b_frames + 1; i++) {
1390                     if (s->input_picture[i] &&
1391                         s->input_picture[i]->b_frame_score == 0) {
1392                         s->input_picture[i]->b_frame_score =
1393                             get_intra_count(s,
1394                                             s->input_picture[i    ]->f->data[0],
1395                                             s->input_picture[i - 1]->f->data[0],
1396                                             s->linesize) + 1;
1397                     }
1398                 }
1399                 for (i = 0; i < s->max_b_frames + 1; i++) {
1400                     if (s->input_picture[i] == NULL ||
1401                         s->input_picture[i]->b_frame_score - 1 >
1402                             s->mb_num / s->avctx->b_sensitivity)
1403                         break;
1404                 }
1405
1406                 b_frames = FFMAX(0, i - 1);
1407
1408                 /* reset scores */
1409                 for (i = 0; i < b_frames + 1; i++) {
1410                     s->input_picture[i]->b_frame_score = 0;
1411                 }
1412             } else if (s->avctx->b_frame_strategy == 2) {
1413                 b_frames = estimate_best_b_count(s);
1414             } else {
1415                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1416                 b_frames = 0;
1417             }
1418
1419             emms_c();
1420
1421             for (i = b_frames - 1; i >= 0; i--) {
1422                 int type = s->input_picture[i]->f->pict_type;
1423                 if (type && type != AV_PICTURE_TYPE_B)
1424                     b_frames = i;
1425             }
1426             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1427                 b_frames == s->max_b_frames) {
1428                 av_log(s->avctx, AV_LOG_ERROR,
1429                        "warning, too many b frames in a row\n");
1430             }
1431
1432             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1433                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1434                     s->gop_size > s->picture_in_gop_number) {
1435                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1436                 } else {
1437                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1438                         b_frames = 0;
1439                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1440                 }
1441             }
1442
1443             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1444                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1445                 b_frames--;
1446
1447             s->reordered_input_picture[0] = s->input_picture[b_frames];
1448             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1449                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1450             s->reordered_input_picture[0]->f->coded_picture_number =
1451                 s->coded_picture_number++;
1452             for (i = 0; i < b_frames; i++) {
1453                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1454                 s->reordered_input_picture[i + 1]->f->pict_type =
1455                     AV_PICTURE_TYPE_B;
1456                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1457                     s->coded_picture_number++;
1458             }
1459         }
1460     }
1461 no_output_pic:
1462     if (s->reordered_input_picture[0]) {
1463         s->reordered_input_picture[0]->reference =
1464            s->reordered_input_picture[0]->f->pict_type !=
1465                AV_PICTURE_TYPE_B ? 3 : 0;
1466
1467         ff_mpeg_unref_picture(s, &s->new_picture);
1468         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1469             return ret;
1470
1471         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1472             // input is a shared pix, so we can't modifiy it -> alloc a new
1473             // one & ensure that the shared one is reuseable
1474
1475             Picture *pic;
1476             int i = ff_find_unused_picture(s, 0);
1477             if (i < 0)
1478                 return i;
1479             pic = &s->picture[i];
1480
1481             pic->reference = s->reordered_input_picture[0]->reference;
1482             if (ff_alloc_picture(s, pic, 0) < 0) {
1483                 return -1;
1484             }
1485
1486             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1487             if (ret < 0)
1488                 return ret;
1489
1490             /* mark us unused / free shared pic */
1491             av_frame_unref(s->reordered_input_picture[0]->f);
1492             s->reordered_input_picture[0]->shared = 0;
1493
1494             s->current_picture_ptr = pic;
1495         } else {
1496             // input is not a shared pix -> reuse buffer for current_pix
1497             s->current_picture_ptr = s->reordered_input_picture[0];
1498             for (i = 0; i < 4; i++) {
1499                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1500             }
1501         }
1502         ff_mpeg_unref_picture(s, &s->current_picture);
1503         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1504                                        s->current_picture_ptr)) < 0)
1505             return ret;
1506
1507         s->picture_number = s->new_picture.f->display_picture_number;
1508     } else {
1509         ff_mpeg_unref_picture(s, &s->new_picture);
1510     }
1511     return 0;
1512 }
1513
1514 static void frame_end(MpegEncContext *s)
1515 {
1516     if (s->unrestricted_mv &&
1517         s->current_picture.reference &&
1518         !s->intra_only) {
1519         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1520         int hshift = desc->log2_chroma_w;
1521         int vshift = desc->log2_chroma_h;
1522         s->dsp.draw_edges(s->current_picture.f->data[0], s->current_picture.f->linesize[0],
1523                           s->h_edge_pos, s->v_edge_pos,
1524                           EDGE_WIDTH, EDGE_WIDTH,
1525                           EDGE_TOP | EDGE_BOTTOM);
1526         s->dsp.draw_edges(s->current_picture.f->data[1], s->current_picture.f->linesize[1],
1527                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1528                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1529                           EDGE_TOP | EDGE_BOTTOM);
1530         s->dsp.draw_edges(s->current_picture.f->data[2], s->current_picture.f->linesize[2],
1531                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1532                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1533                           EDGE_TOP | EDGE_BOTTOM);
1534     }
1535
1536     emms_c();
1537
1538     s->last_pict_type                 = s->pict_type;
1539     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1540     if (s->pict_type!= AV_PICTURE_TYPE_B)
1541         s->last_non_b_pict_type = s->pict_type;
1542
1543     s->avctx->coded_frame = s->current_picture_ptr->f;
1544
1545 }
1546
1547 static void update_noise_reduction(MpegEncContext *s)
1548 {
1549     int intra, i;
1550
1551     for (intra = 0; intra < 2; intra++) {
1552         if (s->dct_count[intra] > (1 << 16)) {
1553             for (i = 0; i < 64; i++) {
1554                 s->dct_error_sum[intra][i] >>= 1;
1555             }
1556             s->dct_count[intra] >>= 1;
1557         }
1558
1559         for (i = 0; i < 64; i++) {
1560             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1561                                        s->dct_count[intra] +
1562                                        s->dct_error_sum[intra][i] / 2) /
1563                                       (s->dct_error_sum[intra][i] + 1);
1564         }
1565     }
1566 }
1567
1568 static int frame_start(MpegEncContext *s)
1569 {
1570     int ret;
1571
1572     /* mark & release old frames */
1573     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1574         s->last_picture_ptr != s->next_picture_ptr &&
1575         s->last_picture_ptr->f->buf[0]) {
1576         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1577     }
1578
1579     s->current_picture_ptr->f->pict_type = s->pict_type;
1580     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1581
1582     ff_mpeg_unref_picture(s, &s->current_picture);
1583     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1584                                    s->current_picture_ptr)) < 0)
1585         return ret;
1586
1587     if (s->pict_type != AV_PICTURE_TYPE_B) {
1588         s->last_picture_ptr = s->next_picture_ptr;
1589         if (!s->droppable)
1590             s->next_picture_ptr = s->current_picture_ptr;
1591     }
1592
1593     if (s->last_picture_ptr) {
1594         ff_mpeg_unref_picture(s, &s->last_picture);
1595         if (s->last_picture_ptr->f->buf[0] &&
1596             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1597                                        s->last_picture_ptr)) < 0)
1598             return ret;
1599     }
1600     if (s->next_picture_ptr) {
1601         ff_mpeg_unref_picture(s, &s->next_picture);
1602         if (s->next_picture_ptr->f->buf[0] &&
1603             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1604                                        s->next_picture_ptr)) < 0)
1605             return ret;
1606     }
1607
1608     if (s->picture_structure!= PICT_FRAME) {
1609         int i;
1610         for (i = 0; i < 4; i++) {
1611             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1612                 s->current_picture.f->data[i] +=
1613                     s->current_picture.f->linesize[i];
1614             }
1615             s->current_picture.f->linesize[i] *= 2;
1616             s->last_picture.f->linesize[i]    *= 2;
1617             s->next_picture.f->linesize[i]    *= 2;
1618         }
1619     }
1620
1621     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1622         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1623         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1624     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1625         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1626         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1627     } else {
1628         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1629         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1630     }
1631
1632     if (s->dct_error_sum) {
1633         av_assert2(s->avctx->noise_reduction && s->encoding);
1634         update_noise_reduction(s);
1635     }
1636
1637     return 0;
1638 }
1639
1640 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1641                           const AVFrame *pic_arg, int *got_packet)
1642 {
1643     MpegEncContext *s = avctx->priv_data;
1644     int i, stuffing_count, ret;
1645     int context_count = s->slice_context_count;
1646
1647     s->picture_in_gop_number++;
1648
1649     if (load_input_picture(s, pic_arg) < 0)
1650         return -1;
1651
1652     if (select_input_picture(s) < 0) {
1653         return -1;
1654     }
1655
1656     /* output? */
1657     if (s->new_picture.f->data[0]) {
1658         if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1659             return ret;
1660         if (s->mb_info) {
1661             s->mb_info_ptr = av_packet_new_side_data(pkt,
1662                                  AV_PKT_DATA_H263_MB_INFO,
1663                                  s->mb_width*s->mb_height*12);
1664             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1665         }
1666
1667         for (i = 0; i < context_count; i++) {
1668             int start_y = s->thread_context[i]->start_mb_y;
1669             int   end_y = s->thread_context[i]->  end_mb_y;
1670             int h       = s->mb_height;
1671             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1672             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1673
1674             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1675         }
1676
1677         s->pict_type = s->new_picture.f->pict_type;
1678         //emms_c();
1679         ret = frame_start(s);
1680         if (ret < 0)
1681             return ret;
1682 vbv_retry:
1683         if (encode_picture(s, s->picture_number) < 0)
1684             return -1;
1685
1686         avctx->header_bits = s->header_bits;
1687         avctx->mv_bits     = s->mv_bits;
1688         avctx->misc_bits   = s->misc_bits;
1689         avctx->i_tex_bits  = s->i_tex_bits;
1690         avctx->p_tex_bits  = s->p_tex_bits;
1691         avctx->i_count     = s->i_count;
1692         // FIXME f/b_count in avctx
1693         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1694         avctx->skip_count  = s->skip_count;
1695
1696         frame_end(s);
1697
1698         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1699             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1700
1701         if (avctx->rc_buffer_size) {
1702             RateControlContext *rcc = &s->rc_context;
1703             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1704
1705             if (put_bits_count(&s->pb) > max_size &&
1706                 s->lambda < s->avctx->lmax) {
1707                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1708                                        (s->qscale + 1) / s->qscale);
1709                 if (s->adaptive_quant) {
1710                     int i;
1711                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1712                         s->lambda_table[i] =
1713                             FFMAX(s->lambda_table[i] + 1,
1714                                   s->lambda_table[i] * (s->qscale + 1) /
1715                                   s->qscale);
1716                 }
1717                 s->mb_skipped = 0;        // done in frame_start()
1718                 // done in encode_picture() so we must undo it
1719                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1720                     if (s->flipflop_rounding          ||
1721                         s->codec_id == AV_CODEC_ID_H263P ||
1722                         s->codec_id == AV_CODEC_ID_MPEG4)
1723                         s->no_rounding ^= 1;
1724                 }
1725                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1726                     s->time_base       = s->last_time_base;
1727                     s->last_non_b_time = s->time - s->pp_time;
1728                 }
1729                 for (i = 0; i < context_count; i++) {
1730                     PutBitContext *pb = &s->thread_context[i]->pb;
1731                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1732                 }
1733                 goto vbv_retry;
1734             }
1735
1736             av_assert0(s->avctx->rc_max_rate);
1737         }
1738
1739         if (s->flags & CODEC_FLAG_PASS1)
1740             ff_write_pass1_stats(s);
1741
1742         for (i = 0; i < 4; i++) {
1743             s->current_picture_ptr->f->error[i] =
1744             s->current_picture.f->error[i] =
1745                 s->current_picture.error[i];
1746             avctx->error[i] += s->current_picture_ptr->f->error[i];
1747         }
1748
1749         if (s->flags & CODEC_FLAG_PASS1)
1750             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1751                    avctx->i_tex_bits + avctx->p_tex_bits ==
1752                        put_bits_count(&s->pb));
1753         flush_put_bits(&s->pb);
1754         s->frame_bits  = put_bits_count(&s->pb);
1755
1756         stuffing_count = ff_vbv_update(s, s->frame_bits);
1757         s->stuffing_bits = 8*stuffing_count;
1758         if (stuffing_count) {
1759             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1760                     stuffing_count + 50) {
1761                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1762                 return -1;
1763             }
1764
1765             switch (s->codec_id) {
1766             case AV_CODEC_ID_MPEG1VIDEO:
1767             case AV_CODEC_ID_MPEG2VIDEO:
1768                 while (stuffing_count--) {
1769                     put_bits(&s->pb, 8, 0);
1770                 }
1771             break;
1772             case AV_CODEC_ID_MPEG4:
1773                 put_bits(&s->pb, 16, 0);
1774                 put_bits(&s->pb, 16, 0x1C3);
1775                 stuffing_count -= 4;
1776                 while (stuffing_count--) {
1777                     put_bits(&s->pb, 8, 0xFF);
1778                 }
1779             break;
1780             default:
1781                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1782             }
1783             flush_put_bits(&s->pb);
1784             s->frame_bits  = put_bits_count(&s->pb);
1785         }
1786
1787         /* update mpeg1/2 vbv_delay for CBR */
1788         if (s->avctx->rc_max_rate                          &&
1789             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1790             s->out_format == FMT_MPEG1                     &&
1791             90000LL * (avctx->rc_buffer_size - 1) <=
1792                 s->avctx->rc_max_rate * 0xFFFFLL) {
1793             int vbv_delay, min_delay;
1794             double inbits  = s->avctx->rc_max_rate *
1795                              av_q2d(s->avctx->time_base);
1796             int    minbits = s->frame_bits - 8 *
1797                              (s->vbv_delay_ptr - s->pb.buf - 1);
1798             double bits    = s->rc_context.buffer_index + minbits - inbits;
1799
1800             if (bits < 0)
1801                 av_log(s->avctx, AV_LOG_ERROR,
1802                        "Internal error, negative bits\n");
1803
1804             assert(s->repeat_first_field == 0);
1805
1806             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1807             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1808                         s->avctx->rc_max_rate;
1809
1810             vbv_delay = FFMAX(vbv_delay, min_delay);
1811
1812             av_assert0(vbv_delay < 0xFFFF);
1813
1814             s->vbv_delay_ptr[0] &= 0xF8;
1815             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1816             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1817             s->vbv_delay_ptr[2] &= 0x07;
1818             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1819             avctx->vbv_delay     = vbv_delay * 300;
1820         }
1821         s->total_bits     += s->frame_bits;
1822         avctx->frame_bits  = s->frame_bits;
1823
1824         pkt->pts = s->current_picture.f->pts;
1825         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1826             if (!s->current_picture.f->coded_picture_number)
1827                 pkt->dts = pkt->pts - s->dts_delta;
1828             else
1829                 pkt->dts = s->reordered_pts;
1830             s->reordered_pts = pkt->pts;
1831         } else
1832             pkt->dts = pkt->pts;
1833         if (s->current_picture.f->key_frame)
1834             pkt->flags |= AV_PKT_FLAG_KEY;
1835         if (s->mb_info)
1836             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1837     } else {
1838         s->frame_bits = 0;
1839     }
1840
1841     /* release non-reference frames */
1842     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1843         if (!s->picture[i].reference)
1844             ff_mpeg_unref_picture(s, &s->picture[i]);
1845     }
1846
1847     av_assert1((s->frame_bits & 7) == 0);
1848
1849     pkt->size = s->frame_bits / 8;
1850     *got_packet = !!pkt->size;
1851     return 0;
1852 }
1853
1854 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1855                                                 int n, int threshold)
1856 {
1857     static const char tab[64] = {
1858         3, 2, 2, 1, 1, 1, 1, 1,
1859         1, 1, 1, 1, 1, 1, 1, 1,
1860         1, 1, 1, 1, 1, 1, 1, 1,
1861         0, 0, 0, 0, 0, 0, 0, 0,
1862         0, 0, 0, 0, 0, 0, 0, 0,
1863         0, 0, 0, 0, 0, 0, 0, 0,
1864         0, 0, 0, 0, 0, 0, 0, 0,
1865         0, 0, 0, 0, 0, 0, 0, 0
1866     };
1867     int score = 0;
1868     int run = 0;
1869     int i;
1870     int16_t *block = s->block[n];
1871     const int last_index = s->block_last_index[n];
1872     int skip_dc;
1873
1874     if (threshold < 0) {
1875         skip_dc = 0;
1876         threshold = -threshold;
1877     } else
1878         skip_dc = 1;
1879
1880     /* Are all we could set to zero already zero? */
1881     if (last_index <= skip_dc - 1)
1882         return;
1883
1884     for (i = 0; i <= last_index; i++) {
1885         const int j = s->intra_scantable.permutated[i];
1886         const int level = FFABS(block[j]);
1887         if (level == 1) {
1888             if (skip_dc && i == 0)
1889                 continue;
1890             score += tab[run];
1891             run = 0;
1892         } else if (level > 1) {
1893             return;
1894         } else {
1895             run++;
1896         }
1897     }
1898     if (score >= threshold)
1899         return;
1900     for (i = skip_dc; i <= last_index; i++) {
1901         const int j = s->intra_scantable.permutated[i];
1902         block[j] = 0;
1903     }
1904     if (block[0])
1905         s->block_last_index[n] = 0;
1906     else
1907         s->block_last_index[n] = -1;
1908 }
1909
1910 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1911                                int last_index)
1912 {
1913     int i;
1914     const int maxlevel = s->max_qcoeff;
1915     const int minlevel = s->min_qcoeff;
1916     int overflow = 0;
1917
1918     if (s->mb_intra) {
1919         i = 1; // skip clipping of intra dc
1920     } else
1921         i = 0;
1922
1923     for (; i <= last_index; i++) {
1924         const int j = s->intra_scantable.permutated[i];
1925         int level = block[j];
1926
1927         if (level > maxlevel) {
1928             level = maxlevel;
1929             overflow++;
1930         } else if (level < minlevel) {
1931             level = minlevel;
1932             overflow++;
1933         }
1934
1935         block[j] = level;
1936     }
1937
1938     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1939         av_log(s->avctx, AV_LOG_INFO,
1940                "warning, clipping %d dct coefficients to %d..%d\n",
1941                overflow, minlevel, maxlevel);
1942 }
1943
/**
 * Compute an 8x8 table of weights from the local activity of an 8x8 pixel
 * block.
 *
 * For every pixel the neighbourhood consisting of the pixel and its direct
 * neighbours (clipped to the 8x8 block) is examined; the weight is
 * 36 * sqrt(count * sum(v^2) - sum(v)^2) / count, where count is the number
 * of pixels in that neighbourhood.
 *
 * @param weight output, 64 entries in row-major order
 * @param ptr    top-left pixel of the block
 * @param stride distance between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int bx, by;

    // FIXME optimize
    for (by = 0; by < 8; by++) {
        const int row_first = FFMAX(by - 1, 0);
        const int row_end   = FFMIN(8, by + 2);

        for (bx = 0; bx < 8; bx++) {
            const int col_first = FFMAX(bx - 1, 0);
            const int col_end   = FFMIN(8, bx + 2);
            const int count     = (row_end - row_first) *
                                  (col_end - col_first);
            int sum = 0;
            int sqr = 0;
            int nx, ny;

            for (ny = row_first; ny < row_end; ny++) {
                for (nx = col_first; nx < col_end; nx++) {
                    const int v = ptr[nx + ny * stride];

                    sum += v;
                    sqr += v * v;
                }
            }

            weight[bx + 8 * by] =
                (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
1967
1968 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1969                                                 int motion_x, int motion_y,
1970                                                 int mb_block_height,
1971                                                 int mb_block_width,
1972                                                 int mb_block_count)
1973 {
1974     int16_t weight[12][64];
1975     int16_t orig[12][64];
1976     const int mb_x = s->mb_x;
1977     const int mb_y = s->mb_y;
1978     int i;
1979     int skip_dct[12];
1980     int dct_offset = s->linesize * 8; // default for progressive frames
1981     int uv_dct_offset = s->uvlinesize * 8;
1982     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1983     ptrdiff_t wrap_y, wrap_c;
1984
1985     for (i = 0; i < mb_block_count; i++)
1986         skip_dct[i] = s->skipdct;
1987
1988     if (s->adaptive_quant) {
1989         const int last_qp = s->qscale;
1990         const int mb_xy = mb_x + mb_y * s->mb_stride;
1991
1992         s->lambda = s->lambda_table[mb_xy];
1993         update_qscale(s);
1994
1995         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1996             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1997             s->dquant = s->qscale - last_qp;
1998
1999             if (s->out_format == FMT_H263) {
2000                 s->dquant = av_clip(s->dquant, -2, 2);
2001
2002                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2003                     if (!s->mb_intra) {
2004                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2005                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2006                                 s->dquant = 0;
2007                         }
2008                         if (s->mv_type == MV_TYPE_8X8)
2009                             s->dquant = 0;
2010                     }
2011                 }
2012             }
2013         }
2014         ff_set_qscale(s, last_qp + s->dquant);
2015     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2016         ff_set_qscale(s, s->qscale + s->dquant);
2017
2018     wrap_y = s->linesize;
2019     wrap_c = s->uvlinesize;
2020     ptr_y  = s->new_picture.f->data[0] +
2021              (mb_y * 16 * wrap_y)              + mb_x * 16;
2022     ptr_cb = s->new_picture.f->data[1] +
2023              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2024     ptr_cr = s->new_picture.f->data[2] +
2025              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2026
2027     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2028         uint8_t *ebuf = s->edge_emu_buffer + 36 * wrap_y;
2029         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2030         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2031         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2032                                  wrap_y, wrap_y,
2033                                  16, 16, mb_x * 16, mb_y * 16,
2034                                  s->width, s->height);
2035         ptr_y = ebuf;
2036         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2037                                  wrap_c, wrap_c,
2038                                  mb_block_width, mb_block_height,
2039                                  mb_x * mb_block_width, mb_y * mb_block_height,
2040                                  cw, ch);
2041         ptr_cb = ebuf + 16 * wrap_y;
2042         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2043                                  wrap_c, wrap_c,
2044                                  mb_block_width, mb_block_height,
2045                                  mb_x * mb_block_width, mb_y * mb_block_height,
2046                                  cw, ch);
2047         ptr_cr = ebuf + 16 * wrap_y + 16;
2048     }
2049
2050     if (s->mb_intra) {
2051         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2052             int progressive_score, interlaced_score;
2053
2054             s->interlaced_dct = 0;
2055             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
2056                                                     NULL, wrap_y, 8) +
2057                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2058                                                     NULL, wrap_y, 8) - 400;
2059
2060             if (progressive_score > 0) {
2061                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
2062                                                        NULL, wrap_y * 2, 8) +
2063                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
2064                                                        NULL, wrap_y * 2, 8);
2065                 if (progressive_score > interlaced_score) {
2066                     s->interlaced_dct = 1;
2067
2068                     dct_offset = wrap_y;
2069                     uv_dct_offset = wrap_c;
2070                     wrap_y <<= 1;
2071                     if (s->chroma_format == CHROMA_422 ||
2072                         s->chroma_format == CHROMA_444)
2073                         wrap_c <<= 1;
2074                 }
2075             }
2076         }
2077
2078         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
2079         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
2080         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
2081         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
2082
2083         if (s->flags & CODEC_FLAG_GRAY) {
2084             skip_dct[4] = 1;
2085             skip_dct[5] = 1;
2086         } else {
2087             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2088             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2089             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2090                 s->dsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2091                 s->dsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2092             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2093                 s->dsp.get_pixels(s->block[6], ptr_cb + 8, wrap_c);
2094                 s->dsp.get_pixels(s->block[7], ptr_cr + 8, wrap_c);
2095                 s->dsp.get_pixels(s->block[8], ptr_cb + uv_dct_offset, wrap_c);
2096                 s->dsp.get_pixels(s->block[9], ptr_cr + uv_dct_offset, wrap_c);
2097                 s->dsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2098                 s->dsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2099             }
2100         }
2101     } else {
2102         op_pixels_func (*op_pix)[4];
2103         qpel_mc_func (*op_qpix)[16];
2104         uint8_t *dest_y, *dest_cb, *dest_cr;
2105
2106         dest_y  = s->dest[0];
2107         dest_cb = s->dest[1];
2108         dest_cr = s->dest[2];
2109
2110         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2111             op_pix  = s->hdsp.put_pixels_tab;
2112             op_qpix = s->qdsp.put_qpel_pixels_tab;
2113         } else {
2114             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2115             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2116         }
2117
2118         if (s->mv_dir & MV_DIR_FORWARD) {
2119             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
2120                           s->last_picture.f->data,
2121                           op_pix, op_qpix);
2122             op_pix  = s->hdsp.avg_pixels_tab;
2123             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2124         }
2125         if (s->mv_dir & MV_DIR_BACKWARD) {
2126             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
2127                           s->next_picture.f->data,
2128                           op_pix, op_qpix);
2129         }
2130
2131         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2132             int progressive_score, interlaced_score;
2133
2134             s->interlaced_dct = 0;
2135             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
2136                                                     ptr_y,              wrap_y,
2137                                                     8) +
2138                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
2139                                                     ptr_y + wrap_y * 8, wrap_y,
2140                                                     8) - 400;
2141
2142             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2143                 progressive_score -= 400;
2144
2145             if (progressive_score > 0) {
2146                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
2147                                                        ptr_y,
2148                                                        wrap_y * 2, 8) +
2149                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
2150                                                        ptr_y + wrap_y,
2151                                                        wrap_y * 2, 8);
2152
2153                 if (progressive_score > interlaced_score) {
2154                     s->interlaced_dct = 1;
2155
2156                     dct_offset = wrap_y;
2157                     uv_dct_offset = wrap_c;
2158                     wrap_y <<= 1;
2159                     if (s->chroma_format == CHROMA_422)
2160                         wrap_c <<= 1;
2161                 }
2162             }
2163         }
2164
2165         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2166         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2167         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2168                            dest_y + dct_offset, wrap_y);
2169         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2170                            dest_y + dct_offset + 8, wrap_y);
2171
2172         if (s->flags & CODEC_FLAG_GRAY) {
2173             skip_dct[4] = 1;
2174             skip_dct[5] = 1;
2175         } else {
2176             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2177             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2178             if (!s->chroma_y_shift) { /* 422 */
2179                 s->dsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2180                                    dest_cb + uv_dct_offset, wrap_c);
2181                 s->dsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2182                                    dest_cr + uv_dct_offset, wrap_c);
2183             }
2184         }
2185         /* pre quantization */
2186         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2187                 2 * s->qscale * s->qscale) {
2188             // FIXME optimize
2189             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
2190                               wrap_y, 8) < 20 * s->qscale)
2191                 skip_dct[0] = 1;
2192             if (s->dsp.sad[1](NULL, ptr_y + 8,
2193                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2194                 skip_dct[1] = 1;
2195             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
2196                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
2197                 skip_dct[2] = 1;
2198             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
2199                               dest_y + dct_offset + 8,
2200                               wrap_y, 8) < 20 * s->qscale)
2201                 skip_dct[3] = 1;
2202             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
2203                               wrap_c, 8) < 20 * s->qscale)
2204                 skip_dct[4] = 1;
2205             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
2206                               wrap_c, 8) < 20 * s->qscale)
2207                 skip_dct[5] = 1;
2208             if (!s->chroma_y_shift) { /* 422 */
2209                 if (s->dsp.sad[1](NULL, ptr_cb + uv_dct_offset,
2210                                   dest_cb + uv_dct_offset,
2211                                   wrap_c, 8) < 20 * s->qscale)
2212                     skip_dct[6] = 1;
2213                 if (s->dsp.sad[1](NULL, ptr_cr + uv_dct_offset,
2214                                   dest_cr + uv_dct_offset,
2215                                   wrap_c, 8) < 20 * s->qscale)
2216                     skip_dct[7] = 1;
2217             }
2218         }
2219     }
2220
2221     if (s->quantizer_noise_shaping) {
2222         if (!skip_dct[0])
2223             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2224         if (!skip_dct[1])
2225             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2226         if (!skip_dct[2])
2227             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2228         if (!skip_dct[3])
2229             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2230         if (!skip_dct[4])
2231             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2232         if (!skip_dct[5])
2233             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2234         if (!s->chroma_y_shift) { /* 422 */
2235             if (!skip_dct[6])
2236                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2237                                   wrap_c);
2238             if (!skip_dct[7])
2239                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2240                                   wrap_c);
2241         }
2242         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2243     }
2244
2245     /* DCT & quantize */
2246     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2247     {
2248         for (i = 0; i < mb_block_count; i++) {
2249             if (!skip_dct[i]) {
2250                 int overflow;
2251                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2252                 // FIXME we could decide to change to quantizer instead of
2253                 // clipping
2254                 // JS: I don't think that would be a good idea it could lower
2255                 //     quality instead of improve it. Just INTRADC clipping
2256                 //     deserves changes in quantizer
2257                 if (overflow)
2258                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2259             } else
2260                 s->block_last_index[i] = -1;
2261         }
2262         if (s->quantizer_noise_shaping) {
2263             for (i = 0; i < mb_block_count; i++) {
2264                 if (!skip_dct[i]) {
2265                     s->block_last_index[i] =
2266                         dct_quantize_refine(s, s->block[i], weight[i],
2267                                             orig[i], i, s->qscale);
2268                 }
2269             }
2270         }
2271
2272         if (s->luma_elim_threshold && !s->mb_intra)
2273             for (i = 0; i < 4; i++)
2274                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2275         if (s->chroma_elim_threshold && !s->mb_intra)
2276             for (i = 4; i < mb_block_count; i++)
2277                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2278
2279         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2280             for (i = 0; i < mb_block_count; i++) {
2281                 if (s->block_last_index[i] == -1)
2282                     s->coded_score[i] = INT_MAX / 256;
2283             }
2284         }
2285     }
2286
2287     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2288         s->block_last_index[4] =
2289         s->block_last_index[5] = 0;
2290         s->block[4][0] =
2291         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2292         if (!s->chroma_y_shift) { /* 422 / 444 */
2293             for (i=6; i<12; i++) {
2294                 s->block_last_index[i] = 0;
2295                 s->block[i][0] = s->block[4][0];
2296             }
2297         }
2298     }
2299
2300     // non c quantize code returns incorrect block_last_index FIXME
2301     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2302         for (i = 0; i < mb_block_count; i++) {
2303             int j;
2304             if (s->block_last_index[i] > 0) {
2305                 for (j = 63; j > 0; j--) {
2306                     if (s->block[i][s->intra_scantable.permutated[j]])
2307                         break;
2308                 }
2309                 s->block_last_index[i] = j;
2310             }
2311         }
2312     }
2313
2314     /* huffman encode */
2315     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2316     case AV_CODEC_ID_MPEG1VIDEO:
2317     case AV_CODEC_ID_MPEG2VIDEO:
2318         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2319             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2320         break;
2321     case AV_CODEC_ID_MPEG4:
2322         if (CONFIG_MPEG4_ENCODER)
2323             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2324         break;
2325     case AV_CODEC_ID_MSMPEG4V2:
2326     case AV_CODEC_ID_MSMPEG4V3:
2327     case AV_CODEC_ID_WMV1:
2328         if (CONFIG_MSMPEG4_ENCODER)
2329             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2330         break;
2331     case AV_CODEC_ID_WMV2:
2332         if (CONFIG_WMV2_ENCODER)
2333             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2334         break;
2335     case AV_CODEC_ID_H261:
2336         if (CONFIG_H261_ENCODER)
2337             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2338         break;
2339     case AV_CODEC_ID_H263:
2340     case AV_CODEC_ID_H263P:
2341     case AV_CODEC_ID_FLV1:
2342     case AV_CODEC_ID_RV10:
2343     case AV_CODEC_ID_RV20:
2344         if (CONFIG_H263_ENCODER)
2345             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2346         break;
2347     case AV_CODEC_ID_MJPEG:
2348     case AV_CODEC_ID_AMV:
2349         if (CONFIG_MJPEG_ENCODER)
2350             ff_mjpeg_encode_mb(s, s->block);
2351         break;
2352     default:
2353         av_assert1(0);
2354     }
2355 }
2356
2357 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2358 {
2359     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2360     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2361     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2362 }
2363
2364 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2365     int i;
2366
2367     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2368
2369     /* mpeg1 */
2370     d->mb_skip_run= s->mb_skip_run;
2371     for(i=0; i<3; i++)
2372         d->last_dc[i] = s->last_dc[i];
2373
2374     /* statistics */
2375     d->mv_bits= s->mv_bits;
2376     d->i_tex_bits= s->i_tex_bits;
2377     d->p_tex_bits= s->p_tex_bits;
2378     d->i_count= s->i_count;
2379     d->f_count= s->f_count;
2380     d->b_count= s->b_count;
2381     d->skip_count= s->skip_count;
2382     d->misc_bits= s->misc_bits;
2383     d->last_bits= 0;
2384
2385     d->mb_skipped= 0;
2386     d->qscale= s->qscale;
2387     d->dquant= s->dquant;
2388
2389     d->esc3_level_length= s->esc3_level_length;
2390 }
2391
2392 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2393     int i;
2394
2395     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2396     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2397
2398     /* mpeg1 */
2399     d->mb_skip_run= s->mb_skip_run;
2400     for(i=0; i<3; i++)
2401         d->last_dc[i] = s->last_dc[i];
2402
2403     /* statistics */
2404     d->mv_bits= s->mv_bits;
2405     d->i_tex_bits= s->i_tex_bits;
2406     d->p_tex_bits= s->p_tex_bits;
2407     d->i_count= s->i_count;
2408     d->f_count= s->f_count;
2409     d->b_count= s->b_count;
2410     d->skip_count= s->skip_count;
2411     d->misc_bits= s->misc_bits;
2412
2413     d->mb_intra= s->mb_intra;
2414     d->mb_skipped= s->mb_skipped;
2415     d->mv_type= s->mv_type;
2416     d->mv_dir= s->mv_dir;
2417     d->pb= s->pb;
2418     if(s->data_partitioning){
2419         d->pb2= s->pb2;
2420         d->tex_pb= s->tex_pb;
2421     }
2422     d->block= s->block;
2423     for(i=0; i<8; i++)
2424         d->block_last_index[i]= s->block_last_index[i];
2425     d->interlaced_dct= s->interlaced_dct;
2426     d->qscale= s->qscale;
2427
2428     d->esc3_level_length= s->esc3_level_length;
2429 }
2430
2431 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2432                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2433                            int *dmin, int *next_block, int motion_x, int motion_y)
2434 {
2435     int score;
2436     uint8_t *dest_backup[3];
2437
2438     copy_context_before_encode(s, backup, type);
2439
2440     s->block= s->blocks[*next_block];
2441     s->pb= pb[*next_block];
2442     if(s->data_partitioning){
2443         s->pb2   = pb2   [*next_block];
2444         s->tex_pb= tex_pb[*next_block];
2445     }
2446
2447     if(*next_block){
2448         memcpy(dest_backup, s->dest, sizeof(s->dest));
2449         s->dest[0] = s->rd_scratchpad;
2450         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2451         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2452         av_assert0(s->linesize >= 32); //FIXME
2453     }
2454
2455     encode_mb(s, motion_x, motion_y);
2456
2457     score= put_bits_count(&s->pb);
2458     if(s->data_partitioning){
2459         score+= put_bits_count(&s->pb2);
2460         score+= put_bits_count(&s->tex_pb);
2461     }
2462
2463     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2464         ff_MPV_decode_mb(s, s->block);
2465
2466         score *= s->lambda2;
2467         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2468     }
2469
2470     if(*next_block){
2471         memcpy(s->dest, dest_backup, sizeof(s->dest));
2472     }
2473
2474     if(score<*dmin){
2475         *dmin= score;
2476         *next_block^=1;
2477
2478         copy_context_after_encode(best, s, type);
2479     }
2480 }
2481
2482 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2483     uint32_t *sq = ff_square_tab + 256;
2484     int acc=0;
2485     int x,y;
2486
2487     if(w==16 && h==16)
2488         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2489     else if(w==8 && h==8)
2490         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2491
2492     for(y=0; y<h; y++){
2493         for(x=0; x<w; x++){
2494             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2495         }
2496     }
2497
2498     av_assert2(acc>=0);
2499
2500     return acc;
2501 }
2502
2503 static int sse_mb(MpegEncContext *s){
2504     int w= 16;
2505     int h= 16;
2506
2507     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2508     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2509
2510     if(w==16 && h==16)
2511       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2512         return  s->dsp.nsse[0](s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2513                +s->dsp.nsse[1](s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2514                +s->dsp.nsse[1](s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2515       }else{
2516         return  s->dsp.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2517                +s->dsp.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2518                +s->dsp.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2519       }
2520     else
2521         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2522                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2523                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2524 }
2525
2526 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2527     MpegEncContext *s= *(void**)arg;
2528
2529
2530     s->me.pre_pass=1;
2531     s->me.dia_size= s->avctx->pre_dia_size;
2532     s->first_slice_line=1;
2533     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2534         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2535             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2536         }
2537         s->first_slice_line=0;
2538     }
2539
2540     s->me.pre_pass=0;
2541
2542     return 0;
2543 }
2544
2545 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2546     MpegEncContext *s= *(void**)arg;
2547
2548     ff_check_alignment();
2549
2550     s->me.dia_size= s->avctx->dia_size;
2551     s->first_slice_line=1;
2552     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2553         s->mb_x=0; //for block init below
2554         ff_init_block_index(s);
2555         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2556             s->block_index[0]+=2;
2557             s->block_index[1]+=2;
2558             s->block_index[2]+=2;
2559             s->block_index[3]+=2;
2560
2561             /* compute motion vector & mb_type and store in context */
2562             if(s->pict_type==AV_PICTURE_TYPE_B)
2563                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2564             else
2565                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2566         }
2567         s->first_slice_line=0;
2568     }
2569     return 0;
2570 }
2571
2572 static int mb_var_thread(AVCodecContext *c, void *arg){
2573     MpegEncContext *s= *(void**)arg;
2574     int mb_x, mb_y;
2575
2576     ff_check_alignment();
2577
2578     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2579         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2580             int xx = mb_x * 16;
2581             int yy = mb_y * 16;
2582             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2583             int varc;
2584             int sum = s->dsp.pix_sum(pix, s->linesize);
2585
2586             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2587
2588             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2589             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2590             s->me.mb_var_sum_temp    += varc;
2591         }
2592     }
2593     return 0;
2594 }
2595
2596 static void write_slice_end(MpegEncContext *s){
2597     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2598         if(s->partitioned_frame){
2599             ff_mpeg4_merge_partitions(s);
2600         }
2601
2602         ff_mpeg4_stuffing(&s->pb);
2603     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2604         ff_mjpeg_encode_stuffing(s);
2605     }
2606
2607     avpriv_align_put_bits(&s->pb);
2608     flush_put_bits(&s->pb);
2609
2610     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2611         s->misc_bits+= get_bits_diff(s);
2612 }
2613
2614 static void write_mb_info(MpegEncContext *s)
2615 {
2616     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2617     int offset = put_bits_count(&s->pb);
2618     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2619     int gobn = s->mb_y / s->gob_index;
2620     int pred_x, pred_y;
2621     if (CONFIG_H263_ENCODER)
2622         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2623     bytestream_put_le32(&ptr, offset);
2624     bytestream_put_byte(&ptr, s->qscale);
2625     bytestream_put_byte(&ptr, gobn);
2626     bytestream_put_le16(&ptr, mba);
2627     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2628     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2629     /* 4MV not implemented */
2630     bytestream_put_byte(&ptr, 0); /* hmv2 */
2631     bytestream_put_byte(&ptr, 0); /* vmv2 */
2632 }
2633
2634 static void update_mb_info(MpegEncContext *s, int startcode)
2635 {
2636     if (!s->mb_info)
2637         return;
2638     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2639         s->mb_info_size += 12;
2640         s->prev_mb_info = s->last_mb_info;
2641     }
2642     if (startcode) {
2643         s->prev_mb_info = put_bits_count(&s->pb)/8;
2644         /* This might have incremented mb_info_size above, and we return without
2645          * actually writing any info into that slot yet. But in that case,
2646          * this will be called again at the start of the after writing the
2647          * start code, actually writing the mb info. */
2648         return;
2649     }
2650
2651     s->last_mb_info = put_bits_count(&s->pb)/8;
2652     if (!s->mb_info_size)
2653         s->mb_info_size += 12;
2654     write_mb_info(s);
2655 }
2656
2657 static int encode_thread(AVCodecContext *c, void *arg){
2658     MpegEncContext *s= *(void**)arg;
2659     int mb_x, mb_y, pdif = 0;
2660     int chr_h= 16>>s->chroma_y_shift;
2661     int i, j;
2662     MpegEncContext best_s, backup_s;
2663     uint8_t bit_buf[2][MAX_MB_BYTES];
2664     uint8_t bit_buf2[2][MAX_MB_BYTES];
2665     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2666     PutBitContext pb[2], pb2[2], tex_pb[2];
2667
2668     ff_check_alignment();
2669
2670     for(i=0; i<2; i++){
2671         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2672         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2673         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2674     }
2675
2676     s->last_bits= put_bits_count(&s->pb);
2677     s->mv_bits=0;
2678     s->misc_bits=0;
2679     s->i_tex_bits=0;
2680     s->p_tex_bits=0;
2681     s->i_count=0;
2682     s->f_count=0;
2683     s->b_count=0;
2684     s->skip_count=0;
2685
2686     for(i=0; i<3; i++){
2687         /* init last dc values */
2688         /* note: quant matrix value (8) is implied here */
2689         s->last_dc[i] = 128 << s->intra_dc_precision;
2690
2691         s->current_picture.error[i] = 0;
2692     }
2693     if(s->codec_id==AV_CODEC_ID_AMV){
2694         s->last_dc[0] = 128*8/13;
2695         s->last_dc[1] = 128*8/14;
2696         s->last_dc[2] = 128*8/14;
2697     }
2698     s->mb_skip_run = 0;
2699     memset(s->last_mv, 0, sizeof(s->last_mv));
2700
2701     s->last_mv_dir = 0;
2702
2703     switch(s->codec_id){
2704     case AV_CODEC_ID_H263:
2705     case AV_CODEC_ID_H263P:
2706     case AV_CODEC_ID_FLV1:
2707         if (CONFIG_H263_ENCODER)
2708             s->gob_index = ff_h263_get_gob_height(s);
2709         break;
2710     case AV_CODEC_ID_MPEG4:
2711         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2712             ff_mpeg4_init_partitions(s);
2713         break;
2714     }
2715
2716     s->resync_mb_x=0;
2717     s->resync_mb_y=0;
2718     s->first_slice_line = 1;
2719     s->ptr_lastgob = s->pb.buf;
2720     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2721         s->mb_x=0;
2722         s->mb_y= mb_y;
2723
2724         ff_set_qscale(s, s->qscale);
2725         ff_init_block_index(s);
2726
2727         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2728             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2729             int mb_type= s->mb_type[xy];
2730 //            int d;
2731             int dmin= INT_MAX;
2732             int dir;
2733
2734             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2735                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2736                 return -1;
2737             }
2738             if(s->data_partitioning){
2739                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2740                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2741                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2742                     return -1;
2743                 }
2744             }
2745
2746             s->mb_x = mb_x;
2747             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2748             ff_update_block_index(s);
2749
2750             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2751                 ff_h261_reorder_mb_index(s);
2752                 xy= s->mb_y*s->mb_stride + s->mb_x;
2753                 mb_type= s->mb_type[xy];
2754             }
2755
2756             /* write gob / video packet header  */
2757             if(s->rtp_mode){
2758                 int current_packet_size, is_gob_start;
2759
2760                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2761
2762                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2763
2764                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2765
2766                 switch(s->codec_id){
2767                 case AV_CODEC_ID_H263:
2768                 case AV_CODEC_ID_H263P:
2769                     if(!s->h263_slice_structured)
2770                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2771                     break;
2772                 case AV_CODEC_ID_MPEG2VIDEO:
2773                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2774                 case AV_CODEC_ID_MPEG1VIDEO:
2775                     if(s->mb_skip_run) is_gob_start=0;
2776                     break;
2777                 case AV_CODEC_ID_MJPEG:
2778                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2779                     break;
2780                 }
2781
2782                 if(is_gob_start){
2783                     if(s->start_mb_y != mb_y || mb_x!=0){
2784                         write_slice_end(s);
2785
2786                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2787                             ff_mpeg4_init_partitions(s);
2788                         }
2789                     }
2790
2791                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2792                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2793
2794                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2795                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2796                         int d = 100 / s->error_rate;
2797                         if(r % d == 0){
2798                             current_packet_size=0;
2799                             s->pb.buf_ptr= s->ptr_lastgob;
2800                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2801                         }
2802                     }
2803
2804                     if (s->avctx->rtp_callback){
2805                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2806                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2807                     }
2808                     update_mb_info(s, 1);
2809
2810                     switch(s->codec_id){
2811                     case AV_CODEC_ID_MPEG4:
2812                         if (CONFIG_MPEG4_ENCODER) {
2813                             ff_mpeg4_encode_video_packet_header(s);
2814                             ff_mpeg4_clean_buffers(s);
2815                         }
2816                     break;
2817                     case AV_CODEC_ID_MPEG1VIDEO:
2818                     case AV_CODEC_ID_MPEG2VIDEO:
2819                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2820                             ff_mpeg1_encode_slice_header(s);
2821                             ff_mpeg1_clean_buffers(s);
2822                         }
2823                     break;
2824                     case AV_CODEC_ID_H263:
2825                     case AV_CODEC_ID_H263P:
2826                         if (CONFIG_H263_ENCODER)
2827                             ff_h263_encode_gob_header(s, mb_y);
2828                     break;
2829                     }
2830
2831                     if(s->flags&CODEC_FLAG_PASS1){
2832                         int bits= put_bits_count(&s->pb);
2833                         s->misc_bits+= bits - s->last_bits;
2834                         s->last_bits= bits;
2835                     }
2836
2837                     s->ptr_lastgob += current_packet_size;
2838                     s->first_slice_line=1;
2839                     s->resync_mb_x=mb_x;
2840                     s->resync_mb_y=mb_y;
2841                 }
2842             }
2843
2844             if(  (s->resync_mb_x   == s->mb_x)
2845                && s->resync_mb_y+1 == s->mb_y){
2846                 s->first_slice_line=0;
2847             }
2848
2849             s->mb_skipped=0;
2850             s->dquant=0; //only for QP_RD
2851
2852             update_mb_info(s, 0);
2853
2854             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2855                 int next_block=0;
2856                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2857
2858                 copy_context_before_encode(&backup_s, s, -1);
2859                 backup_s.pb= s->pb;
2860                 best_s.data_partitioning= s->data_partitioning;
2861                 best_s.partitioned_frame= s->partitioned_frame;
2862                 if(s->data_partitioning){
2863                     backup_s.pb2= s->pb2;
2864                     backup_s.tex_pb= s->tex_pb;
2865                 }
2866
2867                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2868                     s->mv_dir = MV_DIR_FORWARD;
2869                     s->mv_type = MV_TYPE_16X16;
2870                     s->mb_intra= 0;
2871                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2872                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2873                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2874                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2875                 }
2876                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2877                     s->mv_dir = MV_DIR_FORWARD;
2878                     s->mv_type = MV_TYPE_FIELD;
2879                     s->mb_intra= 0;
2880                     for(i=0; i<2; i++){
2881                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2882                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2883                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2884                     }
2885                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2886                                  &dmin, &next_block, 0, 0);
2887                 }
2888                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2889                     s->mv_dir = MV_DIR_FORWARD;
2890                     s->mv_type = MV_TYPE_16X16;
2891                     s->mb_intra= 0;
2892                     s->mv[0][0][0] = 0;
2893                     s->mv[0][0][1] = 0;
2894                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2895                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2896                 }
2897                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2898                     s->mv_dir = MV_DIR_FORWARD;
2899                     s->mv_type = MV_TYPE_8X8;
2900                     s->mb_intra= 0;
2901                     for(i=0; i<4; i++){
2902                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2903                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2904                     }
2905                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2906                                  &dmin, &next_block, 0, 0);
2907                 }
2908                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2909                     s->mv_dir = MV_DIR_FORWARD;
2910                     s->mv_type = MV_TYPE_16X16;
2911                     s->mb_intra= 0;
2912                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2913                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2914                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2915                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2916                 }
2917                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2918                     s->mv_dir = MV_DIR_BACKWARD;
2919                     s->mv_type = MV_TYPE_16X16;
2920                     s->mb_intra= 0;
2921                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2922                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2923                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2924                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2925                 }
2926                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2927                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2928                     s->mv_type = MV_TYPE_16X16;
2929                     s->mb_intra= 0;
2930                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2931                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2932                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2933                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2934                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2935                                  &dmin, &next_block, 0, 0);
2936                 }
2937                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2938                     s->mv_dir = MV_DIR_FORWARD;
2939                     s->mv_type = MV_TYPE_FIELD;
2940                     s->mb_intra= 0;
2941                     for(i=0; i<2; i++){
2942                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2943                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2944                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2945                     }
2946                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2947                                  &dmin, &next_block, 0, 0);
2948                 }
2949                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2950                     s->mv_dir = MV_DIR_BACKWARD;
2951                     s->mv_type = MV_TYPE_FIELD;
2952                     s->mb_intra= 0;
2953                     for(i=0; i<2; i++){
2954                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2955                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2956                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2957                     }
2958                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2959                                  &dmin, &next_block, 0, 0);
2960                 }
2961                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2962                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2963                     s->mv_type = MV_TYPE_FIELD;
2964                     s->mb_intra= 0;
2965                     for(dir=0; dir<2; dir++){
2966                         for(i=0; i<2; i++){
2967                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2968                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2969                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2970                         }
2971                     }
2972                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2973                                  &dmin, &next_block, 0, 0);
2974                 }
2975                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2976                     s->mv_dir = 0;
2977                     s->mv_type = MV_TYPE_16X16;
2978                     s->mb_intra= 1;
2979                     s->mv[0][0][0] = 0;
2980                     s->mv[0][0][1] = 0;
2981                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2982                                  &dmin, &next_block, 0, 0);
2983                     if(s->h263_pred || s->h263_aic){
2984                         if(best_s.mb_intra)
2985                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2986                         else
2987                             ff_clean_intra_table_entries(s); //old mode?
2988                     }
2989                 }
2990
2991                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2992                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2993                         const int last_qp= backup_s.qscale;
2994                         int qpi, qp, dc[6];
2995                         int16_t ac[6][16];
2996                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2997                         static const int dquant_tab[4]={-1,1,-2,2};
2998                         int storecoefs = s->mb_intra && s->dc_val[0];
2999
3000                         av_assert2(backup_s.dquant == 0);
3001
3002                         //FIXME intra
3003                         s->mv_dir= best_s.mv_dir;
3004                         s->mv_type = MV_TYPE_16X16;
3005                         s->mb_intra= best_s.mb_intra;
3006                         s->mv[0][0][0] = best_s.mv[0][0][0];
3007                         s->mv[0][0][1] = best_s.mv[0][0][1];
3008                         s->mv[1][0][0] = best_s.mv[1][0][0];
3009                         s->mv[1][0][1] = best_s.mv[1][0][1];
3010
3011                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3012                         for(; qpi<4; qpi++){
3013                             int dquant= dquant_tab[qpi];
3014                             qp= last_qp + dquant;
3015                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3016                                 continue;
3017                             backup_s.dquant= dquant;
3018                             if(storecoefs){
3019                                 for(i=0; i<6; i++){
3020                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3021                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3022                                 }
3023                             }
3024
3025                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3026                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3027                             if(best_s.qscale != qp){
3028                                 if(storecoefs){
3029                                     for(i=0; i<6; i++){
3030                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3031                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3032                                     }
3033                                 }
3034                             }
3035                         }
3036                     }
3037                 }
3038                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3039                     int mx= s->b_direct_mv_table[xy][0];
3040                     int my= s->b_direct_mv_table[xy][1];
3041
3042                     backup_s.dquant = 0;
3043                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3044                     s->mb_intra= 0;
3045                     ff_mpeg4_set_direct_mv(s, mx, my);
3046                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3047                                  &dmin, &next_block, mx, my);
3048                 }
3049                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3050                     backup_s.dquant = 0;
3051                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3052                     s->mb_intra= 0;
3053                     ff_mpeg4_set_direct_mv(s, 0, 0);
3054                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3055                                  &dmin, &next_block, 0, 0);
3056                 }
3057                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3058                     int coded=0;
3059                     for(i=0; i<6; i++)
3060                         coded |= s->block_last_index[i];
3061                     if(coded){
3062                         int mx,my;
3063                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3064                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3065                             mx=my=0; //FIXME find the one we actually used
3066                             ff_mpeg4_set_direct_mv(s, mx, my);
3067                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3068                             mx= s->mv[1][0][0];
3069                             my= s->mv[1][0][1];
3070                         }else{
3071                             mx= s->mv[0][0][0];
3072                             my= s->mv[0][0][1];
3073                         }
3074
3075                         s->mv_dir= best_s.mv_dir;
3076                         s->mv_type = best_s.mv_type;
3077                         s->mb_intra= 0;
3078 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3079                         s->mv[0][0][1] = best_s.mv[0][0][1];
3080                         s->mv[1][0][0] = best_s.mv[1][0][0];
3081                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3082                         backup_s.dquant= 0;
3083                         s->skipdct=1;
3084                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3085                                         &dmin, &next_block, mx, my);
3086                         s->skipdct=0;
3087                     }
3088                 }
3089
3090                 s->current_picture.qscale_table[xy] = best_s.qscale;
3091
3092                 copy_context_after_encode(s, &best_s, -1);
3093
3094                 pb_bits_count= put_bits_count(&s->pb);
3095                 flush_put_bits(&s->pb);
3096                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3097                 s->pb= backup_s.pb;
3098
3099                 if(s->data_partitioning){
3100                     pb2_bits_count= put_bits_count(&s->pb2);
3101                     flush_put_bits(&s->pb2);
3102                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3103                     s->pb2= backup_s.pb2;
3104
3105                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3106                     flush_put_bits(&s->tex_pb);
3107                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3108                     s->tex_pb= backup_s.tex_pb;
3109                 }
3110                 s->last_bits= put_bits_count(&s->pb);
3111
3112                 if (CONFIG_H263_ENCODER &&
3113                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3114                     ff_h263_update_motion_val(s);
3115
3116                 if(next_block==0){ //FIXME 16 vs linesize16
3117                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
3118                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3119                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3120                 }
3121
3122                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3123                     ff_MPV_decode_mb(s, s->block);
3124             } else {
3125                 int motion_x = 0, motion_y = 0;
3126                 s->mv_type=MV_TYPE_16X16;
3127                 // only one MB-Type possible
3128
3129                 switch(mb_type){
3130                 case CANDIDATE_MB_TYPE_INTRA:
3131                     s->mv_dir = 0;
3132                     s->mb_intra= 1;
3133                     motion_x= s->mv[0][0][0] = 0;
3134                     motion_y= s->mv[0][0][1] = 0;
3135                     break;
3136                 case CANDIDATE_MB_TYPE_INTER:
3137                     s->mv_dir = MV_DIR_FORWARD;
3138                     s->mb_intra= 0;
3139                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3140                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3141                     break;
3142                 case CANDIDATE_MB_TYPE_INTER_I:
3143                     s->mv_dir = MV_DIR_FORWARD;
3144                     s->mv_type = MV_TYPE_FIELD;
3145                     s->mb_intra= 0;
3146                     for(i=0; i<2; i++){
3147                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3148                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3149                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3150                     }
3151                     break;
3152                 case CANDIDATE_MB_TYPE_INTER4V:
3153                     s->mv_dir = MV_DIR_FORWARD;
3154                     s->mv_type = MV_TYPE_8X8;
3155                     s->mb_intra= 0;
3156                     for(i=0; i<4; i++){
3157                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3158                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3159                     }
3160                     break;
3161                 case CANDIDATE_MB_TYPE_DIRECT:
3162                     if (CONFIG_MPEG4_ENCODER) {
3163                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3164                         s->mb_intra= 0;
3165                         motion_x=s->b_direct_mv_table[xy][0];
3166                         motion_y=s->b_direct_mv_table[xy][1];
3167                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3168                     }
3169                     break;
3170                 case CANDIDATE_MB_TYPE_DIRECT0:
3171                     if (CONFIG_MPEG4_ENCODER) {
3172                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3173                         s->mb_intra= 0;
3174                         ff_mpeg4_set_direct_mv(s, 0, 0);
3175                     }
3176                     break;
3177                 case CANDIDATE_MB_TYPE_BIDIR:
3178                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3179                     s->mb_intra= 0;
3180                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3181                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3182                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3183                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3184                     break;
3185                 case CANDIDATE_MB_TYPE_BACKWARD:
3186                     s->mv_dir = MV_DIR_BACKWARD;
3187                     s->mb_intra= 0;
3188                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3189                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3190                     break;
3191                 case CANDIDATE_MB_TYPE_FORWARD:
3192                     s->mv_dir = MV_DIR_FORWARD;
3193                     s->mb_intra= 0;
3194                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3195                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3196                     break;
3197                 case CANDIDATE_MB_TYPE_FORWARD_I:
3198                     s->mv_dir = MV_DIR_FORWARD;
3199                     s->mv_type = MV_TYPE_FIELD;
3200                     s->mb_intra= 0;
3201                     for(i=0; i<2; i++){
3202                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3203                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3204                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3205                     }
3206                     break;
3207                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3208                     s->mv_dir = MV_DIR_BACKWARD;
3209                     s->mv_type = MV_TYPE_FIELD;
3210                     s->mb_intra= 0;
3211                     for(i=0; i<2; i++){
3212                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3213                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3214                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3215                     }
3216                     break;
3217                 case CANDIDATE_MB_TYPE_BIDIR_I:
3218                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3219                     s->mv_type = MV_TYPE_FIELD;
3220                     s->mb_intra= 0;
3221                     for(dir=0; dir<2; dir++){
3222                         for(i=0; i<2; i++){
3223                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3224                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3225                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3226                         }
3227                     }
3228                     break;
3229                 default:
3230                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3231                 }
3232
3233                 encode_mb(s, motion_x, motion_y);
3234
3235                 // RAL: Update last macroblock type
3236                 s->last_mv_dir = s->mv_dir;
3237
3238                 if (CONFIG_H263_ENCODER &&
3239                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3240                     ff_h263_update_motion_val(s);
3241
3242                 ff_MPV_decode_mb(s, s->block);
3243             }
3244
3245             /* clean the MV table in IPS frames for direct mode in B frames */
3246             if(s->mb_intra /* && I,P,S_TYPE */){
3247                 s->p_mv_table[xy][0]=0;
3248                 s->p_mv_table[xy][1]=0;
3249             }
3250
3251             if(s->flags&CODEC_FLAG_PSNR){
3252                 int w= 16;
3253                 int h= 16;
3254
3255                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3256                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3257
3258                 s->current_picture.error[0] += sse(
3259                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3260                     s->dest[0], w, h, s->linesize);
3261                 s->current_picture.error[1] += sse(
3262                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3263                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3264                 s->current_picture.error[2] += sse(
3265                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3266                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3267             }
3268             if(s->loop_filter){
3269                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3270                     ff_h263_loop_filter(s);
3271             }
3272             av_dlog(s->avctx, "MB %d %d bits\n",
3273                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3274         }
3275     }
3276
3277     //not beautiful here but we must write it before flushing so it has to be here
3278     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3279         ff_msmpeg4_encode_ext_header(s);
3280
3281     write_slice_end(s);
3282
3283     /* Send the last GOB if RTP */
3284     if (s->avctx->rtp_callback) {
3285         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3286         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3287         /* Call the RTP callback to send the last GOB */
3288         emms_c();
3289         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3290     }
3291
3292     return 0;
3293 }
3294
/*
 * Add src->field to dst->field, then reset src->field to 0.
 *
 * The expansion refers to the identifiers `dst` and `src`, which must be
 * in scope wherever the macro is used. `field` is pasted after `->`, so it
 * may be any member designator (e.g. `me.mb_var_sum_temp`, `dct_count[0]`).
 *
 * The two assignments are wrapped in do { } while (0) so that MERGE(x);
 * is a single statement and stays correct as the body of an unbraced
 * if/else or loop.
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
3296 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3297     MERGE(me.scene_change_score);
3298     MERGE(me.mc_mb_var_sum_temp);
3299     MERGE(me.mb_var_sum_temp);
3300 }
3301
3302 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3303     int i;
3304
3305     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3306     MERGE(dct_count[1]);
3307     MERGE(mv_bits);
3308     MERGE(i_tex_bits);
3309     MERGE(p_tex_bits);
3310     MERGE(i_count);
3311     MERGE(f_count);
3312     MERGE(b_count);
3313     MERGE(skip_count);
3314     MERGE(misc_bits);
3315     MERGE(er.error_count);
3316     MERGE(padding_bug_score);
3317     MERGE(current_picture.error[0]);
3318     MERGE(current_picture.error[1]);
3319     MERGE(current_picture.error[2]);
3320
3321     if(dst->avctx->noise_reduction){
3322         for(i=0; i<64; i++){
3323             MERGE(dct_error_sum[0][i]);
3324             MERGE(dct_error_sum[1][i]);
3325         }
3326     }
3327
3328     assert(put_bits_count(&src->pb) % 8 ==0);
3329     assert(put_bits_count(&dst->pb) % 8 ==0);
3330     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3331     flush_put_bits(&dst->pb);
3332 }
3333
3334 static int estimate_qp(MpegEncContext *s, int dry_run){
3335     if (s->next_lambda){
3336         s->current_picture_ptr->f->quality =
3337         s->current_picture.f->quality = s->next_lambda;
3338         if(!dry_run) s->next_lambda= 0;
3339     } else if (!s->fixed_qscale) {
3340         s->current_picture_ptr->f->quality =
3341         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3342         if (s->current_picture.f->quality < 0)
3343             return -1;
3344     }
3345
3346     if(s->adaptive_quant){
3347         switch(s->codec_id){
3348         case AV_CODEC_ID_MPEG4:
3349             if (CONFIG_MPEG4_ENCODER)
3350                 ff_clean_mpeg4_qscales(s);
3351             break;
3352         case AV_CODEC_ID_H263:
3353         case AV_CODEC_ID_H263P:
3354         case AV_CODEC_ID_FLV1:
3355             if (CONFIG_H263_ENCODER)
3356                 ff_clean_h263_qscales(s);
3357             break;
3358         default:
3359             ff_init_qscale_tab(s);
3360         }
3361
3362         s->lambda= s->lambda_table[0];
3363         //FIXME broken
3364     }else
3365         s->lambda = s->current_picture.f->quality;
3366     update_qscale(s);
3367     return 0;
3368 }
3369
3370 /* must be called before writing the header */
3371 static void set_frame_distances(MpegEncContext * s){
3372     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3373     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3374
3375     if(s->pict_type==AV_PICTURE_TYPE_B){
3376         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3377         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3378     }else{
3379         s->pp_time= s->time - s->last_non_b_time;
3380         s->last_non_b_time= s->time;
3381         assert(s->picture_number==0 || s->pp_time > 0);
3382     }
3383 }
3384
/**
 * Encode one picture.
 *
 * Stages, in order: initialise time/rounding state, choose a starting
 * lambda, run motion estimation (or mark every MB intra for I pictures),
 * optionally promote a P picture to I on a scene change, pick f_code/b_code
 * and fix over-long MVs, estimate the final qscale, build the MJPEG/AMV
 * quant matrices, write the format-specific picture header, and finally
 * run encode_thread on all slice contexts and merge their results.
 *
 * @param s              encoder context; s->thread_context[1..context_count-1]
 *                       are refreshed from it before the parallel stages
 * @param picture_number stored in s->picture_number and handed to the
 *                       header writers
 * @return 0 on success; -1 if estimate_qp() or ff_init_me() fails, or the
 *         negative result of ff_update_duplicate_context()
 */
3385 static int encode_picture(MpegEncContext *s, int picture_number)
3386 {
3387     int i, ret;
3388     int bits;
3389     int context_count = s->slice_context_count;
3390
3391     s->picture_number = picture_number;
3392
3393     /* Reset the average MB variance */
3394     s->me.mb_var_sum_temp    =
3395     s->me.mc_mb_var_sum_temp = 0;
3396
3397     /* we need to initialize some time vars before we can encode b-frames */
3398     // RAL: Condition added for MPEG1VIDEO
3399     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3400         set_frame_distances(s);
3401     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3402         ff_set_mpeg4_time(s);
3403
3404     s->me.scene_change_score=0;
3405
3406 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3407
         /* no_rounding is reset on I pictures (1 only for msmpeg4_version >= 3)
          * and toggled on every other non-B picture when flipflop_rounding is
          * set or the codec is H.263+ / MPEG-4. */
3408     if(s->pict_type==AV_PICTURE_TYPE_I){
3409         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3410         else                        s->no_rounding=0;
3411     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3412         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3413             s->no_rounding ^= 1;
3414     }
3415
         /* Starting lambda for motion estimation: in pass 2 a dry-run qscale
          * estimate (dry_run=1); otherwise, unless CODEC_FLAG_QSCALE is set,
          * the lambda remembered in last_lambda_for[]. */
3416     if(s->flags & CODEC_FLAG_PASS2){
3417         if (estimate_qp(s,1) < 0)
3418             return -1;
3419         ff_get_2pass_fcode(s);
3420     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3421         if(s->pict_type==AV_PICTURE_TYPE_B)
3422             s->lambda= s->last_lambda_for[s->pict_type];
3423         else
3424             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3425         update_qscale(s);
3426     }
3427
         /* Outside AMV/MJPEG the chroma intra quant matrices alias the luma
          * ones; a chroma matrix that is a distinct pointer is freed first. */
3428     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3429         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3430         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3431         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3432         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3433     }
3434
3435     s->mb_intra=0; //for the rate distortion & bit compare functions
3436     for(i=1; i<context_count; i++){
3437         ret = ff_update_duplicate_context(s->thread_context[i], s);
3438         if (ret < 0)
3439             return ret;
3440     }
3441
3442     if(ff_init_me(s)<0)
3443         return -1;
3444
3445     /* Estimate motion for every MB */
3446     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda/lambda2 by me_penalty_compensation/256, rounded */
3447         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3448         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3449         if (s->pict_type != AV_PICTURE_TYPE_B) {
                 /* pre-pass: pre_me == 2, or pre_me set and the previous
                  * non-B picture was an I picture */
3450             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3451                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3452             }
3453         }
3454
             /* NOTE(review): the return value of execute() is not checked here */
3455         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3456     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3457         /* I-Frame */
3458         for(i=0; i<s->mb_stride*s->mb_height; i++)
3459             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3460
3461         if(!s->fixed_qscale){
3462             /* finding spatial complexity for I-frame rate control */
3463             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3464         }
3465     }
         /* fold the per-thread ME statistics back into the main context and
          * publish the variance sums on both picture structures */
3466     for(i=1; i<context_count; i++){
3467         merge_context_after_me(s, s->thread_context[i]);
3468     }
3469     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3470     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3471     emms_c();
3472
         /* scene change: promote this P picture to an I picture and force
          * every MB to the intra candidate type */
3473     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3474         s->pict_type= AV_PICTURE_TYPE_I;
3475         for(i=0; i<s->mb_stride*s->mb_height; i++)
3476             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3477         if(s->msmpeg4_version >= 3)
3478             s->no_rounding=1;
3479         av_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3480                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3481     }
3482
         /* f_code/b_code selection and long-MV fixing; skipped entirely
          * when umvplus is set */
3483     if(!s->umvplus){
3484         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3485             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3486
3487             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3488                 int a,b;
3489                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3490                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3491                 s->f_code= FFMAX3(s->f_code, a, b);
3492             }
3493
3494             ff_fix_long_p_mvs(s);
3495             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3496             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3497                 int j;
3498                 for(i=0; i<2; i++){
3499                     for(j=0; j<2; j++)
3500                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3501                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3502                 }
3503             }
3504         }
3505
3506         if(s->pict_type==AV_PICTURE_TYPE_B){
3507             int a, b;
3508
                 /* f_code covers the forward tables, b_code the backward ones;
                  * each is the larger of the plain and the bidir estimate */
3509             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3510             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3511             s->f_code = FFMAX(a, b);
3512
3513             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3514             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3515             s->b_code = FFMAX(a, b);
3516
3517             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3518             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3519             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3520             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3521             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3522                 int dir, j;
3523                 for(dir=0; dir<2; dir++){
3524                     for(i=0; i<2; i++){
3525                         for(j=0; j<2; j++){
3526                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3527                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3528                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3529                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3530                         }
3531                     }
3532                 }
3533             }
3534         }
3535     }
3536
         /* final (non-dry-run) qscale estimate for this picture */
3537     if (estimate_qp(s, 0) < 0)
3538         return -1;
3539
3540     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3541         s->qscale= 3; //reduce clipping problems
3542
3543     if (s->out_format == FMT_MJPEG) {
3544         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3545         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3546
             /* user matrices override the default; chroma_intra_matrix, when
              * given, overrides the chroma side only */
3547         if (s->avctx->intra_matrix) {
3548             chroma_matrix =
3549             luma_matrix = s->avctx->intra_matrix;
3550         }
3551         if (s->avctx->chroma_intra_matrix)
3552             chroma_matrix = s->avctx->chroma_intra_matrix;
3553
3554         /* for mjpeg, we do include qscale in the matrix */
3555         for(i=1;i<64;i++){
3556             int j= s->dsp.idct_permutation[i];
3557
3558             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3559             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3560         }
3561         s->y_dc_scale_table=
3562         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3563         s->chroma_intra_matrix[0] =
3564         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3565         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3566                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3567         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3568                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
             /* qscale is already folded into the matrices above, so it is
              * pinned to 8 from here on */
3569         s->qscale= 8;
3570     }
3571     if(s->codec_id == AV_CODEC_ID_AMV){
             /* AMV: constant DC scale tables (13 luma, 14 chroma) and
              * matrices taken from sp5x_quant_table rows 5*2+0 / 5*2+1 */
3572         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3573         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3574         for(i=1;i<64;i++){
3575             int j= s->dsp.idct_permutation[ff_zigzag_direct[i]];
3576
3577             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3578             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3579         }
3580         s->y_dc_scale_table= y;
3581         s->c_dc_scale_table= c;
3582         s->intra_matrix[0] = 13;
3583         s->chroma_intra_matrix[0] = 14;
3584         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3585                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3586         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3587                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3588         s->qscale= 8;
3589     }
3590
3591     //FIXME var duplication
3592     s->current_picture_ptr->f->key_frame =
3593     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3594     s->current_picture_ptr->f->pict_type =
3595     s->current_picture.f->pict_type = s->pict_type;
3596
3597     if (s->current_picture.f->key_frame)
3598         s->picture_in_gop_number=0;
3599
         /* write the picture header; header_bits is the growth of s->pb
          * across the header writer */
3600     s->mb_x = s->mb_y = 0;
3601     s->last_bits= put_bits_count(&s->pb);
3602     switch(s->out_format) {
3603     case FMT_MJPEG:
3604         if (CONFIG_MJPEG_ENCODER)
3605             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3606                                            s->intra_matrix, s->chroma_intra_matrix);
3607         break;
3608     case FMT_H261:
3609         if (CONFIG_H261_ENCODER)
3610             ff_h261_encode_picture_header(s, picture_number);
3611         break;
3612     case FMT_H263:
             /* first matching codec wins; plain H.263 is the fallback */
3613         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3614             ff_wmv2_encode_picture_header(s, picture_number);
3615         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3616             ff_msmpeg4_encode_picture_header(s, picture_number);
3617         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3618             ff_mpeg4_encode_picture_header(s, picture_number);
3619         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3620             ff_rv10_encode_picture_header(s, picture_number);
3621         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3622             ff_rv20_encode_picture_header(s, picture_number);
3623         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3624             ff_flv_encode_picture_header(s, picture_number);
3625         else if (CONFIG_H263_ENCODER)
3626             ff_h263_encode_picture_header(s, picture_number);
3627         break;
3628     case FMT_MPEG1:
3629         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3630             ff_mpeg1_encode_picture_header(s, picture_number);
3631         break;
3632     default:
3633         av_assert0(0);
3634     }
3635     bits= put_bits_count(&s->pb);
3636     s->header_bits= bits - s->last_bits;
3637
         /* refresh the slice contexts, encode all slices in parallel, then
          * merge each context's statistics and bitstream into the main one */
3638     for(i=1; i<context_count; i++){
3639         update_duplicate_context_after_me(s->thread_context[i], s);
3640     }
3641     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3642     for(i=1; i<context_count; i++){
3643         merge_context_after_encode(s, s->thread_context[i]);
3644     }
3645     emms_c();
3646     return 0;
3647 }
3648
3649 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3650     const int intra= s->mb_intra;
3651     int i;
3652
3653     s->dct_count[intra]++;
3654
3655     for(i=0; i<64; i++){
3656         int level= block[i];
3657
3658         if(level){
3659             if(level>0){
3660                 s->dct_error_sum[intra][i] += level;
3661                 level -= s->dct_offset[intra][i];
3662                 if(level<0) level=0;
3663             }else{
3664                 s->dct_error_sum[intra][i] -= level;
3665                 level += s->dct_offset[intra][i];
3666                 if(level>0) level=0;
3667             }
3668             block[i]= level;
3669         }
3670     }
3671 }
3672
3673 static int dct_quantize_trellis_c(MpegEncContext *s,
3674                                   int16_t *block, int n,
3675                                   int qscale, int *overflow){
3676     const int *qmat;
3677     const uint8_t *scantable= s->intra_scantable.scantable;
3678     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3679     int max=0;
3680     unsigned int threshold1, threshold2;
3681     int bias=0;
3682     int run_tab[65];
3683     int level_tab[65];
3684     int score_tab[65];
3685     int survivor[65];
3686     int survivor_count;
3687     int last_run=0;
3688     int last_level=0;
3689     int last_score= 0;
3690     int last_i;
3691     int coeff[2][64];
3692     int coeff_count[64];
3693     int qmul, qadd, start_i, last_non_zero, i, dc;
3694     const int esc_length= s->ac_esc_length;
3695     uint8_t * length;
3696     uint8_t * last_length;
3697     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3698
3699     s->dsp.fdct (block);
3700
3701     if(s->dct_error_sum)
3702         s->denoise_dct(s, block);
3703     qmul= qscale*16;
3704     qadd= ((qscale-1)|1)*8;
3705
3706     if (s->mb_intra) {
3707         int q;
3708         if (!s->h263_aic) {
3709             if (n < 4)
3710                 q = s->y_dc_scale;
3711             else
3712                 q = s->c_dc_scale;
3713             q = q << 3;
3714         } else{
3715             /* For AIC we skip quant/dequant of INTRADC */
3716             q = 1 << 3;
3717             qadd=0;
3718         }
3719
3720         /* note: block[0] is assumed to be positive */
3721         block[0] = (block[0] + (q >> 1)) / q;
3722         start_i = 1;
3723         last_non_zero = 0;
3724         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3725         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3726             bias= 1<<(QMAT_SHIFT-1);
3727         length     = s->intra_ac_vlc_length;
3728         last_length= s->intra_ac_vlc_last_length;
3729     } else {
3730         start_i = 0;
3731         last_non_zero = -1;
3732         qmat = s->q_inter_matrix[qscale];
3733         length     = s->inter_ac_vlc_length;
3734         last_length= s->inter_ac_vlc_last_length;
3735     }
3736     last_i= start_i;
3737
3738     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3739     threshold2= (threshold1<<1);
3740
3741     for(i=63; i>=start_i; i--) {
3742         const int j = scantable[i];
3743         int level = block[j] * qmat[j];
3744
3745         if(((unsigned)(level+threshold1))>threshold2){
3746             last_non_zero = i;
3747             break;
3748         }
3749     }
3750
3751     for(i=start_i; i<=last_non_zero; i++) {
3752         const int j = scantable[i];
3753         int level = block[j] * qmat[j];
3754
3755 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3756 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3757         if(((unsigned)(level+threshold1))>threshold2){
3758             if(level>0){
3759                 level= (bias + level)>>QMAT_SHIFT;
3760                 coeff[0][i]= level;
3761                 coeff[1][i]= level-1;
3762 //                coeff[2][k]= level-2;
3763             }else{
3764                 level= (bias - level)>>QMAT_SHIFT;
3765                 coeff[0][i]= -level;
3766                 coeff[1][i]= -level+1;
3767 //                coeff[2][k]= -level+2;
3768             }
3769             coeff_count[i]= FFMIN(level, 2);
3770             av_assert2(coeff_count[i]);
3771             max |=level;
3772         }else{
3773             coeff[0][i]= (level>>31)|1;
3774             coeff_count[i]= 1;
3775         }
3776     }
3777
3778     *overflow= s->max_qcoeff < max; //overflow might have happened
3779
3780     if(last_non_zero < start_i){
3781         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3782         return last_non_zero;
3783     }
3784
3785     score_tab[start_i]= 0;
3786     survivor[0]= start_i;
3787     survivor_count= 1;
3788
3789     for(i=start_i; i<=last_non_zero; i++){
3790         int level_index, j, zero_distortion;
3791         int dct_coeff= FFABS(block[ scantable[i] ]);
3792         int best_score=256*256*256*120;
3793
3794         if (s->dsp.fdct == ff_fdct_ifast)
3795             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3796         zero_distortion= dct_coeff*dct_coeff;
3797
3798         for(level_index=0; level_index < coeff_count[i]; level_index++){
3799             int distortion;
3800             int level= coeff[level_index][i];
3801             const int alevel= FFABS(level);
3802             int unquant_coeff;
3803
3804             av_assert2(level);
3805
3806             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3807                 unquant_coeff= alevel*qmul + qadd;
3808             }else{ //MPEG1
3809                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3810                 if(s->mb_intra){
3811                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3812                         unquant_coeff =   (unquant_coeff - 1) | 1;
3813                 }else{
3814                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3815                         unquant_coeff =   (unquant_coeff - 1) | 1;
3816                 }
3817                 unquant_coeff<<= 3;
3818             }
3819
3820             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3821             level+=64;
3822             if((level&(~127)) == 0){
3823                 for(j=survivor_count-1; j>=0; j--){
3824                     int run= i - survivor[j];
3825                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3826                     score += score_tab[i-run];
3827
3828                     if(score < best_score){
3829                         best_score= score;
3830                         run_tab[i+1]= run;
3831                         level_tab[i+1]= level-64;
3832                     }
3833                 }
3834
3835                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3836                     for(j=survivor_count-1; j>=0; j--){
3837                         int run= i - survivor[j];
3838                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3839                         score += score_tab[i-run];
3840                         if(score < last_score){
3841                             last_score= score;
3842                             last_run= run;
3843                             last_level= level-64;
3844                             last_i= i+1;
3845                         }
3846                     }
3847                 }
3848             }else{
3849                 distortion += esc_length*lambda;
3850                 for(j=survivor_count-1; j>=0; j--){
3851                     int run= i - survivor[j];
3852                     int score= distortion + score_tab[i-run];
3853
3854                     if(score < best_score){
3855                         best_score= score;
3856                         run_tab[i+1]= run;
3857                         level_tab[i+1]= level-64;
3858                     }
3859                 }
3860
3861                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3862                   for(j=survivor_count-1; j>=0; j--){
3863                         int run= i - survivor[j];
3864                         int score= distortion + score_tab[i-run];
3865                         if(score < last_score){
3866                             last_score= score;
3867                             last_run= run;
3868                             last_level= level-64;
3869                             last_i= i+1;
3870                         }
3871                     }
3872                 }
3873             }
3874         }
3875
3876         score_tab[i+1]= best_score;
3877
3878         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3879         if(last_non_zero <= 27){
3880             for(; survivor_count; survivor_count--){
3881                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3882                     break;
3883             }
3884         }else{
3885             for(; survivor_count; survivor_count--){
3886                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3887                     break;
3888             }
3889         }
3890
3891         survivor[ survivor_count++ ]= i+1;
3892     }
3893
3894     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
3895         last_score= 256*256*256*120;
3896         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3897             int score= score_tab[i];
3898             if(i) score += lambda*2; //FIXME exacter?
3899
3900             if(score < last_score){
3901                 last_score= score;
3902                 last_i= i;
3903                 last_level= level_tab[i];
3904                 last_run= run_tab[i];
3905             }
3906         }
3907     }
3908
3909     s->coded_score[n] = last_score;
3910
3911     dc= FFABS(block[0]);
3912     last_non_zero= last_i - 1;
3913     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3914
3915     if(last_non_zero < start_i)
3916         return last_non_zero;
3917
3918     if(last_non_zero == 0 && start_i == 0){
3919         int best_level= 0;
3920         int best_score= dc * dc;
3921
3922         for(i=0; i<coeff_count[0]; i++){
3923             int level= coeff[i][0];
3924             int alevel= FFABS(level);
3925             int unquant_coeff, score, distortion;
3926
3927             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3928                     unquant_coeff= (alevel*qmul + qadd)>>3;
3929             }else{ //MPEG1
3930                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3931                     unquant_coeff =   (unquant_coeff - 1) | 1;
3932             }
3933             unquant_coeff = (unquant_coeff + 4) >> 3;
3934             unquant_coeff<<= 3 + 3;
3935
3936             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3937             level+=64;
3938             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3939             else                    score= distortion + esc_length*lambda;
3940
3941             if(score < best_score){
3942                 best_score= score;
3943                 best_level= level - 64;
3944             }
3945         }
3946         block[0]= best_level;
3947         s->coded_score[n] = best_score - dc*dc;
3948         if(best_level == 0) return -1;
3949         else                return last_non_zero;
3950     }
3951
3952     i= last_i;
3953     av_assert2(last_level);
3954
3955     block[ perm_scantable[last_non_zero] ]= last_level;
3956     i -= last_run + 1;
3957
3958     for(; i>start_i; i -= run_tab[i] + 1){
3959         block[ perm_scantable[i-1] ]= level_tab[i];
3960     }
3961
3962     return last_non_zero;
3963 }
3964
3965 //#define REFINE_STATS 1
3966 static int16_t basis[64][64];
3967
3968 static void build_basis(uint8_t *perm){
3969     int i, j, x, y;
3970     emms_c();
3971     for(i=0; i<8; i++){
3972         for(j=0; j<8; j++){
3973             for(y=0; y<8; y++){
3974                 for(x=0; x<8; x++){
3975                     double s= 0.25*(1<<BASIS_SHIFT);
3976                     int index= 8*i + j;
3977                     int perm_index= perm[index];
3978                     if(i==0) s*= sqrt(0.5);
3979                     if(j==0) s*= sqrt(0.5);
3980                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3981                 }
3982             }
3983         }
3984     }
3985 }
3986
3987 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3988                         int16_t *block, int16_t *weight, int16_t *orig,
3989                         int n, int qscale){
3990     int16_t rem[64];
3991     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3992     const uint8_t *scantable= s->intra_scantable.scantable;
3993     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3994 //    unsigned int threshold1, threshold2;
3995 //    int bias=0;
3996     int run_tab[65];
3997     int prev_run=0;
3998     int prev_level=0;
3999     int qmul, qadd, start_i, last_non_zero, i, dc;
4000     uint8_t * length;
4001     uint8_t * last_length;
4002     int lambda;
4003     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4004 #ifdef REFINE_STATS
4005 static int count=0;
4006 static int after_last=0;
4007 static int to_zero=0;
4008 static int from_zero=0;
4009 static int raise=0;
4010 static int lower=0;
4011 static int messed_sign=0;
4012 #endif
4013
4014     if(basis[0][0] == 0)
4015         build_basis(s->dsp.idct_permutation);
4016
4017     qmul= qscale*2;
4018     qadd= (qscale-1)|1;
4019     if (s->mb_intra) {
4020         if (!s->h263_aic) {
4021             if (n < 4)
4022                 q = s->y_dc_scale;
4023             else
4024                 q = s->c_dc_scale;
4025         } else{
4026             /* For AIC we skip quant/dequant of INTRADC */
4027             q = 1;
4028             qadd=0;
4029         }
4030         q <<= RECON_SHIFT-3;
4031         /* note: block[0] is assumed to be positive */
4032         dc= block[0]*q;
4033 //        block[0] = (block[0] + (q >> 1)) / q;
4034         start_i = 1;
4035 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4036 //            bias= 1<<(QMAT_SHIFT-1);
4037         length     = s->intra_ac_vlc_length;
4038         last_length= s->intra_ac_vlc_last_length;
4039     } else {
4040         dc= 0;
4041         start_i = 0;
4042         length     = s->inter_ac_vlc_length;
4043         last_length= s->inter_ac_vlc_last_length;
4044     }
4045     last_non_zero = s->block_last_index[n];
4046
4047 #ifdef REFINE_STATS
4048 {START_TIMER
4049 #endif
4050     dc += (1<<(RECON_SHIFT-1));
4051     for(i=0; i<64; i++){
4052         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
4053     }
4054 #ifdef REFINE_STATS
4055 STOP_TIMER("memset rem[]")}
4056 #endif
4057     sum=0;
4058     for(i=0; i<64; i++){
4059         int one= 36;
4060         int qns=4;
4061         int w;
4062
4063         w= FFABS(weight[i]) + qns*one;
4064         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4065
4066         weight[i] = w;
4067 //        w=weight[i] = (63*qns + (w/2)) / w;
4068
4069         av_assert2(w>0);
4070         av_assert2(w<(1<<6));
4071         sum += w*w;
4072     }
4073     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4074 #ifdef REFINE_STATS
4075 {START_TIMER
4076 #endif
4077     run=0;
4078     rle_index=0;
4079     for(i=start_i; i<=last_non_zero; i++){
4080         int j= perm_scantable[i];
4081         const int level= block[j];
4082         int coeff;
4083
4084         if(level){
4085             if(level<0) coeff= qmul*level - qadd;
4086             else        coeff= qmul*level + qadd;
4087             run_tab[rle_index++]=run;
4088             run=0;
4089
4090             s->dsp.add_8x8basis(rem, basis[j], coeff);
4091         }else{
4092             run++;
4093         }
4094     }
4095 #ifdef REFINE_STATS
4096 if(last_non_zero>0){
4097 STOP_TIMER("init rem[]")
4098 }
4099 }
4100
4101 {START_TIMER
4102 #endif
4103     for(;;){
4104         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
4105         int best_coeff=0;
4106         int best_change=0;
4107         int run2, best_unquant_change=0, analyze_gradient;
4108 #ifdef REFINE_STATS
4109 {START_TIMER
4110 #endif
4111         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4112
4113         if(analyze_gradient){
4114 #ifdef REFINE_STATS
4115 {START_TIMER
4116 #endif
4117             for(i=0; i<64; i++){
4118                 int w= weight[i];
4119
4120                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4121             }
4122 #ifdef REFINE_STATS
4123 STOP_TIMER("rem*w*w")}
4124 {START_TIMER
4125 #endif
4126             s->dsp.fdct(d1);
4127 #ifdef REFINE_STATS
4128 STOP_TIMER("dct")}
4129 #endif
4130         }
4131
4132         if(start_i){
4133             const int level= block[0];
4134             int change, old_coeff;
4135
4136             av_assert2(s->mb_intra);
4137
4138             old_coeff= q*level;
4139
4140             for(change=-1; change<=1; change+=2){
4141                 int new_level= level + change;
4142                 int score, new_coeff;
4143
4144                 new_coeff= q*new_level;
4145                 if(new_coeff >= 2048 || new_coeff < 0)
4146                     continue;
4147
4148                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
4149                 if(score<best_score){
4150                     best_score= score;
4151                     best_coeff= 0;
4152                     best_change= change;
4153                     best_unquant_change= new_coeff - old_coeff;
4154                 }
4155             }
4156         }
4157
4158         run=0;
4159         rle_index=0;
4160         run2= run_tab[rle_index++];
4161         prev_level=0;
4162         prev_run=0;
4163
4164         for(i=start_i; i<64; i++){
4165             int j= perm_scantable[i];
4166             const int level= block[j];
4167             int change, old_coeff;
4168
4169             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4170                 break;
4171
4172             if(level){
4173                 if(level<0) old_coeff= qmul*level - qadd;
4174                 else        old_coeff= qmul*level + qadd;
4175                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4176             }else{
4177                 old_coeff=0;
4178                 run2--;
4179                 av_assert2(run2>=0 || i >= last_non_zero );
4180             }
4181
4182             for(change=-1; change<=1; change+=2){
4183                 int new_level= level + change;
4184                 int score, new_coeff, unquant_change;
4185
4186                 score=0;
4187                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4188                    continue;
4189
4190                 if(new_level){
4191                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4192                     else            new_coeff= qmul*new_level + qadd;
4193                     if(new_coeff >= 2048 || new_coeff <= -2048)
4194                         continue;
4195                     //FIXME check for overflow
4196
4197                     if(level){
4198                         if(level < 63 && level > -63){
4199                             if(i < last_non_zero)
4200                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4201                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4202                             else
4203                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4204                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4205                         }
4206                     }else{
4207                         av_assert2(FFABS(new_level)==1);
4208
4209                         if(analyze_gradient){
4210                             int g= d1[ scantable[i] ];
4211                             if(g && (g^new_level) >= 0)
4212                                 continue;
4213                         }
4214
4215                         if(i < last_non_zero){
4216                             int next_i= i + run2 + 1;
4217                             int next_level= block[ perm_scantable[next_i] ] + 64;
4218
4219                             if(next_level&(~127))
4220                                 next_level= 0;
4221
4222                             if(next_i < last_non_zero)
4223                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4224                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4225                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4226                             else
4227                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4228                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4229                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4230                         }else{
4231                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4232                             if(prev_level){
4233                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4234                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4235                             }
4236                         }
4237                     }
4238                 }else{
4239                     new_coeff=0;
4240                     av_assert2(FFABS(level)==1);
4241
4242                     if(i < last_non_zero){
4243                         int next_i= i + run2 + 1;
4244                         int next_level= block[ perm_scantable[next_i] ] + 64;
4245
4246                         if(next_level&(~127))
4247                             next_level= 0;
4248
4249                         if(next_i < last_non_zero)
4250                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4251                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4252                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4253                         else
4254                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4255                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4256                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4257                     }else{
4258                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4259                         if(prev_level){
4260                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4261                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4262                         }
4263                     }
4264                 }
4265
4266                 score *= lambda;
4267
4268                 unquant_change= new_coeff - old_coeff;
4269                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4270
4271                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
4272                 if(score<best_score){
4273                     best_score= score;
4274                     best_coeff= i;
4275                     best_change= change;
4276                     best_unquant_change= unquant_change;
4277                 }
4278             }
4279             if(level){
4280                 prev_level= level + 64;
4281                 if(prev_level&(~127))
4282                     prev_level= 0;
4283                 prev_run= run;
4284                 run=0;
4285             }else{
4286                 run++;
4287             }
4288         }
4289 #ifdef REFINE_STATS
4290 STOP_TIMER("iterative step")}
4291 #endif
4292
4293         if(best_change){
4294             int j= perm_scantable[ best_coeff ];
4295
4296             block[j] += best_change;
4297
4298             if(best_coeff > last_non_zero){
4299                 last_non_zero= best_coeff;
4300                 av_assert2(block[j]);
4301 #ifdef REFINE_STATS
4302 after_last++;
4303 #endif
4304             }else{
4305 #ifdef REFINE_STATS
4306 if(block[j]){
4307     if(block[j] - best_change){
4308         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4309             raise++;
4310         }else{
4311             lower++;
4312         }
4313     }else{
4314         from_zero++;
4315     }
4316 }else{
4317     to_zero++;
4318 }
4319 #endif
4320                 for(; last_non_zero>=start_i; last_non_zero--){
4321                     if(block[perm_scantable[last_non_zero]])
4322                         break;
4323                 }
4324             }
4325 #ifdef REFINE_STATS
4326 count++;
4327 if(256*256*256*64 % count == 0){
4328     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4329 }
4330 #endif
4331             run=0;
4332             rle_index=0;
4333             for(i=start_i; i<=last_non_zero; i++){
4334                 int j= perm_scantable[i];
4335                 const int level= block[j];
4336
4337                  if(level){
4338                      run_tab[rle_index++]=run;
4339                      run=0;
4340                  }else{
4341                      run++;
4342                  }
4343             }
4344
4345             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4346         }else{
4347             break;
4348         }
4349     }
4350 #ifdef REFINE_STATS
4351 if(last_non_zero>0){
4352 STOP_TIMER("iterative search")
4353 }
4354 }
4355 #endif
4356
4357     return last_non_zero;
4358 }
4359
4360 int ff_dct_quantize_c(MpegEncContext *s,
4361                         int16_t *block, int n,
4362                         int qscale, int *overflow)
4363 {
4364     int i, j, level, last_non_zero, q, start_i;
4365     const int *qmat;
4366     const uint8_t *scantable= s->intra_scantable.scantable;
4367     int bias;
4368     int max=0;
4369     unsigned int threshold1, threshold2;
4370
4371     s->dsp.fdct (block);
4372
4373     if(s->dct_error_sum)
4374         s->denoise_dct(s, block);
4375
4376     if (s->mb_intra) {
4377         if (!s->h263_aic) {
4378             if (n < 4)
4379                 q = s->y_dc_scale;
4380             else
4381                 q = s->c_dc_scale;
4382             q = q << 3;
4383         } else
4384             /* For AIC we skip quant/dequant of INTRADC */
4385             q = 1 << 3;
4386
4387         /* note: block[0] is assumed to be positive */
4388         block[0] = (block[0] + (q >> 1)) / q;
4389         start_i = 1;
4390         last_non_zero = 0;
4391         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4392         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4393     } else {
4394         start_i = 0;
4395         last_non_zero = -1;
4396         qmat = s->q_inter_matrix[qscale];
4397         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4398     }
4399     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4400     threshold2= (threshold1<<1);
4401     for(i=63;i>=start_i;i--) {
4402         j = scantable[i];
4403         level = block[j] * qmat[j];
4404
4405         if(((unsigned)(level+threshold1))>threshold2){
4406             last_non_zero = i;
4407             break;
4408         }else{
4409             block[j]=0;
4410         }
4411     }
4412     for(i=start_i; i<=last_non_zero; i++) {
4413         j = scantable[i];
4414         level = block[j] * qmat[j];
4415
4416 //        if(   bias+level >= (1<<QMAT_SHIFT)
4417 //           || bias-level >= (1<<QMAT_SHIFT)){
4418         if(((unsigned)(level+threshold1))>threshold2){
4419             if(level>0){
4420                 level= (bias + level)>>QMAT_SHIFT;
4421                 block[j]= level;
4422             }else{
4423                 level= (bias - level)>>QMAT_SHIFT;
4424                 block[j]= -level;
4425             }
4426             max |=level;
4427         }else{
4428             block[j]=0;
4429         }
4430     }
4431     *overflow= s->max_qcoeff < max; //overflow might have happened
4432
4433     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4434     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4435         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4436
4437     return last_non_zero;
4438 }
4439
/* Byte offset of member x inside MpegEncContext, for the AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Flag set used by the option tables below. The replacement list is
 * parenthesized so that the macro expands to a single operand wherever it
 * is used. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4442 static const AVOption h263_options[] = {
4443     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4444     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4445     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4446     FF_MPV_COMMON_OPTS
4447     { NULL },
4448 };
4449
4450 static const AVClass h263_class = {
4451     .class_name = "H.263 encoder",
4452     .item_name  = av_default_item_name,
4453     .option     = h263_options,
4454     .version    = LIBAVUTIL_VERSION_INT,
4455 };
4456
4457 AVCodec ff_h263_encoder = {
4458     .name           = "h263",
4459     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4460     .type           = AVMEDIA_TYPE_VIDEO,
4461     .id             = AV_CODEC_ID_H263,
4462     .priv_data_size = sizeof(MpegEncContext),
4463     .init           = ff_MPV_encode_init,
4464     .encode2        = ff_MPV_encode_picture,
4465     .close          = ff_MPV_encode_end,
4466     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4467     .priv_class     = &h263_class,
4468 };
4469
4470 static const AVOption h263p_options[] = {
4471     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4472     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4473     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4474     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4475     FF_MPV_COMMON_OPTS
4476     { NULL },
4477 };
4478 static const AVClass h263p_class = {
4479     .class_name = "H.263p encoder",
4480     .item_name  = av_default_item_name,
4481     .option     = h263p_options,
4482     .version    = LIBAVUTIL_VERSION_INT,
4483 };
4484
4485 AVCodec ff_h263p_encoder = {
4486     .name           = "h263p",
4487     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4488     .type           = AVMEDIA_TYPE_VIDEO,
4489     .id             = AV_CODEC_ID_H263P,
4490     .priv_data_size = sizeof(MpegEncContext),
4491     .init           = ff_MPV_encode_init,
4492     .encode2        = ff_MPV_encode_picture,
4493     .close          = ff_MPV_encode_end,
4494     .capabilities   = CODEC_CAP_SLICE_THREADS,
4495     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4496     .priv_class     = &h263p_class,
4497 };
4498
4499 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4500
4501 AVCodec ff_msmpeg4v2_encoder = {
4502     .name           = "msmpeg4v2",
4503     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4504     .type           = AVMEDIA_TYPE_VIDEO,
4505     .id             = AV_CODEC_ID_MSMPEG4V2,
4506     .priv_data_size = sizeof(MpegEncContext),
4507     .init           = ff_MPV_encode_init,
4508     .encode2        = ff_MPV_encode_picture,
4509     .close          = ff_MPV_encode_end,
4510     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4511     .priv_class     = &msmpeg4v2_class,
4512 };
4513
4514 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4515
4516 AVCodec ff_msmpeg4v3_encoder = {
4517     .name           = "msmpeg4",
4518     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4519     .type           = AVMEDIA_TYPE_VIDEO,
4520     .id             = AV_CODEC_ID_MSMPEG4V3,
4521     .priv_data_size = sizeof(MpegEncContext),
4522     .init           = ff_MPV_encode_init,
4523     .encode2        = ff_MPV_encode_picture,
4524     .close          = ff_MPV_encode_end,
4525     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4526     .priv_class     = &msmpeg4v3_class,
4527 };
4528
4529 FF_MPV_GENERIC_CLASS(wmv1)
4530
4531 AVCodec ff_wmv1_encoder = {
4532     .name           = "wmv1",
4533     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4534     .type           = AVMEDIA_TYPE_VIDEO,
4535     .id             = AV_CODEC_ID_WMV1,
4536     .priv_data_size = sizeof(MpegEncContext),
4537     .init           = ff_MPV_encode_init,
4538     .encode2        = ff_MPV_encode_picture,
4539     .close          = ff_MPV_encode_end,
4540     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4541     .priv_class     = &wmv1_class,
4542 };