]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
Revert "x86/videodsp: add emulated_edge_mc_mmxext"
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "dsputil.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "h261.h"
44 #include "h263.h"
45 #include "mathops.h"
46 #include "mpegutils.h"
47 #include "mjpegenc.h"
48 #include "msmpeg4.h"
49 #include "qpeldsp.h"
50 #include "faandct.h"
51 #include "thread.h"
52 #include "aandcttab.h"
53 #include "flv.h"
54 #include "mpeg4video.h"
55 #include "internal.h"
56 #include "bytestream.h"
57 #include <limits.h>
58 #include "sp5x.h"
59
60 static int encode_picture(MpegEncContext *s, int picture_number);
61 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
62 static int sse_mb(MpegEncContext *s);
63 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
64 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
65
66 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
67 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
68
69 const AVOption ff_mpv_generic_options[] = {
70     FF_MPV_COMMON_OPTS
71     { NULL },
72 };
73
74 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
75                        uint16_t (*qmat16)[2][64],
76                        const uint16_t *quant_matrix,
77                        int bias, int qmin, int qmax, int intra)
78 {
79     DSPContext *dsp = &s->dsp;
80     int qscale;
81     int shift = 0;
82
83     for (qscale = qmin; qscale <= qmax; qscale++) {
84         int i;
85         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
86             dsp->fdct == ff_jpeg_fdct_islow_10 ||
87             dsp->fdct == ff_faandct) {
88             for (i = 0; i < 64; i++) {
89                 const int j = dsp->idct_permutation[i];
90                 /* 16 <= qscale * quant_matrix[i] <= 7905
91                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
92                  *             19952 <=              x  <= 249205026
93                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
94                  *           3444240 >= (1 << 36) / (x) >= 275 */
95
96                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
97                                         (qscale * quant_matrix[j]));
98             }
99         } else if (dsp->fdct == ff_fdct_ifast) {
100             for (i = 0; i < 64; i++) {
101                 const int j = dsp->idct_permutation[i];
102                 /* 16 <= qscale * quant_matrix[i] <= 7905
103                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
104                  *             19952 <=              x  <= 249205026
105                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
106                  *           3444240 >= (1 << 36) / (x) >= 275 */
107
108                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
109                                         (ff_aanscales[i] * (int64_t)qscale * quant_matrix[j]));
110             }
111         } else {
112             for (i = 0; i < 64; i++) {
113                 const int j = dsp->idct_permutation[i];
114                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
115                  * Assume x = qscale * quant_matrix[i]
116                  * So             16 <=              x  <= 7905
117                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
118                  * so          32768 >= (1 << 19) / (x) >= 67 */
119                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
120                                         (qscale * quant_matrix[j]));
121                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
122                 //                    (qscale * quant_matrix[i]);
123                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
124                                        (qscale * quant_matrix[j]);
125
126                 if (qmat16[qscale][0][i] == 0 ||
127                     qmat16[qscale][0][i] == 128 * 256)
128                     qmat16[qscale][0][i] = 128 * 256 - 1;
129                 qmat16[qscale][1][i] =
130                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
131                                 qmat16[qscale][0][i]);
132             }
133         }
134
135         for (i = intra; i < 64; i++) {
136             int64_t max = 8191;
137             if (dsp->fdct == ff_fdct_ifast) {
138                 max = (8191LL * ff_aanscales[i]) >> 14;
139             }
140             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
141                 shift++;
142             }
143         }
144     }
145     if (shift) {
146         av_log(NULL, AV_LOG_INFO,
147                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
148                QMAT_SHIFT - shift);
149     }
150 }
151
152 static inline void update_qscale(MpegEncContext *s)
153 {
154     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
155                 (FF_LAMBDA_SHIFT + 7);
156     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
157
158     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
159                  FF_LAMBDA_SHIFT;
160 }
161
162 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
163 {
164     int i;
165
166     if (matrix) {
167         put_bits(pb, 1, 1);
168         for (i = 0; i < 64; i++) {
169             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
170         }
171     } else
172         put_bits(pb, 1, 0);
173 }
174
175 /**
176  * init s->current_picture.qscale_table from s->lambda_table
177  */
178 void ff_init_qscale_tab(MpegEncContext *s)
179 {
180     int8_t * const qscale_table = s->current_picture.qscale_table;
181     int i;
182
183     for (i = 0; i < s->mb_num; i++) {
184         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
185         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
186         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
187                                                   s->avctx->qmax);
188     }
189 }
190
191 static void update_duplicate_context_after_me(MpegEncContext *dst,
192                                               MpegEncContext *src)
193 {
194 #define COPY(a) dst->a= src->a
195     COPY(pict_type);
196     COPY(current_picture);
197     COPY(f_code);
198     COPY(b_code);
199     COPY(qscale);
200     COPY(lambda);
201     COPY(lambda2);
202     COPY(picture_in_gop_number);
203     COPY(gop_picture_number);
204     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
205     COPY(progressive_frame);    // FIXME don't set in encode_header
206     COPY(partitioned_frame);    // FIXME don't set in encode_header
207 #undef COPY
208 }
209
210 /**
211  * Set the given MpegEncContext to defaults for encoding.
212  * the changed fields will not depend upon the prior state of the MpegEncContext.
213  */
214 static void MPV_encode_defaults(MpegEncContext *s)
215 {
216     int i;
217     ff_MPV_common_defaults(s);
218
219     for (i = -16; i < 16; i++) {
220         default_fcode_tab[i + MAX_MV] = 1;
221     }
222     s->me.mv_penalty = default_mv_penalty;
223     s->fcode_tab     = default_fcode_tab;
224
225     s->input_picture_number  = 0;
226     s->picture_in_gop_number = 0;
227 }
228
229 av_cold int ff_dct_encode_init(MpegEncContext *s) {
230     if (ARCH_X86)
231         ff_dct_encode_init_x86(s);
232
233     if (CONFIG_H263_ENCODER)
234         ff_h263dsp_init(&s->h263dsp);
235     if (!s->dct_quantize)
236         s->dct_quantize = ff_dct_quantize_c;
237     if (!s->denoise_dct)
238         s->denoise_dct  = denoise_dct_c;
239     s->fast_dct_quantize = s->dct_quantize;
240     if (s->avctx->trellis)
241         s->dct_quantize  = dct_quantize_trellis_c;
242
243     return 0;
244 }
245
246 /* init video encoder */
247 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
248 {
249     MpegEncContext *s = avctx->priv_data;
250     int i, ret, format_supported;
251
252     MPV_encode_defaults(s);
253
254     switch (avctx->codec_id) {
255     case AV_CODEC_ID_MPEG2VIDEO:
256         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
257             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
258             av_log(avctx, AV_LOG_ERROR,
259                    "only YUV420 and YUV422 are supported\n");
260             return -1;
261         }
262         break;
263     case AV_CODEC_ID_MJPEG:
264     case AV_CODEC_ID_AMV:
265         format_supported = 0;
266         /* JPEG color space */
267         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
268             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
269             avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
270             (avctx->color_range == AVCOL_RANGE_JPEG &&
271              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
272               avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
273               avctx->pix_fmt == AV_PIX_FMT_YUV444P)))
274             format_supported = 1;
275         /* MPEG color space */
276         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
277                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
278                   avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
279                   avctx->pix_fmt == AV_PIX_FMT_YUV444P))
280             format_supported = 1;
281
282         if (!format_supported) {
283             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
284             return -1;
285         }
286         break;
287     default:
288         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
289             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
290             return -1;
291         }
292     }
293
294     switch (avctx->pix_fmt) {
295     case AV_PIX_FMT_YUVJ444P:
296     case AV_PIX_FMT_YUV444P:
297         s->chroma_format = CHROMA_444;
298         break;
299     case AV_PIX_FMT_YUVJ422P:
300     case AV_PIX_FMT_YUV422P:
301         s->chroma_format = CHROMA_422;
302         break;
303     case AV_PIX_FMT_YUVJ420P:
304     case AV_PIX_FMT_YUV420P:
305     default:
306         s->chroma_format = CHROMA_420;
307         break;
308     }
309
310     s->bit_rate = avctx->bit_rate;
311     s->width    = avctx->width;
312     s->height   = avctx->height;
313     if (avctx->gop_size > 600 &&
314         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
315         av_log(avctx, AV_LOG_WARNING,
316                "keyframe interval too large!, reducing it from %d to %d\n",
317                avctx->gop_size, 600);
318         avctx->gop_size = 600;
319     }
320     s->gop_size     = avctx->gop_size;
321     s->avctx        = avctx;
322     s->flags        = avctx->flags;
323     s->flags2       = avctx->flags2;
324     if (avctx->max_b_frames > MAX_B_FRAMES) {
325         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
326                "is %d.\n", MAX_B_FRAMES);
327         avctx->max_b_frames = MAX_B_FRAMES;
328     }
329     s->max_b_frames = avctx->max_b_frames;
330     s->codec_id     = avctx->codec->id;
331     s->strict_std_compliance = avctx->strict_std_compliance;
332     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
333     s->mpeg_quant         = avctx->mpeg_quant;
334     s->rtp_mode           = !!avctx->rtp_payload_size;
335     s->intra_dc_precision = avctx->intra_dc_precision;
336     s->user_specified_pts = AV_NOPTS_VALUE;
337
338     if (s->gop_size <= 1) {
339         s->intra_only = 1;
340         s->gop_size   = 12;
341     } else {
342         s->intra_only = 0;
343     }
344
345     s->me_method = avctx->me_method;
346
347     /* Fixed QSCALE */
348     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
349
350     s->adaptive_quant = (s->avctx->lumi_masking ||
351                          s->avctx->dark_masking ||
352                          s->avctx->temporal_cplx_masking ||
353                          s->avctx->spatial_cplx_masking  ||
354                          s->avctx->p_masking      ||
355                          s->avctx->border_masking ||
356                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
357                         !s->fixed_qscale;
358
359     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
360
361     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
362         switch(avctx->codec_id) {
363         case AV_CODEC_ID_MPEG1VIDEO:
364         case AV_CODEC_ID_MPEG2VIDEO:
365             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112L / 15000000 * 16384;
366             break;
367         case AV_CODEC_ID_MPEG4:
368         case AV_CODEC_ID_MSMPEG4V1:
369         case AV_CODEC_ID_MSMPEG4V2:
370         case AV_CODEC_ID_MSMPEG4V3:
371             if       (avctx->rc_max_rate >= 15000000) {
372                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000L) * (760-320) / (38400000 - 15000000);
373             } else if(avctx->rc_max_rate >=  2000000) {
374                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000L) * (320- 80) / (15000000 -  2000000);
375             } else if(avctx->rc_max_rate >=   384000) {
376                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000L) * ( 80- 40) / ( 2000000 -   384000);
377             } else
378                 avctx->rc_buffer_size = 40;
379             avctx->rc_buffer_size *= 16384;
380             break;
381         }
382         if (avctx->rc_buffer_size) {
383             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
384         }
385     }
386
387     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
388         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
389         if (avctx->rc_max_rate && !avctx->rc_buffer_size)
390             return -1;
391     }
392
393     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
394         av_log(avctx, AV_LOG_INFO,
395                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
396     }
397
398     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
399         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
400         return -1;
401     }
402
403     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
404         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
405         return -1;
406     }
407
408     if (avctx->rc_max_rate &&
409         avctx->rc_max_rate == avctx->bit_rate &&
410         avctx->rc_max_rate != avctx->rc_min_rate) {
411         av_log(avctx, AV_LOG_INFO,
412                "impossible bitrate constraints, this will fail\n");
413     }
414
415     if (avctx->rc_buffer_size &&
416         avctx->bit_rate * (int64_t)avctx->time_base.num >
417             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
418         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
419         return -1;
420     }
421
422     if (!s->fixed_qscale &&
423         avctx->bit_rate * av_q2d(avctx->time_base) >
424             avctx->bit_rate_tolerance) {
425         av_log(avctx, AV_LOG_WARNING,
426                "bitrate tolerance %d too small for bitrate %d, overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
427         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
428     }
429
430     if (s->avctx->rc_max_rate &&
431         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
432         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
433          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
434         90000LL * (avctx->rc_buffer_size - 1) >
435             s->avctx->rc_max_rate * 0xFFFFLL) {
436         av_log(avctx, AV_LOG_INFO,
437                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
438                "specified vbv buffer is too large for the given bitrate!\n");
439     }
440
441     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
442         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
443         s->codec_id != AV_CODEC_ID_FLV1) {
444         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
445         return -1;
446     }
447
448     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
449         av_log(avctx, AV_LOG_ERROR,
450                "OBMC is only supported with simple mb decision\n");
451         return -1;
452     }
453
454     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
455         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
456         return -1;
457     }
458
459     if (s->max_b_frames                    &&
460         s->codec_id != AV_CODEC_ID_MPEG4      &&
461         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
462         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
463         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
464         return -1;
465     }
466     if (s->max_b_frames < 0) {
467         av_log(avctx, AV_LOG_ERROR,
468                "max b frames must be 0 or positive for mpegvideo based encoders\n");
469         return -1;
470     }
471
472     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
473          s->codec_id == AV_CODEC_ID_H263  ||
474          s->codec_id == AV_CODEC_ID_H263P) &&
475         (avctx->sample_aspect_ratio.num > 255 ||
476          avctx->sample_aspect_ratio.den > 255)) {
477         av_log(avctx, AV_LOG_WARNING,
478                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
479                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
480         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
481                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
482     }
483
484     if ((s->codec_id == AV_CODEC_ID_H263  ||
485          s->codec_id == AV_CODEC_ID_H263P) &&
486         (avctx->width  > 2048 ||
487          avctx->height > 1152 )) {
488         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
489         return -1;
490     }
491     if ((s->codec_id == AV_CODEC_ID_H263  ||
492          s->codec_id == AV_CODEC_ID_H263P) &&
493         ((avctx->width &3) ||
494          (avctx->height&3) )) {
495         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
496         return -1;
497     }
498
499     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
500         (avctx->width  > 4095 ||
501          avctx->height > 4095 )) {
502         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
503         return -1;
504     }
505
506     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
507         (avctx->width  > 16383 ||
508          avctx->height > 16383 )) {
509         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
510         return -1;
511     }
512
513     if (s->codec_id == AV_CODEC_ID_RV10 &&
514         (avctx->width &15 ||
515          avctx->height&15 )) {
516         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
517         return AVERROR(EINVAL);
518     }
519
520     if (s->codec_id == AV_CODEC_ID_RV20 &&
521         (avctx->width &3 ||
522          avctx->height&3 )) {
523         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
524         return AVERROR(EINVAL);
525     }
526
527     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
528          s->codec_id == AV_CODEC_ID_WMV2) &&
529          avctx->width & 1) {
530          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
531          return -1;
532     }
533
534     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
535         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
536         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
537         return -1;
538     }
539
540     // FIXME mpeg2 uses that too
541     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
542                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
543         av_log(avctx, AV_LOG_ERROR,
544                "mpeg2 style quantization not supported by codec\n");
545         return -1;
546     }
547
548     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
549         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
550         return -1;
551     }
552
553     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
554         s->avctx->mb_decision != FF_MB_DECISION_RD) {
555         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
556         return -1;
557     }
558
559     if (s->avctx->scenechange_threshold < 1000000000 &&
560         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
561         av_log(avctx, AV_LOG_ERROR,
562                "closed gop with scene change detection are not supported yet, "
563                "set threshold to 1000000000\n");
564         return -1;
565     }
566
567     if (s->flags & CODEC_FLAG_LOW_DELAY) {
568         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
569             av_log(avctx, AV_LOG_ERROR,
570                   "low delay forcing is only available for mpeg2\n");
571             return -1;
572         }
573         if (s->max_b_frames != 0) {
574             av_log(avctx, AV_LOG_ERROR,
575                    "b frames cannot be used with low delay\n");
576             return -1;
577         }
578     }
579
580     if (s->q_scale_type == 1) {
581         if (avctx->qmax > 12) {
582             av_log(avctx, AV_LOG_ERROR,
583                    "non linear quant only supports qmax <= 12 currently\n");
584             return -1;
585         }
586     }
587
588     if (s->avctx->thread_count > 1         &&
589         s->codec_id != AV_CODEC_ID_MPEG4      &&
590         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
591         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
592         s->codec_id != AV_CODEC_ID_MJPEG      &&
593         (s->codec_id != AV_CODEC_ID_H263P)) {
594         av_log(avctx, AV_LOG_ERROR,
595                "multi threaded encoding not supported by codec\n");
596         return -1;
597     }
598
599     if (s->avctx->thread_count < 1) {
600         av_log(avctx, AV_LOG_ERROR,
601                "automatic thread number detection not supported by codec, "
602                "patch welcome\n");
603         return -1;
604     }
605
606     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
607         s->rtp_mode = 1;
608
609     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
610         s->h263_slice_structured = 1;
611
612     if (!avctx->time_base.den || !avctx->time_base.num) {
613         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
614         return -1;
615     }
616
617     i = (INT_MAX / 2 + 128) >> 8;
618     if (avctx->mb_threshold >= i) {
619         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
620                i - 1);
621         return -1;
622     }
623
624     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
625         av_log(avctx, AV_LOG_INFO,
626                "notice: b_frame_strategy only affects the first pass\n");
627         avctx->b_frame_strategy = 0;
628     }
629
630     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
631     if (i > 1) {
632         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
633         avctx->time_base.den /= i;
634         avctx->time_base.num /= i;
635         //return -1;
636     }
637
638     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
639         // (a + x * 3 / 8) / x
640         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
641         s->inter_quant_bias = 0;
642     } else {
643         s->intra_quant_bias = 0;
644         // (a - x / 4) / x
645         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
646     }
647
648     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
649         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
650         return AVERROR(EINVAL);
651     }
652
653     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
654         s->intra_quant_bias = avctx->intra_quant_bias;
655     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
656         s->inter_quant_bias = avctx->inter_quant_bias;
657
658     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
659
660     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
661         s->avctx->time_base.den > (1 << 16) - 1) {
662         av_log(avctx, AV_LOG_ERROR,
663                "timebase %d/%d not supported by MPEG 4 standard, "
664                "the maximum admitted value for the timebase denominator "
665                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
666                (1 << 16) - 1);
667         return -1;
668     }
669     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
670
671     switch (avctx->codec->id) {
672     case AV_CODEC_ID_MPEG1VIDEO:
673         s->out_format = FMT_MPEG1;
674         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
675         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
676         break;
677     case AV_CODEC_ID_MPEG2VIDEO:
678         s->out_format = FMT_MPEG1;
679         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
680         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
681         s->rtp_mode   = 1;
682         break;
683     case AV_CODEC_ID_MJPEG:
684     case AV_CODEC_ID_AMV:
685         s->out_format = FMT_MJPEG;
686         s->intra_only = 1; /* force intra only for jpeg */
687         if (!CONFIG_MJPEG_ENCODER ||
688             ff_mjpeg_encode_init(s) < 0)
689             return -1;
690         avctx->delay = 0;
691         s->low_delay = 1;
692         break;
693     case AV_CODEC_ID_H261:
694         if (!CONFIG_H261_ENCODER)
695             return -1;
696         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
697             av_log(avctx, AV_LOG_ERROR,
698                    "The specified picture size of %dx%d is not valid for the "
699                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
700                     s->width, s->height);
701             return -1;
702         }
703         s->out_format = FMT_H261;
704         avctx->delay  = 0;
705         s->low_delay  = 1;
706         break;
707     case AV_CODEC_ID_H263:
708         if (!CONFIG_H263_ENCODER)
709             return -1;
710         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
711                              s->width, s->height) == 8) {
712             av_log(avctx, AV_LOG_ERROR,
713                    "The specified picture size of %dx%d is not valid for "
714                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
715                    "352x288, 704x576, and 1408x1152. "
716                    "Try H.263+.\n", s->width, s->height);
717             return -1;
718         }
719         s->out_format = FMT_H263;
720         avctx->delay  = 0;
721         s->low_delay  = 1;
722         break;
723     case AV_CODEC_ID_H263P:
724         s->out_format = FMT_H263;
725         s->h263_plus  = 1;
726         /* Fx */
727         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
728         s->modified_quant  = s->h263_aic;
729         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
730         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
731
732         /* /Fx */
733         /* These are just to be sure */
734         avctx->delay = 0;
735         s->low_delay = 1;
736         break;
737     case AV_CODEC_ID_FLV1:
738         s->out_format      = FMT_H263;
739         s->h263_flv        = 2; /* format = 1; 11-bit codes */
740         s->unrestricted_mv = 1;
741         s->rtp_mode  = 0; /* don't allow GOB */
742         avctx->delay = 0;
743         s->low_delay = 1;
744         break;
745     case AV_CODEC_ID_RV10:
746         s->out_format = FMT_H263;
747         avctx->delay  = 0;
748         s->low_delay  = 1;
749         break;
750     case AV_CODEC_ID_RV20:
751         s->out_format      = FMT_H263;
752         avctx->delay       = 0;
753         s->low_delay       = 1;
754         s->modified_quant  = 1;
755         s->h263_aic        = 1;
756         s->h263_plus       = 1;
757         s->loop_filter     = 1;
758         s->unrestricted_mv = 0;
759         break;
760     case AV_CODEC_ID_MPEG4:
761         s->out_format      = FMT_H263;
762         s->h263_pred       = 1;
763         s->unrestricted_mv = 1;
764         s->low_delay       = s->max_b_frames ? 0 : 1;
765         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
766         break;
767     case AV_CODEC_ID_MSMPEG4V2:
768         s->out_format      = FMT_H263;
769         s->h263_pred       = 1;
770         s->unrestricted_mv = 1;
771         s->msmpeg4_version = 2;
772         avctx->delay       = 0;
773         s->low_delay       = 1;
774         break;
775     case AV_CODEC_ID_MSMPEG4V3:
776         s->out_format        = FMT_H263;
777         s->h263_pred         = 1;
778         s->unrestricted_mv   = 1;
779         s->msmpeg4_version   = 3;
780         s->flipflop_rounding = 1;
781         avctx->delay         = 0;
782         s->low_delay         = 1;
783         break;
784     case AV_CODEC_ID_WMV1:
785         s->out_format        = FMT_H263;
786         s->h263_pred         = 1;
787         s->unrestricted_mv   = 1;
788         s->msmpeg4_version   = 4;
789         s->flipflop_rounding = 1;
790         avctx->delay         = 0;
791         s->low_delay         = 1;
792         break;
793     case AV_CODEC_ID_WMV2:
794         s->out_format        = FMT_H263;
795         s->h263_pred         = 1;
796         s->unrestricted_mv   = 1;
797         s->msmpeg4_version   = 5;
798         s->flipflop_rounding = 1;
799         avctx->delay         = 0;
800         s->low_delay         = 1;
801         break;
802     default:
803         return -1;
804     }
805
806     avctx->has_b_frames = !s->low_delay;
807
808     s->encoding = 1;
809
810     s->progressive_frame    =
811     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
812                                                 CODEC_FLAG_INTERLACED_ME) ||
813                                 s->alternate_scan);
814
815     /* init */
816     if (ff_MPV_common_init(s) < 0)
817         return -1;
818
819     ff_qpeldsp_init(&s->qdsp);
820
821     s->avctx->coded_frame = s->current_picture.f;
822
823     if (s->msmpeg4_version) {
824         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
825                           2 * 2 * (MAX_LEVEL + 1) *
826                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
827     }
828     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
829
830     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
831     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
832     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
833     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
834     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
835     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
836     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
837                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
838     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
839                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
840
841     if (s->avctx->noise_reduction) {
842         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
843                           2 * 64 * sizeof(uint16_t), fail);
844     }
845
846     ff_dct_encode_init(s);
847
848     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
849         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
850
851     s->quant_precision = 5;
852
853     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
854     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
855
856     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
857         ff_h261_encode_init(s);
858     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
859         ff_h263_encode_init(s);
860     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
861         ff_msmpeg4_encode_init(s);
862     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
863         && s->out_format == FMT_MPEG1)
864         ff_mpeg1_encode_init(s);
865
866     /* init q matrix */
867     for (i = 0; i < 64; i++) {
868         int j = s->dsp.idct_permutation[i];
869         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
870             s->mpeg_quant) {
871             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
872             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
873         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
874             s->intra_matrix[j] =
875             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
876         } else {
877             /* mpeg1/2 */
878             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
879             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
880         }
881         if (s->avctx->intra_matrix)
882             s->intra_matrix[j] = s->avctx->intra_matrix[i];
883         if (s->avctx->inter_matrix)
884             s->inter_matrix[j] = s->avctx->inter_matrix[i];
885     }
886
887     /* precompute matrix */
888     /* for mjpeg, we do include qscale in the matrix */
889     if (s->out_format != FMT_MJPEG) {
890         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
891                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
892                           31, 1);
893         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
894                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
895                           31, 0);
896     }
897
898     if (ff_rate_control_init(s) < 0)
899         return -1;
900
901 #if FF_API_ERROR_RATE
902     FF_DISABLE_DEPRECATION_WARNINGS
903     if (avctx->error_rate)
904         s->error_rate = avctx->error_rate;
905     FF_ENABLE_DEPRECATION_WARNINGS;
906 #endif
907
908 #if FF_API_NORMALIZE_AQP
909     FF_DISABLE_DEPRECATION_WARNINGS
910     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
911         s->mpv_flags |= FF_MPV_FLAG_NAQ;
912     FF_ENABLE_DEPRECATION_WARNINGS;
913 #endif
914
915 #if FF_API_MV0
916     FF_DISABLE_DEPRECATION_WARNINGS
917     if (avctx->flags & CODEC_FLAG_MV0)
918         s->mpv_flags |= FF_MPV_FLAG_MV0;
919     FF_ENABLE_DEPRECATION_WARNINGS
920 #endif
921
922     if (avctx->b_frame_strategy == 2) {
923         for (i = 0; i < s->max_b_frames + 2; i++) {
924             s->tmp_frames[i] = av_frame_alloc();
925             if (!s->tmp_frames[i])
926                 return AVERROR(ENOMEM);
927
928             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
929             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
930             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
931
932             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
933             if (ret < 0)
934                 return ret;
935         }
936     }
937
938     return 0;
939 fail:
940     ff_MPV_encode_end(avctx);
941     return AVERROR_UNKNOWN;
942 }
943
944 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
945 {
946     MpegEncContext *s = avctx->priv_data;
947     int i;
948
949     ff_rate_control_uninit(s);
950
951     ff_MPV_common_end(s);
952     if (CONFIG_MJPEG_ENCODER &&
953         s->out_format == FMT_MJPEG)
954         ff_mjpeg_encode_close(s);
955
956     av_freep(&avctx->extradata);
957
958     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
959         av_frame_free(&s->tmp_frames[i]);
960
961     ff_free_picture_tables(&s->new_picture);
962     ff_mpeg_unref_picture(s, &s->new_picture);
963
964     av_freep(&s->avctx->stats_out);
965     av_freep(&s->ac_stats);
966
967     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
968     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
969     s->q_chroma_intra_matrix=   NULL;
970     s->q_chroma_intra_matrix16= NULL;
971     av_freep(&s->q_intra_matrix);
972     av_freep(&s->q_inter_matrix);
973     av_freep(&s->q_intra_matrix16);
974     av_freep(&s->q_inter_matrix16);
975     av_freep(&s->input_picture);
976     av_freep(&s->reordered_input_picture);
977     av_freep(&s->dct_offset);
978
979     return 0;
980 }
981
/**
 * Sum of absolute errors of a 16x16 block against a constant value.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared with
 * @param stride distance in bytes between the starts of consecutive rows
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++)
            total += FFABS(line[col] - ref);
    }

    return total;
}
995
996 static int get_intra_count(MpegEncContext *s, uint8_t *src,
997                            uint8_t *ref, int stride)
998 {
999     int x, y, w, h;
1000     int acc = 0;
1001
1002     w = s->width  & ~15;
1003     h = s->height & ~15;
1004
1005     for (y = 0; y < h; y += 16) {
1006         for (x = 0; x < w; x += 16) {
1007             int offset = x + y * stride;
1008             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
1009                                      16);
1010             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
1011             int sae  = get_sae(src + offset, mean, stride);
1012
1013             acc += sae + 500 < sad;
1014         }
1015     }
1016     return acc;
1017 }
1018
1019
1020 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1021 {
1022     Picture *pic = NULL;
1023     int64_t pts;
1024     int i, display_picture_number = 0, ret;
1025     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
1026                                                  (s->low_delay ? 0 : 1);
1027     int direct = 1;
1028
1029     if (pic_arg) {
1030         pts = pic_arg->pts;
1031         display_picture_number = s->input_picture_number++;
1032
1033         if (pts != AV_NOPTS_VALUE) {
1034             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1035                 int64_t last = s->user_specified_pts;
1036
1037                 if (pts <= last) {
1038                     av_log(s->avctx, AV_LOG_ERROR,
1039                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1040                            pts, last);
1041                     return AVERROR(EINVAL);
1042                 }
1043
1044                 if (!s->low_delay && display_picture_number == 1)
1045                     s->dts_delta = pts - last;
1046             }
1047             s->user_specified_pts = pts;
1048         } else {
1049             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1050                 s->user_specified_pts =
1051                 pts = s->user_specified_pts + 1;
1052                 av_log(s->avctx, AV_LOG_INFO,
1053                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1054                        pts);
1055             } else {
1056                 pts = display_picture_number;
1057             }
1058         }
1059     }
1060
1061     if (pic_arg) {
1062         if (!pic_arg->buf[0])
1063             direct = 0;
1064         if (pic_arg->linesize[0] != s->linesize)
1065             direct = 0;
1066         if (pic_arg->linesize[1] != s->uvlinesize)
1067             direct = 0;
1068         if (pic_arg->linesize[2] != s->uvlinesize)
1069             direct = 0;
1070         if ((s->width & 15) || (s->height & 15))
1071             direct = 0;
1072         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1073             direct = 0;
1074         if (s->linesize & (STRIDE_ALIGN-1))
1075             direct = 0;
1076
1077         av_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1078                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1079
1080         if (direct) {
1081             i = ff_find_unused_picture(s, 1);
1082             if (i < 0)
1083                 return i;
1084
1085             pic = &s->picture[i];
1086             pic->reference = 3;
1087
1088             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1089                 return ret;
1090             if (ff_alloc_picture(s, pic, 1) < 0) {
1091                 return -1;
1092             }
1093         } else {
1094             i = ff_find_unused_picture(s, 0);
1095             if (i < 0)
1096                 return i;
1097
1098             pic = &s->picture[i];
1099             pic->reference = 3;
1100
1101             if (ff_alloc_picture(s, pic, 0) < 0) {
1102                 return -1;
1103             }
1104
1105             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1106                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1107                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1108                 // empty
1109             } else {
1110                 int h_chroma_shift, v_chroma_shift;
1111                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1112                                                  &h_chroma_shift,
1113                                                  &v_chroma_shift);
1114
1115                 for (i = 0; i < 3; i++) {
1116                     int src_stride = pic_arg->linesize[i];
1117                     int dst_stride = i ? s->uvlinesize : s->linesize;
1118                     int h_shift = i ? h_chroma_shift : 0;
1119                     int v_shift = i ? v_chroma_shift : 0;
1120                     int w = s->width  >> h_shift;
1121                     int h = s->height >> v_shift;
1122                     uint8_t *src = pic_arg->data[i];
1123                     uint8_t *dst = pic->f->data[i];
1124                     int vpad = 16;
1125
1126                     if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1127                         && !s->progressive_sequence)
1128                         vpad = 32;
1129
1130                     if (!s->avctx->rc_buffer_size)
1131                         dst += INPLACE_OFFSET;
1132
1133                     if (src_stride == dst_stride)
1134                         memcpy(dst, src, src_stride * h);
1135                     else {
1136                         int h2 = h;
1137                         uint8_t *dst2 = dst;
1138                         while (h2--) {
1139                             memcpy(dst2, src, w);
1140                             dst2 += dst_stride;
1141                             src += src_stride;
1142                         }
1143                     }
1144                     if ((s->width & 15) || (s->height & (vpad-1))) {
1145                         s->dsp.draw_edges(dst, dst_stride,
1146                                           w, h,
1147                                           16>>h_shift,
1148                                           vpad>>v_shift,
1149                                           EDGE_BOTTOM);
1150                     }
1151                 }
1152             }
1153         }
1154         ret = av_frame_copy_props(pic->f, pic_arg);
1155         if (ret < 0)
1156             return ret;
1157
1158         pic->f->display_picture_number = display_picture_number;
1159         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1160     }
1161
1162     /* shift buffer entries */
1163     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1164         s->input_picture[i - 1] = s->input_picture[i];
1165
1166     s->input_picture[encoding_delay] = (Picture*) pic;
1167
1168     return 0;
1169 }
1170
1171 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1172 {
1173     int x, y, plane;
1174     int score = 0;
1175     int64_t score64 = 0;
1176
1177     for (plane = 0; plane < 3; plane++) {
1178         const int stride = p->f->linesize[plane];
1179         const int bw = plane ? 1 : 2;
1180         for (y = 0; y < s->mb_height * bw; y++) {
1181             for (x = 0; x < s->mb_width * bw; x++) {
1182                 int off = p->shared ? 0 : 16;
1183                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1184                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1185                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1186
1187                 switch (FFABS(s->avctx->frame_skip_exp)) {
1188                 case 0: score    =  FFMAX(score, v);          break;
1189                 case 1: score   += FFABS(v);                  break;
1190                 case 2: score64 += v * (int64_t)v;                       break;
1191                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1192                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1193                 }
1194             }
1195         }
1196     }
1197     emms_c();
1198
1199     if (score)
1200         score64 = score;
1201     if (s->avctx->frame_skip_exp < 0)
1202         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1203                       -1.0/s->avctx->frame_skip_exp);
1204
1205     if (score64 < s->avctx->frame_skip_threshold)
1206         return 1;
1207     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1208         return 1;
1209     return 0;
1210 }
1211
1212 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1213 {
1214     AVPacket pkt = { 0 };
1215     int ret, got_output;
1216
1217     av_init_packet(&pkt);
1218     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1219     if (ret < 0)
1220         return ret;
1221
1222     ret = pkt.size;
1223     av_free_packet(&pkt);
1224     return ret;
1225 }
1226
1227 static int estimate_best_b_count(MpegEncContext *s)
1228 {
1229     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1230     AVCodecContext *c = avcodec_alloc_context3(NULL);
1231     const int scale = s->avctx->brd_scale;
1232     int i, j, out_size, p_lambda, b_lambda, lambda2;
1233     int64_t best_rd  = INT64_MAX;
1234     int best_b_count = -1;
1235
1236     av_assert0(scale >= 0 && scale <= 3);
1237
1238     //emms_c();
1239     //s->next_picture_ptr->quality;
1240     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1241     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1242     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1243     if (!b_lambda) // FIXME we should do this somewhere else
1244         b_lambda = p_lambda;
1245     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1246                FF_LAMBDA_SHIFT;
1247
1248     c->width        = s->width  >> scale;
1249     c->height       = s->height >> scale;
1250     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1251     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1252     c->mb_decision  = s->avctx->mb_decision;
1253     c->me_cmp       = s->avctx->me_cmp;
1254     c->mb_cmp       = s->avctx->mb_cmp;
1255     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1256     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1257     c->time_base    = s->avctx->time_base;
1258     c->max_b_frames = s->max_b_frames;
1259
1260     if (avcodec_open2(c, codec, NULL) < 0)
1261         return -1;
1262
1263     for (i = 0; i < s->max_b_frames + 2; i++) {
1264         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1265                                                 s->next_picture_ptr;
1266
1267         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1268             pre_input = *pre_input_ptr;
1269
1270             if (!pre_input.shared && i) {
1271                 pre_input.f->data[0] += INPLACE_OFFSET;
1272                 pre_input.f->data[1] += INPLACE_OFFSET;
1273                 pre_input.f->data[2] += INPLACE_OFFSET;
1274             }
1275
1276             s->dsp.shrink[scale](s->tmp_frames[i]->data[0], s->tmp_frames[i]->linesize[0],
1277                                  pre_input.f->data[0], pre_input.f->linesize[0],
1278                                  c->width,      c->height);
1279             s->dsp.shrink[scale](s->tmp_frames[i]->data[1], s->tmp_frames[i]->linesize[1],
1280                                  pre_input.f->data[1], pre_input.f->linesize[1],
1281                                  c->width >> 1, c->height >> 1);
1282             s->dsp.shrink[scale](s->tmp_frames[i]->data[2], s->tmp_frames[i]->linesize[2],
1283                                  pre_input.f->data[2], pre_input.f->linesize[2],
1284                                  c->width >> 1, c->height >> 1);
1285         }
1286     }
1287
1288     for (j = 0; j < s->max_b_frames + 1; j++) {
1289         int64_t rd = 0;
1290
1291         if (!s->input_picture[j])
1292             break;
1293
1294         c->error[0] = c->error[1] = c->error[2] = 0;
1295
1296         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1297         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1298
1299         out_size = encode_frame(c, s->tmp_frames[0]);
1300
1301         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1302
1303         for (i = 0; i < s->max_b_frames + 1; i++) {
1304             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1305
1306             s->tmp_frames[i + 1]->pict_type = is_p ?
1307                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1308             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1309
1310             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1311
1312             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1313         }
1314
1315         /* get the delayed frames */
1316         while (out_size) {
1317             out_size = encode_frame(c, NULL);
1318             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1319         }
1320
1321         rd += c->error[0] + c->error[1] + c->error[2];
1322
1323         if (rd < best_rd) {
1324             best_rd = rd;
1325             best_b_count = j;
1326         }
1327     }
1328
1329     avcodec_close(c);
1330     av_freep(&c);
1331
1332     return best_b_count;
1333 }
1334
1335 static int select_input_picture(MpegEncContext *s)
1336 {
1337     int i, ret;
1338
1339     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1340         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1341     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1342
1343     /* set next picture type & ordering */
1344     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1345         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1346             if (s->picture_in_gop_number < s->gop_size &&
1347                 s->next_picture_ptr &&
1348                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1349                 // FIXME check that te gop check above is +-1 correct
1350                 av_frame_unref(s->input_picture[0]->f);
1351
1352                 ff_vbv_update(s, 0);
1353
1354                 goto no_output_pic;
1355             }
1356         }
1357
1358         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1359             s->next_picture_ptr == NULL || s->intra_only) {
1360             s->reordered_input_picture[0] = s->input_picture[0];
1361             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1362             s->reordered_input_picture[0]->f->coded_picture_number =
1363                 s->coded_picture_number++;
1364         } else {
1365             int b_frames;
1366
1367             if (s->flags & CODEC_FLAG_PASS2) {
1368                 for (i = 0; i < s->max_b_frames + 1; i++) {
1369                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1370
1371                     if (pict_num >= s->rc_context.num_entries)
1372                         break;
1373                     if (!s->input_picture[i]) {
1374                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1375                         break;
1376                     }
1377
1378                     s->input_picture[i]->f->pict_type =
1379                         s->rc_context.entry[pict_num].new_pict_type;
1380                 }
1381             }
1382
1383             if (s->avctx->b_frame_strategy == 0) {
1384                 b_frames = s->max_b_frames;
1385                 while (b_frames && !s->input_picture[b_frames])
1386                     b_frames--;
1387             } else if (s->avctx->b_frame_strategy == 1) {
1388                 for (i = 1; i < s->max_b_frames + 1; i++) {
1389                     if (s->input_picture[i] &&
1390                         s->input_picture[i]->b_frame_score == 0) {
1391                         s->input_picture[i]->b_frame_score =
1392                             get_intra_count(s,
1393                                             s->input_picture[i    ]->f->data[0],
1394                                             s->input_picture[i - 1]->f->data[0],
1395                                             s->linesize) + 1;
1396                     }
1397                 }
1398                 for (i = 0; i < s->max_b_frames + 1; i++) {
1399                     if (s->input_picture[i] == NULL ||
1400                         s->input_picture[i]->b_frame_score - 1 >
1401                             s->mb_num / s->avctx->b_sensitivity)
1402                         break;
1403                 }
1404
1405                 b_frames = FFMAX(0, i - 1);
1406
1407                 /* reset scores */
1408                 for (i = 0; i < b_frames + 1; i++) {
1409                     s->input_picture[i]->b_frame_score = 0;
1410                 }
1411             } else if (s->avctx->b_frame_strategy == 2) {
1412                 b_frames = estimate_best_b_count(s);
1413             } else {
1414                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1415                 b_frames = 0;
1416             }
1417
1418             emms_c();
1419
1420             for (i = b_frames - 1; i >= 0; i--) {
1421                 int type = s->input_picture[i]->f->pict_type;
1422                 if (type && type != AV_PICTURE_TYPE_B)
1423                     b_frames = i;
1424             }
1425             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1426                 b_frames == s->max_b_frames) {
1427                 av_log(s->avctx, AV_LOG_ERROR,
1428                        "warning, too many b frames in a row\n");
1429             }
1430
1431             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1432                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1433                     s->gop_size > s->picture_in_gop_number) {
1434                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1435                 } else {
1436                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1437                         b_frames = 0;
1438                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1439                 }
1440             }
1441
1442             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1443                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1444                 b_frames--;
1445
1446             s->reordered_input_picture[0] = s->input_picture[b_frames];
1447             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1448                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1449             s->reordered_input_picture[0]->f->coded_picture_number =
1450                 s->coded_picture_number++;
1451             for (i = 0; i < b_frames; i++) {
1452                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1453                 s->reordered_input_picture[i + 1]->f->pict_type =
1454                     AV_PICTURE_TYPE_B;
1455                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1456                     s->coded_picture_number++;
1457             }
1458         }
1459     }
1460 no_output_pic:
1461     if (s->reordered_input_picture[0]) {
1462         s->reordered_input_picture[0]->reference =
1463            s->reordered_input_picture[0]->f->pict_type !=
1464                AV_PICTURE_TYPE_B ? 3 : 0;
1465
1466         ff_mpeg_unref_picture(s, &s->new_picture);
1467         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1468             return ret;
1469
1470         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1471             // input is a shared pix, so we can't modifiy it -> alloc a new
1472             // one & ensure that the shared one is reuseable
1473
1474             Picture *pic;
1475             int i = ff_find_unused_picture(s, 0);
1476             if (i < 0)
1477                 return i;
1478             pic = &s->picture[i];
1479
1480             pic->reference = s->reordered_input_picture[0]->reference;
1481             if (ff_alloc_picture(s, pic, 0) < 0) {
1482                 return -1;
1483             }
1484
1485             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1486             if (ret < 0)
1487                 return ret;
1488
1489             /* mark us unused / free shared pic */
1490             av_frame_unref(s->reordered_input_picture[0]->f);
1491             s->reordered_input_picture[0]->shared = 0;
1492
1493             s->current_picture_ptr = pic;
1494         } else {
1495             // input is not a shared pix -> reuse buffer for current_pix
1496             s->current_picture_ptr = s->reordered_input_picture[0];
1497             for (i = 0; i < 4; i++) {
1498                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1499             }
1500         }
1501         ff_mpeg_unref_picture(s, &s->current_picture);
1502         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1503                                        s->current_picture_ptr)) < 0)
1504             return ret;
1505
1506         s->picture_number = s->new_picture.f->display_picture_number;
1507     } else {
1508         ff_mpeg_unref_picture(s, &s->new_picture);
1509     }
1510     return 0;
1511 }
1512
1513 static void frame_end(MpegEncContext *s)
1514 {
1515     if (s->unrestricted_mv &&
1516         s->current_picture.reference &&
1517         !s->intra_only) {
1518         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1519         int hshift = desc->log2_chroma_w;
1520         int vshift = desc->log2_chroma_h;
1521         s->dsp.draw_edges(s->current_picture.f->data[0], s->current_picture.f->linesize[0],
1522                           s->h_edge_pos, s->v_edge_pos,
1523                           EDGE_WIDTH, EDGE_WIDTH,
1524                           EDGE_TOP | EDGE_BOTTOM);
1525         s->dsp.draw_edges(s->current_picture.f->data[1], s->current_picture.f->linesize[1],
1526                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1527                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1528                           EDGE_TOP | EDGE_BOTTOM);
1529         s->dsp.draw_edges(s->current_picture.f->data[2], s->current_picture.f->linesize[2],
1530                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1531                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1532                           EDGE_TOP | EDGE_BOTTOM);
1533     }
1534
1535     emms_c();
1536
1537     s->last_pict_type                 = s->pict_type;
1538     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1539     if (s->pict_type!= AV_PICTURE_TYPE_B)
1540         s->last_non_b_pict_type = s->pict_type;
1541
1542     s->avctx->coded_frame = s->current_picture_ptr->f;
1543
1544 }
1545
1546 static void update_noise_reduction(MpegEncContext *s)
1547 {
1548     int intra, i;
1549
1550     for (intra = 0; intra < 2; intra++) {
1551         if (s->dct_count[intra] > (1 << 16)) {
1552             for (i = 0; i < 64; i++) {
1553                 s->dct_error_sum[intra][i] >>= 1;
1554             }
1555             s->dct_count[intra] >>= 1;
1556         }
1557
1558         for (i = 0; i < 64; i++) {
1559             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1560                                        s->dct_count[intra] +
1561                                        s->dct_error_sum[intra][i] / 2) /
1562                                       (s->dct_error_sum[intra][i] + 1);
1563         }
1564     }
1565 }
1566
1567 static int frame_start(MpegEncContext *s)
1568 {
1569     int ret;
1570
1571     /* mark & release old frames */
1572     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1573         s->last_picture_ptr != s->next_picture_ptr &&
1574         s->last_picture_ptr->f->buf[0]) {
1575         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1576     }
1577
1578     s->current_picture_ptr->f->pict_type = s->pict_type;
1579     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1580
1581     ff_mpeg_unref_picture(s, &s->current_picture);
1582     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1583                                    s->current_picture_ptr)) < 0)
1584         return ret;
1585
1586     if (s->pict_type != AV_PICTURE_TYPE_B) {
1587         s->last_picture_ptr = s->next_picture_ptr;
1588         if (!s->droppable)
1589             s->next_picture_ptr = s->current_picture_ptr;
1590     }
1591
1592     if (s->last_picture_ptr) {
1593         ff_mpeg_unref_picture(s, &s->last_picture);
1594         if (s->last_picture_ptr->f->buf[0] &&
1595             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1596                                        s->last_picture_ptr)) < 0)
1597             return ret;
1598     }
1599     if (s->next_picture_ptr) {
1600         ff_mpeg_unref_picture(s, &s->next_picture);
1601         if (s->next_picture_ptr->f->buf[0] &&
1602             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1603                                        s->next_picture_ptr)) < 0)
1604             return ret;
1605     }
1606
1607     if (s->picture_structure!= PICT_FRAME) {
1608         int i;
1609         for (i = 0; i < 4; i++) {
1610             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1611                 s->current_picture.f->data[i] +=
1612                     s->current_picture.f->linesize[i];
1613             }
1614             s->current_picture.f->linesize[i] *= 2;
1615             s->last_picture.f->linesize[i]    *= 2;
1616             s->next_picture.f->linesize[i]    *= 2;
1617         }
1618     }
1619
1620     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1621         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1622         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1623     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1624         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1625         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1626     } else {
1627         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1628         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1629     }
1630
1631     if (s->dct_error_sum) {
1632         av_assert2(s->avctx->noise_reduction && s->encoding);
1633         update_noise_reduction(s);
1634     }
1635
1636     return 0;
1637 }
1638
1639 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1640                           const AVFrame *pic_arg, int *got_packet)
1641 {
1642     MpegEncContext *s = avctx->priv_data;
1643     int i, stuffing_count, ret;
1644     int context_count = s->slice_context_count;
1645
1646     s->picture_in_gop_number++;
1647
1648     if (load_input_picture(s, pic_arg) < 0)
1649         return -1;
1650
1651     if (select_input_picture(s) < 0) {
1652         return -1;
1653     }
1654
1655     /* output? */
1656     if (s->new_picture.f->data[0]) {
1657         if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1658             return ret;
1659         if (s->mb_info) {
1660             s->mb_info_ptr = av_packet_new_side_data(pkt,
1661                                  AV_PKT_DATA_H263_MB_INFO,
1662                                  s->mb_width*s->mb_height*12);
1663             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1664         }
1665
1666         for (i = 0; i < context_count; i++) {
1667             int start_y = s->thread_context[i]->start_mb_y;
1668             int   end_y = s->thread_context[i]->  end_mb_y;
1669             int h       = s->mb_height;
1670             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1671             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1672
1673             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1674         }
1675
1676         s->pict_type = s->new_picture.f->pict_type;
1677         //emms_c();
1678         ret = frame_start(s);
1679         if (ret < 0)
1680             return ret;
1681 vbv_retry:
1682         if (encode_picture(s, s->picture_number) < 0)
1683             return -1;
1684
1685         avctx->header_bits = s->header_bits;
1686         avctx->mv_bits     = s->mv_bits;
1687         avctx->misc_bits   = s->misc_bits;
1688         avctx->i_tex_bits  = s->i_tex_bits;
1689         avctx->p_tex_bits  = s->p_tex_bits;
1690         avctx->i_count     = s->i_count;
1691         // FIXME f/b_count in avctx
1692         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1693         avctx->skip_count  = s->skip_count;
1694
1695         frame_end(s);
1696
1697         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1698             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1699
1700         if (avctx->rc_buffer_size) {
1701             RateControlContext *rcc = &s->rc_context;
1702             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1703
1704             if (put_bits_count(&s->pb) > max_size &&
1705                 s->lambda < s->avctx->lmax) {
1706                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1707                                        (s->qscale + 1) / s->qscale);
1708                 if (s->adaptive_quant) {
1709                     int i;
1710                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1711                         s->lambda_table[i] =
1712                             FFMAX(s->lambda_table[i] + 1,
1713                                   s->lambda_table[i] * (s->qscale + 1) /
1714                                   s->qscale);
1715                 }
1716                 s->mb_skipped = 0;        // done in frame_start()
1717                 // done in encode_picture() so we must undo it
1718                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1719                     if (s->flipflop_rounding          ||
1720                         s->codec_id == AV_CODEC_ID_H263P ||
1721                         s->codec_id == AV_CODEC_ID_MPEG4)
1722                         s->no_rounding ^= 1;
1723                 }
1724                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1725                     s->time_base       = s->last_time_base;
1726                     s->last_non_b_time = s->time - s->pp_time;
1727                 }
1728                 for (i = 0; i < context_count; i++) {
1729                     PutBitContext *pb = &s->thread_context[i]->pb;
1730                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1731                 }
1732                 goto vbv_retry;
1733             }
1734
1735             av_assert0(s->avctx->rc_max_rate);
1736         }
1737
1738         if (s->flags & CODEC_FLAG_PASS1)
1739             ff_write_pass1_stats(s);
1740
1741         for (i = 0; i < 4; i++) {
1742             s->current_picture_ptr->f->error[i] =
1743             s->current_picture.f->error[i] =
1744                 s->current_picture.error[i];
1745             avctx->error[i] += s->current_picture_ptr->f->error[i];
1746         }
1747
1748         if (s->flags & CODEC_FLAG_PASS1)
1749             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1750                    avctx->i_tex_bits + avctx->p_tex_bits ==
1751                        put_bits_count(&s->pb));
1752         flush_put_bits(&s->pb);
1753         s->frame_bits  = put_bits_count(&s->pb);
1754
1755         stuffing_count = ff_vbv_update(s, s->frame_bits);
1756         s->stuffing_bits = 8*stuffing_count;
1757         if (stuffing_count) {
1758             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1759                     stuffing_count + 50) {
1760                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1761                 return -1;
1762             }
1763
1764             switch (s->codec_id) {
1765             case AV_CODEC_ID_MPEG1VIDEO:
1766             case AV_CODEC_ID_MPEG2VIDEO:
1767                 while (stuffing_count--) {
1768                     put_bits(&s->pb, 8, 0);
1769                 }
1770             break;
1771             case AV_CODEC_ID_MPEG4:
1772                 put_bits(&s->pb, 16, 0);
1773                 put_bits(&s->pb, 16, 0x1C3);
1774                 stuffing_count -= 4;
1775                 while (stuffing_count--) {
1776                     put_bits(&s->pb, 8, 0xFF);
1777                 }
1778             break;
1779             default:
1780                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1781             }
1782             flush_put_bits(&s->pb);
1783             s->frame_bits  = put_bits_count(&s->pb);
1784         }
1785
1786         /* update mpeg1/2 vbv_delay for CBR */
1787         if (s->avctx->rc_max_rate                          &&
1788             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1789             s->out_format == FMT_MPEG1                     &&
1790             90000LL * (avctx->rc_buffer_size - 1) <=
1791                 s->avctx->rc_max_rate * 0xFFFFLL) {
1792             int vbv_delay, min_delay;
1793             double inbits  = s->avctx->rc_max_rate *
1794                              av_q2d(s->avctx->time_base);
1795             int    minbits = s->frame_bits - 8 *
1796                              (s->vbv_delay_ptr - s->pb.buf - 1);
1797             double bits    = s->rc_context.buffer_index + minbits - inbits;
1798
1799             if (bits < 0)
1800                 av_log(s->avctx, AV_LOG_ERROR,
1801                        "Internal error, negative bits\n");
1802
1803             assert(s->repeat_first_field == 0);
1804
1805             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1806             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1807                         s->avctx->rc_max_rate;
1808
1809             vbv_delay = FFMAX(vbv_delay, min_delay);
1810
1811             av_assert0(vbv_delay < 0xFFFF);
1812
1813             s->vbv_delay_ptr[0] &= 0xF8;
1814             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1815             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1816             s->vbv_delay_ptr[2] &= 0x07;
1817             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1818             avctx->vbv_delay     = vbv_delay * 300;
1819         }
1820         s->total_bits     += s->frame_bits;
1821         avctx->frame_bits  = s->frame_bits;
1822
1823         pkt->pts = s->current_picture.f->pts;
1824         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1825             if (!s->current_picture.f->coded_picture_number)
1826                 pkt->dts = pkt->pts - s->dts_delta;
1827             else
1828                 pkt->dts = s->reordered_pts;
1829             s->reordered_pts = pkt->pts;
1830         } else
1831             pkt->dts = pkt->pts;
1832         if (s->current_picture.f->key_frame)
1833             pkt->flags |= AV_PKT_FLAG_KEY;
1834         if (s->mb_info)
1835             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1836     } else {
1837         s->frame_bits = 0;
1838     }
1839
1840     /* release non-reference frames */
1841     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1842         if (!s->picture[i].reference)
1843             ff_mpeg_unref_picture(s, &s->picture[i]);
1844     }
1845
1846     av_assert1((s->frame_bits & 7) == 0);
1847
1848     pkt->size = s->frame_bits / 8;
1849     *got_packet = !!pkt->size;
1850     return 0;
1851 }
1852
1853 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1854                                                 int n, int threshold)
1855 {
1856     static const char tab[64] = {
1857         3, 2, 2, 1, 1, 1, 1, 1,
1858         1, 1, 1, 1, 1, 1, 1, 1,
1859         1, 1, 1, 1, 1, 1, 1, 1,
1860         0, 0, 0, 0, 0, 0, 0, 0,
1861         0, 0, 0, 0, 0, 0, 0, 0,
1862         0, 0, 0, 0, 0, 0, 0, 0,
1863         0, 0, 0, 0, 0, 0, 0, 0,
1864         0, 0, 0, 0, 0, 0, 0, 0
1865     };
1866     int score = 0;
1867     int run = 0;
1868     int i;
1869     int16_t *block = s->block[n];
1870     const int last_index = s->block_last_index[n];
1871     int skip_dc;
1872
1873     if (threshold < 0) {
1874         skip_dc = 0;
1875         threshold = -threshold;
1876     } else
1877         skip_dc = 1;
1878
1879     /* Are all we could set to zero already zero? */
1880     if (last_index <= skip_dc - 1)
1881         return;
1882
1883     for (i = 0; i <= last_index; i++) {
1884         const int j = s->intra_scantable.permutated[i];
1885         const int level = FFABS(block[j]);
1886         if (level == 1) {
1887             if (skip_dc && i == 0)
1888                 continue;
1889             score += tab[run];
1890             run = 0;
1891         } else if (level > 1) {
1892             return;
1893         } else {
1894             run++;
1895         }
1896     }
1897     if (score >= threshold)
1898         return;
1899     for (i = skip_dc; i <= last_index; i++) {
1900         const int j = s->intra_scantable.permutated[i];
1901         block[j] = 0;
1902     }
1903     if (block[0])
1904         s->block_last_index[n] = 0;
1905     else
1906         s->block_last_index[n] = -1;
1907 }
1908
1909 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1910                                int last_index)
1911 {
1912     int i;
1913     const int maxlevel = s->max_qcoeff;
1914     const int minlevel = s->min_qcoeff;
1915     int overflow = 0;
1916
1917     if (s->mb_intra) {
1918         i = 1; // skip clipping of intra dc
1919     } else
1920         i = 0;
1921
1922     for (; i <= last_index; i++) {
1923         const int j = s->intra_scantable.permutated[i];
1924         int level = block[j];
1925
1926         if (level > maxlevel) {
1927             level = maxlevel;
1928             overflow++;
1929         } else if (level < minlevel) {
1930             level = minlevel;
1931             overflow++;
1932         }
1933
1934         block[j] = level;
1935     }
1936
1937     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1938         av_log(s->avctx, AV_LOG_INFO,
1939                "warning, clipping %d dct coefficients to %d..%d\n",
1940                overflow, minlevel, maxlevel);
1941 }
1942
/**
 * Fill an 8x8 weight table from the local activity of an 8x8 pixel block.
 *
 * For every pixel the sum and the sum of squares are gathered over its
 * neighbourhood (up to 3x3, cut off at the borders of the 8x8 block), and
 * the weight is 36 * ff_sqrt(count * sqr - sum * sum) / count.
 *
 * @param weight output table of 64 entries, stored row by row
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance between two pixel rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int x, y;
    // FIXME optimize
    for (y = 0; y < 8; y++) {
        const int row_first = FFMAX(y - 1, 0);
        const int row_end   = FFMIN(8, y + 2);

        for (x = 0; x < 8; x++) {
            const int col_first = FFMAX(x - 1, 0);
            const int col_end   = FFMIN(8, x + 2);
            int sum   = 0;
            int sqr   = 0;
            int count = 0;
            int nx, ny;

            for (ny = row_first; ny < row_end; ny++) {
                for (nx = col_first; nx < col_end; nx++) {
                    const int v = ptr[nx + ny * stride];

                    sum += v;
                    sqr += v * v;
                    count++;
                }
            }

            weight[x + 8 * y] =
                (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
1966
/**
 * Encode the macroblock at (s->mb_x, s->mb_y).
 *
 * Steps performed here:
 *  - update qscale/dquant for adaptive quantization or QP_RD,
 *  - locate the source pixels in s->new_picture, going through an
 *    edge-emulated copy when the macroblock crosses the picture border,
 *  - intra: load the source pixels into s->block[];
 *    inter: run motion compensation into s->dest[] and store the
 *    difference between source and prediction in s->block[],
 *  - optionally choose interlaced DCT by comparing ildct_cmp scores,
 *  - DCT + quantize every block that is not flagged in skip_dct[],
 *    followed by the optional refinement / elimination passes,
 *  - hand the blocks to the codec-specific macroblock writer.
 *
 * @param s               encoder context
 * @param motion_x        forwarded unchanged to the codec-specific writer
 * @param motion_y        forwarded unchanged to the codec-specific writer
 * @param mb_block_height height in pixels of the chroma area of one
 *                        macroblock (8 or 16, see encode_mb())
 * @param mb_block_width  width in pixels of the chroma area of one
 *                        macroblock (8 or 16, see encode_mb())
 * @param mb_block_count  number of 8x8 blocks per macroblock (6, 8 or 12)
 */
1967 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1968                                                 int motion_x, int motion_y,
1969                                                 int mb_block_height,
1970                                                 int mb_block_width,
1971                                                 int mb_block_count)
1972 {
1973     int16_t weight[12][64];
1974     int16_t orig[12][64];
1975     const int mb_x = s->mb_x;
1976     const int mb_y = s->mb_y;
1977     int i;
1978     int skip_dct[12];
1979     int dct_offset = s->linesize * 8; // default for progressive frames
1980     int uv_dct_offset = s->uvlinesize * 8;
1981     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1982     ptrdiff_t wrap_y, wrap_c;
1983
         /* only the first mb_block_count entries of skip_dct[] are set */
1984     for (i = 0; i < mb_block_count; i++)
1985         skip_dct[i] = s->skipdct;
1986
         /* adaptive quantization: take lambda from the per-macroblock table
          * and derive the quantizer change relative to the previous qscale */
1987     if (s->adaptive_quant) {
1988         const int last_qp = s->qscale;
1989         const int mb_xy = mb_x + mb_y * s->mb_stride;
1990
1991         s->lambda = s->lambda_table[mb_xy];
1992         update_qscale(s);
1993
1994         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1995             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1996             s->dquant = s->qscale - last_qp;
1997
1998             if (s->out_format == FMT_H263) {
1999                 s->dquant = av_clip(s->dquant, -2, 2);
2000
2001                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2002                     if (!s->mb_intra) {
2003                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2004                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2005                                 s->dquant = 0;
2006                         }
2007                         if (s->mv_type == MV_TYPE_8X8)
2008                             s->dquant = 0;
2009                     }
2010                 }
2011             }
2012         }
2013         ff_set_qscale(s, last_qp + s->dquant);
2014     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2015         ff_set_qscale(s, s->qscale + s->dquant);
2016
         /* source pixel pointers for this macroblock */
2017     wrap_y = s->linesize;
2018     wrap_c = s->uvlinesize;
2019     ptr_y  = s->new_picture.f->data[0] +
2020              (mb_y * 16 * wrap_y)              + mb_x * 16;
2021     ptr_cb = s->new_picture.f->data[1] +
2022              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2023     ptr_cr = s->new_picture.f->data[2] +
2024              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2025
         /* the macroblock reaches past the right or bottom picture edge:
          * read from copies built in edge_emu_buffer instead (skipped for
          * AMV) */
2026     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2027         uint8_t *ebuf = s->edge_emu_buffer + 32;
2028         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2029         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2030         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2031                                  wrap_y, wrap_y,
2032                                  16, 16, mb_x * 16, mb_y * 16,
2033                                  s->width, s->height);
2034         ptr_y = ebuf;
2035         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
2036                                  wrap_c, wrap_c,
2037                                  mb_block_width, mb_block_height,
2038                                  mb_x * mb_block_width, mb_y * mb_block_height,
2039                                  cw, ch);
2040         ptr_cb = ebuf + 18 * wrap_y;
2041         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 16, ptr_cr,
2042                                  wrap_c, wrap_c,
2043                                  mb_block_width, mb_block_height,
2044                                  mb_x * mb_block_width, mb_y * mb_block_height,
2045                                  cw, ch);
2046         ptr_cr = ebuf + 18 * wrap_y + 16;
2047     }
2048
2049     if (s->mb_intra) {
             /* interlaced DCT is chosen when the score of the two fields is
              * lower than the score of the two frame halves (minus a bias
              * of 400 in favour of progressive) */
2050         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2051             int progressive_score, interlaced_score;
2052
2053             s->interlaced_dct = 0;
2054             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
2055                                                     NULL, wrap_y, 8) +
2056                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2057                                                     NULL, wrap_y, 8) - 400;
2058
2059             if (progressive_score > 0) {
2060                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
2061                                                        NULL, wrap_y * 2, 8) +
2062                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
2063                                                        NULL, wrap_y * 2, 8);
2064                 if (progressive_score > interlaced_score) {
2065                     s->interlaced_dct = 1;
2066
                         /* lower blocks now start one line down and every
                          * block reads every second line */
2067                     dct_offset = wrap_y;
2068                     uv_dct_offset = wrap_c;
2069                     wrap_y <<= 1;
2070                     if (s->chroma_format == CHROMA_422 ||
2071                         s->chroma_format == CHROMA_444)
2072                         wrap_c <<= 1;
2073                 }
2074             }
2075         }
2076
2077         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
2078         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
2079         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
2080         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
2081
2082         if (s->flags & CODEC_FLAG_GRAY) {
2083             skip_dct[4] = 1;
2084             skip_dct[5] = 1;
2085         } else {
2086             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2087             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2088             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2089                 s->dsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2090                 s->dsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2091             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2092                 s->dsp.get_pixels(s->block[6], ptr_cb + 8, wrap_c);
2093                 s->dsp.get_pixels(s->block[7], ptr_cr + 8, wrap_c);
2094                 s->dsp.get_pixels(s->block[8], ptr_cb + uv_dct_offset, wrap_c);
2095                 s->dsp.get_pixels(s->block[9], ptr_cr + uv_dct_offset, wrap_c);
2096                 s->dsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2097                 s->dsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2098             }
2099         }
2100     } else {
2101         op_pixels_func (*op_pix)[4];
2102         qpel_mc_func (*op_qpix)[16];
2103         uint8_t *dest_y, *dest_cb, *dest_cr;
2104
2105         dest_y  = s->dest[0];
2106         dest_cb = s->dest[1];
2107         dest_cr = s->dest[2];
2108
2109         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2110             op_pix  = s->hdsp.put_pixels_tab;
2111             op_qpix = s->qdsp.put_qpel_pixels_tab;
2112         } else {
2113             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2114             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2115         }
2116
             /* the forward prediction is written with "put"; a following
              * backward prediction then uses the "avg" functions */
2117         if (s->mv_dir & MV_DIR_FORWARD) {
2118             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
2119                           s->last_picture.f->data,
2120                           op_pix, op_qpix);
2121             op_pix  = s->hdsp.avg_pixels_tab;
2122             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2123         }
2124         if (s->mv_dir & MV_DIR_BACKWARD) {
2125             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
2126                           s->next_picture.f->data,
2127                           op_pix, op_qpix);
2128         }
2129
             /* same progressive/interlaced decision as in the intra path,
              * but scored on source versus prediction */
2130         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2131             int progressive_score, interlaced_score;
2132
2133             s->interlaced_dct = 0;
2134             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
2135                                                     ptr_y,              wrap_y,
2136                                                     8) +
2137                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
2138                                                     ptr_y + wrap_y * 8, wrap_y,
2139                                                     8) - 400;
2140
2141             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2142                 progressive_score -= 400;
2143
2144             if (progressive_score > 0) {
2145                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
2146                                                        ptr_y,
2147                                                        wrap_y * 2, 8) +
2148                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
2149                                                        ptr_y + wrap_y,
2150                                                        wrap_y * 2, 8);
2151
2152                 if (progressive_score > interlaced_score) {
2153                     s->interlaced_dct = 1;
2154
2155                     dct_offset = wrap_y;
2156                     uv_dct_offset = wrap_c;
2157                     wrap_y <<= 1;
2158                     if (s->chroma_format == CHROMA_422)
2159                         wrap_c <<= 1;
2160                 }
2161             }
2162         }
2163
             /* residual = source - prediction */
2164         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2165         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2166         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2167                            dest_y + dct_offset, wrap_y);
2168         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2169                            dest_y + dct_offset + 8, wrap_y);
2170
2171         if (s->flags & CODEC_FLAG_GRAY) {
2172             skip_dct[4] = 1;
2173             skip_dct[5] = 1;
2174         } else {
2175             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2176             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2177             if (!s->chroma_y_shift) { /* 422 */
2178                 s->dsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2179                                    dest_cb + uv_dct_offset, wrap_c);
2180                 s->dsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2181                                    dest_cr + uv_dct_offset, wrap_c);
2182             }
2183         }
2184         /* pre quantization */
             /* when the stored mc_mb_var of this macroblock is below
              * 2 * qscale^2, each block whose SAD against the prediction is
              * below 20 * qscale is marked as not coded */
2185         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2186                 2 * s->qscale * s->qscale) {
2187             // FIXME optimize
2188             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
2189                               wrap_y, 8) < 20 * s->qscale)
2190                 skip_dct[0] = 1;
2191             if (s->dsp.sad[1](NULL, ptr_y + 8,
2192                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2193                 skip_dct[1] = 1;
2194             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
2195                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
2196                 skip_dct[2] = 1;
2197             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
2198                               dest_y + dct_offset + 8,
2199                               wrap_y, 8) < 20 * s->qscale)
2200                 skip_dct[3] = 1;
2201             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
2202                               wrap_c, 8) < 20 * s->qscale)
2203                 skip_dct[4] = 1;
2204             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
2205                               wrap_c, 8) < 20 * s->qscale)
2206                 skip_dct[5] = 1;
2207             if (!s->chroma_y_shift) { /* 422 */
2208                 if (s->dsp.sad[1](NULL, ptr_cb + uv_dct_offset,
2209                                   dest_cb + uv_dct_offset,
2210                                   wrap_c, 8) < 20 * s->qscale)
2211                     skip_dct[6] = 1;
2212                 if (s->dsp.sad[1](NULL, ptr_cr + uv_dct_offset,
2213                                   dest_cr + uv_dct_offset,
2214                                   wrap_c, 8) < 20 * s->qscale)
2215                     skip_dct[7] = 1;
2216             }
2217         }
2218     }
2219
         /* noise shaping needs per-block weights computed from the source
          * pixels and a copy of the unquantized blocks for
          * dct_quantize_refine() */
2220     if (s->quantizer_noise_shaping) {
2221         if (!skip_dct[0])
2222             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2223         if (!skip_dct[1])
2224             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2225         if (!skip_dct[2])
2226             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2227         if (!skip_dct[3])
2228             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2229         if (!skip_dct[4])
2230             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2231         if (!skip_dct[5])
2232             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2233         if (!s->chroma_y_shift) { /* 422 */
2234             if (!skip_dct[6])
2235                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2236                                   wrap_c);
2237             if (!skip_dct[7])
2238                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2239                                   wrap_c);
2240         }
2241         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2242     }
2243
2244     /* DCT & quantize */
2245     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2246     {
2247         for (i = 0; i < mb_block_count; i++) {
2248             if (!skip_dct[i]) {
2249                 int overflow;
2250                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2251                 // FIXME we could decide to change to quantizer instead of
2252                 // clipping
2253                 // JS: I don't think that would be a good idea it could lower
2254                 //     quality instead of improve it. Just INTRADC clipping
2255                 //     deserves changes in quantizer
2256                 if (overflow)
2257                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2258             } else
2259                 s->block_last_index[i] = -1;
2260         }
2261         if (s->quantizer_noise_shaping) {
2262             for (i = 0; i < mb_block_count; i++) {
2263                 if (!skip_dct[i]) {
2264                     s->block_last_index[i] =
2265                         dct_quantize_refine(s, s->block[i], weight[i],
2266                                             orig[i], i, s->qscale);
2267                 }
2268             }
2269         }
2270
             /* single coefficient elimination is applied to inter
              * macroblocks only */
2271         if (s->luma_elim_threshold && !s->mb_intra)
2272             for (i = 0; i < 4; i++)
2273                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2274         if (s->chroma_elim_threshold && !s->mb_intra)
2275             for (i = 4; i < mb_block_count; i++)
2276                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2277
2278         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2279             for (i = 0; i < mb_block_count; i++) {
2280                 if (s->block_last_index[i] == -1)
2281                     s->coded_score[i] = INT_MAX / 256;
2282             }
2283         }
2284     }
2285
         /* gray-only intra macroblock: the chroma blocks were skipped above,
          * give them a fixed DC-only value instead */
2286     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2287         s->block_last_index[4] =
2288         s->block_last_index[5] = 0;
2289         s->block[4][0] =
2290         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2291         if (!s->chroma_y_shift) { /* 422 / 444 */
2292             for (i=6; i<12; i++) {
2293                 s->block_last_index[i] = 0;
2294                 s->block[i][0] = s->block[4][0];
2295             }
2296         }
2297     }
2298
2299     // non c quantize code returns incorrect block_last_index FIXME
         /* recompute the last index by scanning backwards for the last
          * nonzero coefficient in scan order */
2300     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2301         for (i = 0; i < mb_block_count; i++) {
2302             int j;
2303             if (s->block_last_index[i] > 0) {
2304                 for (j = 63; j > 0; j--) {
2305                     if (s->block[i][s->intra_scantable.permutated[j]])
2306                         break;
2307                 }
2308                 s->block_last_index[i] = j;
2309             }
2310         }
2311     }
2312
2313     /* huffman encode */
2314     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2315     case AV_CODEC_ID_MPEG1VIDEO:
2316     case AV_CODEC_ID_MPEG2VIDEO:
2317         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2318             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2319         break;
2320     case AV_CODEC_ID_MPEG4:
2321         if (CONFIG_MPEG4_ENCODER)
2322             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2323         break;
2324     case AV_CODEC_ID_MSMPEG4V2:
2325     case AV_CODEC_ID_MSMPEG4V3:
2326     case AV_CODEC_ID_WMV1:
2327         if (CONFIG_MSMPEG4_ENCODER)
2328             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2329         break;
2330     case AV_CODEC_ID_WMV2:
2331         if (CONFIG_WMV2_ENCODER)
2332             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2333         break;
2334     case AV_CODEC_ID_H261:
2335         if (CONFIG_H261_ENCODER)
2336             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2337         break;
2338     case AV_CODEC_ID_H263:
2339     case AV_CODEC_ID_H263P:
2340     case AV_CODEC_ID_FLV1:
2341     case AV_CODEC_ID_RV10:
2342     case AV_CODEC_ID_RV20:
2343         if (CONFIG_H263_ENCODER)
2344             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2345         break;
2346     case AV_CODEC_ID_MJPEG:
2347     case AV_CODEC_ID_AMV:
2348         if (CONFIG_MJPEG_ENCODER)
2349             ff_mjpeg_encode_mb(s, s->block);
2350         break;
2351     default:
2352         av_assert1(0);
2353     }
2354 }
2355
2356 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2357 {
2358     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2359     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2360     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2361 }
2362
2363 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2364     int i;
2365
2366     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2367
2368     /* mpeg1 */
2369     d->mb_skip_run= s->mb_skip_run;
2370     for(i=0; i<3; i++)
2371         d->last_dc[i] = s->last_dc[i];
2372
2373     /* statistics */
2374     d->mv_bits= s->mv_bits;
2375     d->i_tex_bits= s->i_tex_bits;
2376     d->p_tex_bits= s->p_tex_bits;
2377     d->i_count= s->i_count;
2378     d->f_count= s->f_count;
2379     d->b_count= s->b_count;
2380     d->skip_count= s->skip_count;
2381     d->misc_bits= s->misc_bits;
2382     d->last_bits= 0;
2383
2384     d->mb_skipped= 0;
2385     d->qscale= s->qscale;
2386     d->dquant= s->dquant;
2387
2388     d->esc3_level_length= s->esc3_level_length;
2389 }
2390
2391 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2392     int i;
2393
2394     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2395     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2396
2397     /* mpeg1 */
2398     d->mb_skip_run= s->mb_skip_run;
2399     for(i=0; i<3; i++)
2400         d->last_dc[i] = s->last_dc[i];
2401
2402     /* statistics */
2403     d->mv_bits= s->mv_bits;
2404     d->i_tex_bits= s->i_tex_bits;
2405     d->p_tex_bits= s->p_tex_bits;
2406     d->i_count= s->i_count;
2407     d->f_count= s->f_count;
2408     d->b_count= s->b_count;
2409     d->skip_count= s->skip_count;
2410     d->misc_bits= s->misc_bits;
2411
2412     d->mb_intra= s->mb_intra;
2413     d->mb_skipped= s->mb_skipped;
2414     d->mv_type= s->mv_type;
2415     d->mv_dir= s->mv_dir;
2416     d->pb= s->pb;
2417     if(s->data_partitioning){
2418         d->pb2= s->pb2;
2419         d->tex_pb= s->tex_pb;
2420     }
2421     d->block= s->block;
2422     for(i=0; i<8; i++)
2423         d->block_last_index[i]= s->block_last_index[i];
2424     d->interlaced_dct= s->interlaced_dct;
2425     d->qscale= s->qscale;
2426
2427     d->esc3_level_length= s->esc3_level_length;
2428 }
2429
2430 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2431                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2432                            int *dmin, int *next_block, int motion_x, int motion_y)
2433 {
2434     int score;
2435     uint8_t *dest_backup[3];
2436
2437     copy_context_before_encode(s, backup, type);
2438
2439     s->block= s->blocks[*next_block];
2440     s->pb= pb[*next_block];
2441     if(s->data_partitioning){
2442         s->pb2   = pb2   [*next_block];
2443         s->tex_pb= tex_pb[*next_block];
2444     }
2445
2446     if(*next_block){
2447         memcpy(dest_backup, s->dest, sizeof(s->dest));
2448         s->dest[0] = s->rd_scratchpad;
2449         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2450         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2451         av_assert0(s->linesize >= 32); //FIXME
2452     }
2453
2454     encode_mb(s, motion_x, motion_y);
2455
2456     score= put_bits_count(&s->pb);
2457     if(s->data_partitioning){
2458         score+= put_bits_count(&s->pb2);
2459         score+= put_bits_count(&s->tex_pb);
2460     }
2461
2462     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2463         ff_MPV_decode_mb(s, s->block);
2464
2465         score *= s->lambda2;
2466         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2467     }
2468
2469     if(*next_block){
2470         memcpy(s->dest, dest_backup, sizeof(s->dest));
2471     }
2472
2473     if(score<*dmin){
2474         *dmin= score;
2475         *next_block^=1;
2476
2477         copy_context_after_encode(best, s, type);
2478     }
2479 }
2480
2481 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2482     uint32_t *sq = ff_square_tab + 256;
2483     int acc=0;
2484     int x,y;
2485
2486     if(w==16 && h==16)
2487         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2488     else if(w==8 && h==8)
2489         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2490
2491     for(y=0; y<h; y++){
2492         for(x=0; x<w; x++){
2493             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2494         }
2495     }
2496
2497     av_assert2(acc>=0);
2498
2499     return acc;
2500 }
2501
2502 static int sse_mb(MpegEncContext *s){
2503     int w= 16;
2504     int h= 16;
2505
2506     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2507     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2508
2509     if(w==16 && h==16)
2510       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2511         return  s->dsp.nsse[0](s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2512                +s->dsp.nsse[1](s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2513                +s->dsp.nsse[1](s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2514       }else{
2515         return  s->dsp.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2516                +s->dsp.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2517                +s->dsp.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2518       }
2519     else
2520         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2521                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2522                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2523 }
2524
2525 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2526     MpegEncContext *s= *(void**)arg;
2527
2528
2529     s->me.pre_pass=1;
2530     s->me.dia_size= s->avctx->pre_dia_size;
2531     s->first_slice_line=1;
2532     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2533         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2534             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2535         }
2536         s->first_slice_line=0;
2537     }
2538
2539     s->me.pre_pass=0;
2540
2541     return 0;
2542 }
2543
2544 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2545     MpegEncContext *s= *(void**)arg;
2546
2547     ff_check_alignment();
2548
2549     s->me.dia_size= s->avctx->dia_size;
2550     s->first_slice_line=1;
2551     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2552         s->mb_x=0; //for block init below
2553         ff_init_block_index(s);
2554         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2555             s->block_index[0]+=2;
2556             s->block_index[1]+=2;
2557             s->block_index[2]+=2;
2558             s->block_index[3]+=2;
2559
2560             /* compute motion vector & mb_type and store in context */
2561             if(s->pict_type==AV_PICTURE_TYPE_B)
2562                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2563             else
2564                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2565         }
2566         s->first_slice_line=0;
2567     }
2568     return 0;
2569 }
2570
2571 static int mb_var_thread(AVCodecContext *c, void *arg){
2572     MpegEncContext *s= *(void**)arg;
2573     int mb_x, mb_y;
2574
2575     ff_check_alignment();
2576
2577     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2578         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2579             int xx = mb_x * 16;
2580             int yy = mb_y * 16;
2581             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2582             int varc;
2583             int sum = s->dsp.pix_sum(pix, s->linesize);
2584
2585             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2586
2587             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2588             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2589             s->me.mb_var_sum_temp    += varc;
2590         }
2591     }
2592     return 0;
2593 }
2594
2595 static void write_slice_end(MpegEncContext *s){
2596     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2597         if(s->partitioned_frame){
2598             ff_mpeg4_merge_partitions(s);
2599         }
2600
2601         ff_mpeg4_stuffing(&s->pb);
2602     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2603         ff_mjpeg_encode_stuffing(s);
2604     }
2605
2606     avpriv_align_put_bits(&s->pb);
2607     flush_put_bits(&s->pb);
2608
2609     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2610         s->misc_bits+= get_bits_diff(s);
2611 }
2612
2613 static void write_mb_info(MpegEncContext *s)
2614 {
2615     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2616     int offset = put_bits_count(&s->pb);
2617     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2618     int gobn = s->mb_y / s->gob_index;
2619     int pred_x, pred_y;
2620     if (CONFIG_H263_ENCODER)
2621         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2622     bytestream_put_le32(&ptr, offset);
2623     bytestream_put_byte(&ptr, s->qscale);
2624     bytestream_put_byte(&ptr, gobn);
2625     bytestream_put_le16(&ptr, mba);
2626     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2627     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2628     /* 4MV not implemented */
2629     bytestream_put_byte(&ptr, 0); /* hmv2 */
2630     bytestream_put_byte(&ptr, 0); /* vmv2 */
2631 }
2632
2633 static void update_mb_info(MpegEncContext *s, int startcode)
2634 {
2635     if (!s->mb_info)
2636         return;
2637     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2638         s->mb_info_size += 12;
2639         s->prev_mb_info = s->last_mb_info;
2640     }
2641     if (startcode) {
2642         s->prev_mb_info = put_bits_count(&s->pb)/8;
2643         /* This might have incremented mb_info_size above, and we return without
2644          * actually writing any info into that slot yet. But in that case,
2645          * this will be called again at the start of the after writing the
2646          * start code, actually writing the mb info. */
2647         return;
2648     }
2649
2650     s->last_mb_info = put_bits_count(&s->pb)/8;
2651     if (!s->mb_info_size)
2652         s->mb_info_size += 12;
2653     write_mb_info(s);
2654 }
2655
2656 static int encode_thread(AVCodecContext *c, void *arg){
2657     MpegEncContext *s= *(void**)arg;
2658     int mb_x, mb_y, pdif = 0;
2659     int chr_h= 16>>s->chroma_y_shift;
2660     int i, j;
2661     MpegEncContext best_s, backup_s;
2662     uint8_t bit_buf[2][MAX_MB_BYTES];
2663     uint8_t bit_buf2[2][MAX_MB_BYTES];
2664     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2665     PutBitContext pb[2], pb2[2], tex_pb[2];
2666
2667     ff_check_alignment();
2668
2669     for(i=0; i<2; i++){
2670         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2671         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2672         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2673     }
2674
2675     s->last_bits= put_bits_count(&s->pb);
2676     s->mv_bits=0;
2677     s->misc_bits=0;
2678     s->i_tex_bits=0;
2679     s->p_tex_bits=0;
2680     s->i_count=0;
2681     s->f_count=0;
2682     s->b_count=0;
2683     s->skip_count=0;
2684
2685     for(i=0; i<3; i++){
2686         /* init last dc values */
2687         /* note: quant matrix value (8) is implied here */
2688         s->last_dc[i] = 128 << s->intra_dc_precision;
2689
2690         s->current_picture.error[i] = 0;
2691     }
2692     if(s->codec_id==AV_CODEC_ID_AMV){
2693         s->last_dc[0] = 128*8/13;
2694         s->last_dc[1] = 128*8/14;
2695         s->last_dc[2] = 128*8/14;
2696     }
2697     s->mb_skip_run = 0;
2698     memset(s->last_mv, 0, sizeof(s->last_mv));
2699
2700     s->last_mv_dir = 0;
2701
2702     switch(s->codec_id){
2703     case AV_CODEC_ID_H263:
2704     case AV_CODEC_ID_H263P:
2705     case AV_CODEC_ID_FLV1:
2706         if (CONFIG_H263_ENCODER)
2707             s->gob_index = ff_h263_get_gob_height(s);
2708         break;
2709     case AV_CODEC_ID_MPEG4:
2710         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2711             ff_mpeg4_init_partitions(s);
2712         break;
2713     }
2714
2715     s->resync_mb_x=0;
2716     s->resync_mb_y=0;
2717     s->first_slice_line = 1;
2718     s->ptr_lastgob = s->pb.buf;
2719     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2720         s->mb_x=0;
2721         s->mb_y= mb_y;
2722
2723         ff_set_qscale(s, s->qscale);
2724         ff_init_block_index(s);
2725
2726         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2727             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2728             int mb_type= s->mb_type[xy];
2729 //            int d;
2730             int dmin= INT_MAX;
2731             int dir;
2732
2733             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2734                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2735                 return -1;
2736             }
2737             if(s->data_partitioning){
2738                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2739                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2740                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2741                     return -1;
2742                 }
2743             }
2744
2745             s->mb_x = mb_x;
2746             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2747             ff_update_block_index(s);
2748
2749             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2750                 ff_h261_reorder_mb_index(s);
2751                 xy= s->mb_y*s->mb_stride + s->mb_x;
2752                 mb_type= s->mb_type[xy];
2753             }
2754
2755             /* write gob / video packet header  */
2756             if(s->rtp_mode){
2757                 int current_packet_size, is_gob_start;
2758
2759                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2760
2761                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2762
2763                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2764
2765                 switch(s->codec_id){
2766                 case AV_CODEC_ID_H263:
2767                 case AV_CODEC_ID_H263P:
2768                     if(!s->h263_slice_structured)
2769                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2770                     break;
2771                 case AV_CODEC_ID_MPEG2VIDEO:
2772                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2773                 case AV_CODEC_ID_MPEG1VIDEO:
2774                     if(s->mb_skip_run) is_gob_start=0;
2775                     break;
2776                 case AV_CODEC_ID_MJPEG:
2777                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2778                     break;
2779                 }
2780
2781                 if(is_gob_start){
2782                     if(s->start_mb_y != mb_y || mb_x!=0){
2783                         write_slice_end(s);
2784
2785                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2786                             ff_mpeg4_init_partitions(s);
2787                         }
2788                     }
2789
2790                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2791                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2792
2793                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2794                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2795                         int d = 100 / s->error_rate;
2796                         if(r % d == 0){
2797                             current_packet_size=0;
2798                             s->pb.buf_ptr= s->ptr_lastgob;
2799                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2800                         }
2801                     }
2802
2803                     if (s->avctx->rtp_callback){
2804                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2805                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2806                     }
2807                     update_mb_info(s, 1);
2808
2809                     switch(s->codec_id){
2810                     case AV_CODEC_ID_MPEG4:
2811                         if (CONFIG_MPEG4_ENCODER) {
2812                             ff_mpeg4_encode_video_packet_header(s);
2813                             ff_mpeg4_clean_buffers(s);
2814                         }
2815                     break;
2816                     case AV_CODEC_ID_MPEG1VIDEO:
2817                     case AV_CODEC_ID_MPEG2VIDEO:
2818                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2819                             ff_mpeg1_encode_slice_header(s);
2820                             ff_mpeg1_clean_buffers(s);
2821                         }
2822                     break;
2823                     case AV_CODEC_ID_H263:
2824                     case AV_CODEC_ID_H263P:
2825                         if (CONFIG_H263_ENCODER)
2826                             ff_h263_encode_gob_header(s, mb_y);
2827                     break;
2828                     }
2829
2830                     if(s->flags&CODEC_FLAG_PASS1){
2831                         int bits= put_bits_count(&s->pb);
2832                         s->misc_bits+= bits - s->last_bits;
2833                         s->last_bits= bits;
2834                     }
2835
2836                     s->ptr_lastgob += current_packet_size;
2837                     s->first_slice_line=1;
2838                     s->resync_mb_x=mb_x;
2839                     s->resync_mb_y=mb_y;
2840                 }
2841             }
2842
2843             if(  (s->resync_mb_x   == s->mb_x)
2844                && s->resync_mb_y+1 == s->mb_y){
2845                 s->first_slice_line=0;
2846             }
2847
2848             s->mb_skipped=0;
2849             s->dquant=0; //only for QP_RD
2850
2851             update_mb_info(s, 0);
2852
2853             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2854                 int next_block=0;
2855                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2856
2857                 copy_context_before_encode(&backup_s, s, -1);
2858                 backup_s.pb= s->pb;
2859                 best_s.data_partitioning= s->data_partitioning;
2860                 best_s.partitioned_frame= s->partitioned_frame;
2861                 if(s->data_partitioning){
2862                     backup_s.pb2= s->pb2;
2863                     backup_s.tex_pb= s->tex_pb;
2864                 }
2865
2866                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2867                     s->mv_dir = MV_DIR_FORWARD;
2868                     s->mv_type = MV_TYPE_16X16;
2869                     s->mb_intra= 0;
2870                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2871                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2872                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2873                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2874                 }
2875                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2876                     s->mv_dir = MV_DIR_FORWARD;
2877                     s->mv_type = MV_TYPE_FIELD;
2878                     s->mb_intra= 0;
2879                     for(i=0; i<2; i++){
2880                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2881                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2882                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2883                     }
2884                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2885                                  &dmin, &next_block, 0, 0);
2886                 }
2887                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2888                     s->mv_dir = MV_DIR_FORWARD;
2889                     s->mv_type = MV_TYPE_16X16;
2890                     s->mb_intra= 0;
2891                     s->mv[0][0][0] = 0;
2892                     s->mv[0][0][1] = 0;
2893                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2894                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2895                 }
2896                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2897                     s->mv_dir = MV_DIR_FORWARD;
2898                     s->mv_type = MV_TYPE_8X8;
2899                     s->mb_intra= 0;
2900                     for(i=0; i<4; i++){
2901                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2902                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2903                     }
2904                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2905                                  &dmin, &next_block, 0, 0);
2906                 }
2907                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2908                     s->mv_dir = MV_DIR_FORWARD;
2909                     s->mv_type = MV_TYPE_16X16;
2910                     s->mb_intra= 0;
2911                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2912                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2913                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2914                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2915                 }
2916                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2917                     s->mv_dir = MV_DIR_BACKWARD;
2918                     s->mv_type = MV_TYPE_16X16;
2919                     s->mb_intra= 0;
2920                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2921                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2922                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2923                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2924                 }
2925                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2926                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2927                     s->mv_type = MV_TYPE_16X16;
2928                     s->mb_intra= 0;
2929                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2930                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2931                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2932                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2933                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2934                                  &dmin, &next_block, 0, 0);
2935                 }
2936                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2937                     s->mv_dir = MV_DIR_FORWARD;
2938                     s->mv_type = MV_TYPE_FIELD;
2939                     s->mb_intra= 0;
2940                     for(i=0; i<2; i++){
2941                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2942                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2943                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2944                     }
2945                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2946                                  &dmin, &next_block, 0, 0);
2947                 }
2948                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2949                     s->mv_dir = MV_DIR_BACKWARD;
2950                     s->mv_type = MV_TYPE_FIELD;
2951                     s->mb_intra= 0;
2952                     for(i=0; i<2; i++){
2953                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2954                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2955                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2956                     }
2957                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2958                                  &dmin, &next_block, 0, 0);
2959                 }
2960                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2961                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2962                     s->mv_type = MV_TYPE_FIELD;
2963                     s->mb_intra= 0;
2964                     for(dir=0; dir<2; dir++){
2965                         for(i=0; i<2; i++){
2966                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2967                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2968                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2969                         }
2970                     }
2971                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2972                                  &dmin, &next_block, 0, 0);
2973                 }
2974                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2975                     s->mv_dir = 0;
2976                     s->mv_type = MV_TYPE_16X16;
2977                     s->mb_intra= 1;
2978                     s->mv[0][0][0] = 0;
2979                     s->mv[0][0][1] = 0;
2980                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2981                                  &dmin, &next_block, 0, 0);
2982                     if(s->h263_pred || s->h263_aic){
2983                         if(best_s.mb_intra)
2984                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2985                         else
2986                             ff_clean_intra_table_entries(s); //old mode?
2987                     }
2988                 }
2989
2990                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2991                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2992                         const int last_qp= backup_s.qscale;
2993                         int qpi, qp, dc[6];
2994                         int16_t ac[6][16];
2995                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2996                         static const int dquant_tab[4]={-1,1,-2,2};
2997                         int storecoefs = s->mb_intra && s->dc_val[0];
2998
2999                         av_assert2(backup_s.dquant == 0);
3000
3001                         //FIXME intra
3002                         s->mv_dir= best_s.mv_dir;
3003                         s->mv_type = MV_TYPE_16X16;
3004                         s->mb_intra= best_s.mb_intra;
3005                         s->mv[0][0][0] = best_s.mv[0][0][0];
3006                         s->mv[0][0][1] = best_s.mv[0][0][1];
3007                         s->mv[1][0][0] = best_s.mv[1][0][0];
3008                         s->mv[1][0][1] = best_s.mv[1][0][1];
3009
3010                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3011                         for(; qpi<4; qpi++){
3012                             int dquant= dquant_tab[qpi];
3013                             qp= last_qp + dquant;
3014                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3015                                 continue;
3016                             backup_s.dquant= dquant;
3017                             if(storecoefs){
3018                                 for(i=0; i<6; i++){
3019                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3020                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3021                                 }
3022                             }
3023
3024                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3025                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3026                             if(best_s.qscale != qp){
3027                                 if(storecoefs){
3028                                     for(i=0; i<6; i++){
3029                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3030                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3031                                     }
3032                                 }
3033                             }
3034                         }
3035                     }
3036                 }
3037                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3038                     int mx= s->b_direct_mv_table[xy][0];
3039                     int my= s->b_direct_mv_table[xy][1];
3040
3041                     backup_s.dquant = 0;
3042                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3043                     s->mb_intra= 0;
3044                     ff_mpeg4_set_direct_mv(s, mx, my);
3045                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3046                                  &dmin, &next_block, mx, my);
3047                 }
3048                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3049                     backup_s.dquant = 0;
3050                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3051                     s->mb_intra= 0;
3052                     ff_mpeg4_set_direct_mv(s, 0, 0);
3053                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3054                                  &dmin, &next_block, 0, 0);
3055                 }
3056                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3057                     int coded=0;
3058                     for(i=0; i<6; i++)
3059                         coded |= s->block_last_index[i];
3060                     if(coded){
3061                         int mx,my;
3062                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3063                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3064                             mx=my=0; //FIXME find the one we actually used
3065                             ff_mpeg4_set_direct_mv(s, mx, my);
3066                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3067                             mx= s->mv[1][0][0];
3068                             my= s->mv[1][0][1];
3069                         }else{
3070                             mx= s->mv[0][0][0];
3071                             my= s->mv[0][0][1];
3072                         }
3073
3074                         s->mv_dir= best_s.mv_dir;
3075                         s->mv_type = best_s.mv_type;
3076                         s->mb_intra= 0;
3077 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3078                         s->mv[0][0][1] = best_s.mv[0][0][1];
3079                         s->mv[1][0][0] = best_s.mv[1][0][0];
3080                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3081                         backup_s.dquant= 0;
3082                         s->skipdct=1;
3083                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3084                                         &dmin, &next_block, mx, my);
3085                         s->skipdct=0;
3086                     }
3087                 }
3088
3089                 s->current_picture.qscale_table[xy] = best_s.qscale;
3090
3091                 copy_context_after_encode(s, &best_s, -1);
3092
3093                 pb_bits_count= put_bits_count(&s->pb);
3094                 flush_put_bits(&s->pb);
3095                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3096                 s->pb= backup_s.pb;
3097
3098                 if(s->data_partitioning){
3099                     pb2_bits_count= put_bits_count(&s->pb2);
3100                     flush_put_bits(&s->pb2);
3101                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3102                     s->pb2= backup_s.pb2;
3103
3104                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3105                     flush_put_bits(&s->tex_pb);
3106                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3107                     s->tex_pb= backup_s.tex_pb;
3108                 }
3109                 s->last_bits= put_bits_count(&s->pb);
3110
3111                 if (CONFIG_H263_ENCODER &&
3112                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3113                     ff_h263_update_motion_val(s);
3114
3115                 if(next_block==0){ //FIXME 16 vs linesize16
3116                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
3117                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3118                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3119                 }
3120
3121                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3122                     ff_MPV_decode_mb(s, s->block);
3123             } else {
3124                 int motion_x = 0, motion_y = 0;
3125                 s->mv_type=MV_TYPE_16X16;
3126                 // only one MB-Type possible
3127
3128                 switch(mb_type){
3129                 case CANDIDATE_MB_TYPE_INTRA:
3130                     s->mv_dir = 0;
3131                     s->mb_intra= 1;
3132                     motion_x= s->mv[0][0][0] = 0;
3133                     motion_y= s->mv[0][0][1] = 0;
3134                     break;
3135                 case CANDIDATE_MB_TYPE_INTER:
3136                     s->mv_dir = MV_DIR_FORWARD;
3137                     s->mb_intra= 0;
3138                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3139                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3140                     break;
3141                 case CANDIDATE_MB_TYPE_INTER_I:
3142                     s->mv_dir = MV_DIR_FORWARD;
3143                     s->mv_type = MV_TYPE_FIELD;
3144                     s->mb_intra= 0;
3145                     for(i=0; i<2; i++){
3146                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3147                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3148                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3149                     }
3150                     break;
3151                 case CANDIDATE_MB_TYPE_INTER4V:
3152                     s->mv_dir = MV_DIR_FORWARD;
3153                     s->mv_type = MV_TYPE_8X8;
3154                     s->mb_intra= 0;
3155                     for(i=0; i<4; i++){
3156                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3157                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3158                     }
3159                     break;
3160                 case CANDIDATE_MB_TYPE_DIRECT:
3161                     if (CONFIG_MPEG4_ENCODER) {
3162                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3163                         s->mb_intra= 0;
3164                         motion_x=s->b_direct_mv_table[xy][0];
3165                         motion_y=s->b_direct_mv_table[xy][1];
3166                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3167                     }
3168                     break;
3169                 case CANDIDATE_MB_TYPE_DIRECT0:
3170                     if (CONFIG_MPEG4_ENCODER) {
3171                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3172                         s->mb_intra= 0;
3173                         ff_mpeg4_set_direct_mv(s, 0, 0);
3174                     }
3175                     break;
3176                 case CANDIDATE_MB_TYPE_BIDIR:
3177                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3178                     s->mb_intra= 0;
3179                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3180                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3181                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3182                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3183                     break;
3184                 case CANDIDATE_MB_TYPE_BACKWARD:
3185                     s->mv_dir = MV_DIR_BACKWARD;
3186                     s->mb_intra= 0;
3187                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3188                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3189                     break;
3190                 case CANDIDATE_MB_TYPE_FORWARD:
3191                     s->mv_dir = MV_DIR_FORWARD;
3192                     s->mb_intra= 0;
3193                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3194                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3195                     break;
3196                 case CANDIDATE_MB_TYPE_FORWARD_I:
3197                     s->mv_dir = MV_DIR_FORWARD;
3198                     s->mv_type = MV_TYPE_FIELD;
3199                     s->mb_intra= 0;
3200                     for(i=0; i<2; i++){
3201                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3202                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3203                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3204                     }
3205                     break;
3206                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3207                     s->mv_dir = MV_DIR_BACKWARD;
3208                     s->mv_type = MV_TYPE_FIELD;
3209                     s->mb_intra= 0;
3210                     for(i=0; i<2; i++){
3211                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3212                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3213                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3214                     }
3215                     break;
3216                 case CANDIDATE_MB_TYPE_BIDIR_I:
3217                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3218                     s->mv_type = MV_TYPE_FIELD;
3219                     s->mb_intra= 0;
3220                     for(dir=0; dir<2; dir++){
3221                         for(i=0; i<2; i++){
3222                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3223                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3224                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3225                         }
3226                     }
3227                     break;
3228                 default:
3229                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3230                 }
3231
3232                 encode_mb(s, motion_x, motion_y);
3233
3234                 // RAL: Update last macroblock type
3235                 s->last_mv_dir = s->mv_dir;
3236
3237                 if (CONFIG_H263_ENCODER &&
3238                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3239                     ff_h263_update_motion_val(s);
3240
3241                 ff_MPV_decode_mb(s, s->block);
3242             }
3243
3244             /* clean the MV table in IPS frames for direct mode in B frames */
3245             if(s->mb_intra /* && I,P,S_TYPE */){
3246                 s->p_mv_table[xy][0]=0;
3247                 s->p_mv_table[xy][1]=0;
3248             }
3249
3250             if(s->flags&CODEC_FLAG_PSNR){
3251                 int w= 16;
3252                 int h= 16;
3253
3254                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3255                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3256
3257                 s->current_picture.error[0] += sse(
3258                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3259                     s->dest[0], w, h, s->linesize);
3260                 s->current_picture.error[1] += sse(
3261                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3262                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3263                 s->current_picture.error[2] += sse(
3264                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3265                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3266             }
3267             if(s->loop_filter){
3268                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3269                     ff_h263_loop_filter(s);
3270             }
3271             av_dlog(s->avctx, "MB %d %d bits\n",
3272                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3273         }
3274     }
3275
3276     //not beautiful here but we must write it before flushing so it has to be here
3277     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3278         ff_msmpeg4_encode_ext_header(s);
3279
3280     write_slice_end(s);
3281
3282     /* Send the last GOB if RTP */
3283     if (s->avctx->rtp_callback) {
3284         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3285         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3286         /* Call the RTP callback to send the last GOB */
3287         emms_c();
3288         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3289     }
3290
3291     return 0;
3292 }
3293
/**
 * Fold one accumulator field of a worker context into the main context.
 *
 * Adds src->field to dst->field and then resets src->field to 0. Variables
 * named `dst` and `src` must be in scope where the macro is used.
 *
 * The body is wrapped in do { } while (0) so that both statements behave as
 * a single statement; without it, `if (cond) MERGE(x);` would guard only the
 * addition and always zero the source field.
 */
#define MERGE(field)                 \
    do {                             \
        dst->field += src->field;    \
        src->field  = 0;             \
    } while (0)
3295 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3296     MERGE(me.scene_change_score);
3297     MERGE(me.mc_mb_var_sum_temp);
3298     MERGE(me.mb_var_sum_temp);
3299 }
3300
3301 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3302     int i;
3303
3304     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3305     MERGE(dct_count[1]);
3306     MERGE(mv_bits);
3307     MERGE(i_tex_bits);
3308     MERGE(p_tex_bits);
3309     MERGE(i_count);
3310     MERGE(f_count);
3311     MERGE(b_count);
3312     MERGE(skip_count);
3313     MERGE(misc_bits);
3314     MERGE(er.error_count);
3315     MERGE(padding_bug_score);
3316     MERGE(current_picture.error[0]);
3317     MERGE(current_picture.error[1]);
3318     MERGE(current_picture.error[2]);
3319
3320     if(dst->avctx->noise_reduction){
3321         for(i=0; i<64; i++){
3322             MERGE(dct_error_sum[0][i]);
3323             MERGE(dct_error_sum[1][i]);
3324         }
3325     }
3326
3327     assert(put_bits_count(&src->pb) % 8 ==0);
3328     assert(put_bits_count(&dst->pb) % 8 ==0);
3329     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3330     flush_put_bits(&dst->pb);
3331 }
3332
3333 static int estimate_qp(MpegEncContext *s, int dry_run){
3334     if (s->next_lambda){
3335         s->current_picture_ptr->f->quality =
3336         s->current_picture.f->quality = s->next_lambda;
3337         if(!dry_run) s->next_lambda= 0;
3338     } else if (!s->fixed_qscale) {
3339         s->current_picture_ptr->f->quality =
3340         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3341         if (s->current_picture.f->quality < 0)
3342             return -1;
3343     }
3344
3345     if(s->adaptive_quant){
3346         switch(s->codec_id){
3347         case AV_CODEC_ID_MPEG4:
3348             if (CONFIG_MPEG4_ENCODER)
3349                 ff_clean_mpeg4_qscales(s);
3350             break;
3351         case AV_CODEC_ID_H263:
3352         case AV_CODEC_ID_H263P:
3353         case AV_CODEC_ID_FLV1:
3354             if (CONFIG_H263_ENCODER)
3355                 ff_clean_h263_qscales(s);
3356             break;
3357         default:
3358             ff_init_qscale_tab(s);
3359         }
3360
3361         s->lambda= s->lambda_table[0];
3362         //FIXME broken
3363     }else
3364         s->lambda = s->current_picture.f->quality;
3365     update_qscale(s);
3366     return 0;
3367 }
3368
3369 /* must be called before writing the header */
3370 static void set_frame_distances(MpegEncContext * s){
3371     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3372     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3373
3374     if(s->pict_type==AV_PICTURE_TYPE_B){
3375         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3376         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3377     }else{
3378         s->pp_time= s->time - s->last_non_b_time;
3379         s->last_non_b_time= s->time;
3380         assert(s->picture_number==0 || s->pp_time > 0);
3381     }
3382 }
3383
/**
 * Encode the current picture.
 *
 * In order: initialises timing and rounding state, picks a preliminary
 * lambda, runs motion estimation (or, for I pictures without fixed qscale,
 * mb_var_thread) on all slice contexts, optionally turns a P picture into an
 * I picture on scene change, selects f_code/b_code, estimates the final qp,
 * builds the MJPEG/AMV quantisation matrices where applicable, writes the
 * picture header and finally runs encode_thread on all slice contexts and
 * merges their results.
 *
 * @param s              encoder context
 * @param picture_number stored in s->picture_number and passed to the
 *                       format-specific picture header writers
 * @return 0 on success; -1 if estimate_qp() or ff_init_me() fails, or the
 *         negative value returned by ff_update_duplicate_context()
 */
static int encode_picture(MpegEncContext *s, int picture_number)
{
    int i, ret;
    int bits;
    int context_count = s->slice_context_count;

    s->picture_number = picture_number;

    /* Reset the average MB variance */
    s->me.mb_var_sum_temp    =
    s->me.mc_mb_var_sum_temp = 0;

    /* we need to initialize some time vars before we can encode b-frames */
    // RAL: Condition added for MPEG1VIDEO
    if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
        set_frame_distances(s);
    if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
        ff_set_mpeg4_time(s);

    s->me.scene_change_score=0;

//    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion

    /* I pictures set no_rounding from the msmpeg4 version; other non-B
     * pictures toggle it for the codecs/settings listed below */
    if(s->pict_type==AV_PICTURE_TYPE_I){
        if(s->msmpeg4_version >= 3) s->no_rounding=1;
        else                        s->no_rounding=0;
    }else if(s->pict_type!=AV_PICTURE_TYPE_B){
        if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
            s->no_rounding ^= 1;
    }

    /* preliminary lambda for motion estimation: a dry-run qp estimate in the
     * second pass, otherwise the last lambda recorded for this picture type
     * (skipped entirely when CODEC_FLAG_QSCALE is set) */
    if(s->flags & CODEC_FLAG_PASS2){
        if (estimate_qp(s,1) < 0)
            return -1;
        ff_get_2pass_fcode(s);
    }else if(!(s->flags & CODEC_FLAG_QSCALE)){
        if(s->pict_type==AV_PICTURE_TYPE_B)
            s->lambda= s->last_lambda_for[s->pict_type];
        else
            s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
        update_qscale(s);
    }

    /* codecs other than AMV/MJPEG use the luma intra matrices for chroma;
     * chroma matrices that are distinct from the luma ones are freed first */
    if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
        if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
        if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
        s->q_chroma_intra_matrix   = s->q_intra_matrix;
        s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
    }

    s->mb_intra=0; //for the rate distortion & bit compare functions
    /* bring the additional slice contexts (index 1 and up) in sync with s */
    for(i=1; i<context_count; i++){
        ret = ff_update_duplicate_context(s->thread_context[i], s);
        if (ret < 0)
            return ret;
    }

    if(ff_init_me(s)<0)
        return -1;

    /* Estimate motion for every MB */
    if(s->pict_type != AV_PICTURE_TYPE_I){
        /* scale lambda/lambda2 by me_penalty_compensation/256, rounded */
        s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
        s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
        if (s->pict_type != AV_PICTURE_TYPE_B) {
            /* pre-pass: always when pre_me==2, otherwise only if pre_me is
             * set and the last non-B picture was an I picture */
            if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
                s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
            }
        }

        s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
    }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
        /* I-Frame */
        for(i=0; i<s->mb_stride*s->mb_height; i++)
            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;

        if(!s->fixed_qscale){
            /* finding spatial complexity for I-frame rate control */
            s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
        }
    }
    /* collect the ME statistics of the other slice contexts into s */
    for(i=1; i<context_count; i++){
        merge_context_after_me(s, s->thread_context[i]);
    }
    s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
    s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
    emms_c();

    /* scene change: a P picture whose score exceeds the threshold is coded
     * as an I picture with all macroblocks marked intra */
    if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
        s->pict_type= AV_PICTURE_TYPE_I;
        for(i=0; i<s->mb_stride*s->mb_height; i++)
            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
        if(s->msmpeg4_version >= 3)
            s->no_rounding=1;
        av_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
                s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
    }

    /* f_code/b_code selection, skipped when umvplus is set */
    if(!s->umvplus){
        if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
            s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);

            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int a,b;
                a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
                b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
                s->f_code= FFMAX3(s->f_code, a, b);
            }

            /* NOTE(review): presumably these handle vectors that the chosen
             * f_code cannot represent - confirm in the motion estimation code */
            ff_fix_long_p_mvs(s);
            ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int j;
                for(i=0; i<2; i++){
                    for(j=0; j<2; j++)
                        ff_fix_long_mvs(s, s->p_field_select_table[i], j,
                                        s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
                }
            }
        }

        if(s->pict_type==AV_PICTURE_TYPE_B){
            int a, b;

            /* f_code must cover both the forward and the bidir-forward table */
            a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
            b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
            s->f_code = FFMAX(a, b);

            /* likewise b_code for the backward and the bidir-backward table */
            a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
            b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
            s->b_code = FFMAX(a, b);

            ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int dir, j;
                for(dir=0; dir<2; dir++){
                    for(i=0; i<2; i++){
                        for(j=0; j<2; j++){
                            int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
                                          : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
                            ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
                                            s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
                        }
                    }
                }
            }
        }
    }

    /* final (non dry-run) qp estimate for this picture */
    if (estimate_qp(s, 0) < 0)
        return -1;

    if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
        s->qscale= 3; //reduce clipping problems

    if (s->out_format == FMT_MJPEG) {
        /* defaults; a user intra_matrix replaces both, a user
         * chroma_intra_matrix then replaces the chroma one only */
        const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
        const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;

        if (s->avctx->intra_matrix) {
            chroma_matrix =
            luma_matrix = s->avctx->intra_matrix;
        }
        if (s->avctx->chroma_intra_matrix)
            chroma_matrix = s->avctx->chroma_intra_matrix;

        /* for mjpeg, we do include qscale in the matrix */
        for(i=1;i<64;i++){
            int j= s->dsp.idct_permutation[i];

            s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
            s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
        }
        s->y_dc_scale_table=
        s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
        s->chroma_intra_matrix[0] =
        s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
        ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
                       s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
        ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
                       s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
        /* qscale is already folded into the matrices above */
        s->qscale= 8;
    }
    if(s->codec_id == AV_CODEC_ID_AMV){
        /* constant DC scale tables: 13 for luma, 14 for chroma */
        static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
        static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
        /* AMV takes its matrices from fixed rows of sp5x_quant_table,
         * independent of s->qscale */
        for(i=1;i<64;i++){
            int j= s->dsp.idct_permutation[ff_zigzag_direct[i]];

            s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
            s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
        }
        s->y_dc_scale_table= y;
        s->c_dc_scale_table= c;
        s->intra_matrix[0] = 13;
        s->chroma_intra_matrix[0] = 14;
        ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
                       s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
        ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
                       s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
        s->qscale= 8;
    }

    //FIXME var duplication
    s->current_picture_ptr->f->key_frame =
    s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
    s->current_picture_ptr->f->pict_type =
    s->current_picture.f->pict_type = s->pict_type;

    if (s->current_picture.f->key_frame)
        s->picture_in_gop_number=0;

    s->mb_x = s->mb_y = 0;
    /* remember the bit position so the header size can be computed below */
    s->last_bits= put_bits_count(&s->pb);
    /* write the picture header for the output format / codec in use */
    switch(s->out_format) {
    case FMT_MJPEG:
        if (CONFIG_MJPEG_ENCODER)
            ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
                                           s->intra_matrix, s->chroma_intra_matrix);
        break;
    case FMT_H261:
        if (CONFIG_H261_ENCODER)
            ff_h261_encode_picture_header(s, picture_number);
        break;
    case FMT_H263:
        if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
            ff_wmv2_encode_picture_header(s, picture_number);
        else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
            ff_msmpeg4_encode_picture_header(s, picture_number);
        else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
            ff_mpeg4_encode_picture_header(s, picture_number);
        else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
            ff_rv10_encode_picture_header(s, picture_number);
        else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
            ff_rv20_encode_picture_header(s, picture_number);
        else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
            ff_flv_encode_picture_header(s, picture_number);
        else if (CONFIG_H263_ENCODER)
            ff_h263_encode_picture_header(s, picture_number);
        break;
    case FMT_MPEG1:
        if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
            ff_mpeg1_encode_picture_header(s, picture_number);
        break;
    default:
        av_assert0(0);
    }
    bits= put_bits_count(&s->pb);
    s->header_bits= bits - s->last_bits;

    /* update the other slice contexts from s, run encode_thread on every
     * slice context through avctx->execute, then merge their results and
     * bitstreams back into s */
    for(i=1; i<context_count; i++){
        update_duplicate_context_after_me(s->thread_context[i], s);
    }
    s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
    for(i=1; i<context_count; i++){
        merge_context_after_encode(s, s->thread_context[i]);
    }
    emms_c();
    return 0;
}
3647
3648 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3649     const int intra= s->mb_intra;
3650     int i;
3651
3652     s->dct_count[intra]++;
3653
3654     for(i=0; i<64; i++){
3655         int level= block[i];
3656
3657         if(level){
3658             if(level>0){
3659                 s->dct_error_sum[intra][i] += level;
3660                 level -= s->dct_offset[intra][i];
3661                 if(level<0) level=0;
3662             }else{
3663                 s->dct_error_sum[intra][i] -= level;
3664                 level += s->dct_offset[intra][i];
3665                 if(level>0) level=0;
3666             }
3667             block[i]= level;
3668         }
3669     }
3670 }
3671
3672 static int dct_quantize_trellis_c(MpegEncContext *s,
3673                                   int16_t *block, int n,
3674                                   int qscale, int *overflow){
3675     const int *qmat;
3676     const uint8_t *scantable= s->intra_scantable.scantable;
3677     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3678     int max=0;
3679     unsigned int threshold1, threshold2;
3680     int bias=0;
3681     int run_tab[65];
3682     int level_tab[65];
3683     int score_tab[65];
3684     int survivor[65];
3685     int survivor_count;
3686     int last_run=0;
3687     int last_level=0;
3688     int last_score= 0;
3689     int last_i;
3690     int coeff[2][64];
3691     int coeff_count[64];
3692     int qmul, qadd, start_i, last_non_zero, i, dc;
3693     const int esc_length= s->ac_esc_length;
3694     uint8_t * length;
3695     uint8_t * last_length;
3696     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3697
3698     s->dsp.fdct (block);
3699
3700     if(s->dct_error_sum)
3701         s->denoise_dct(s, block);
3702     qmul= qscale*16;
3703     qadd= ((qscale-1)|1)*8;
3704
3705     if (s->mb_intra) {
3706         int q;
3707         if (!s->h263_aic) {
3708             if (n < 4)
3709                 q = s->y_dc_scale;
3710             else
3711                 q = s->c_dc_scale;
3712             q = q << 3;
3713         } else{
3714             /* For AIC we skip quant/dequant of INTRADC */
3715             q = 1 << 3;
3716             qadd=0;
3717         }
3718
3719         /* note: block[0] is assumed to be positive */
3720         block[0] = (block[0] + (q >> 1)) / q;
3721         start_i = 1;
3722         last_non_zero = 0;
3723         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3724         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3725             bias= 1<<(QMAT_SHIFT-1);
3726         length     = s->intra_ac_vlc_length;
3727         last_length= s->intra_ac_vlc_last_length;
3728     } else {
3729         start_i = 0;
3730         last_non_zero = -1;
3731         qmat = s->q_inter_matrix[qscale];
3732         length     = s->inter_ac_vlc_length;
3733         last_length= s->inter_ac_vlc_last_length;
3734     }
3735     last_i= start_i;
3736
3737     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3738     threshold2= (threshold1<<1);
3739
3740     for(i=63; i>=start_i; i--) {
3741         const int j = scantable[i];
3742         int level = block[j] * qmat[j];
3743
3744         if(((unsigned)(level+threshold1))>threshold2){
3745             last_non_zero = i;
3746             break;
3747         }
3748     }
3749
3750     for(i=start_i; i<=last_non_zero; i++) {
3751         const int j = scantable[i];
3752         int level = block[j] * qmat[j];
3753
3754 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3755 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3756         if(((unsigned)(level+threshold1))>threshold2){
3757             if(level>0){
3758                 level= (bias + level)>>QMAT_SHIFT;
3759                 coeff[0][i]= level;
3760                 coeff[1][i]= level-1;
3761 //                coeff[2][k]= level-2;
3762             }else{
3763                 level= (bias - level)>>QMAT_SHIFT;
3764                 coeff[0][i]= -level;
3765                 coeff[1][i]= -level+1;
3766 //                coeff[2][k]= -level+2;
3767             }
3768             coeff_count[i]= FFMIN(level, 2);
3769             av_assert2(coeff_count[i]);
3770             max |=level;
3771         }else{
3772             coeff[0][i]= (level>>31)|1;
3773             coeff_count[i]= 1;
3774         }
3775     }
3776
3777     *overflow= s->max_qcoeff < max; //overflow might have happened
3778
3779     if(last_non_zero < start_i){
3780         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3781         return last_non_zero;
3782     }
3783
3784     score_tab[start_i]= 0;
3785     survivor[0]= start_i;
3786     survivor_count= 1;
3787
3788     for(i=start_i; i<=last_non_zero; i++){
3789         int level_index, j, zero_distortion;
3790         int dct_coeff= FFABS(block[ scantable[i] ]);
3791         int best_score=256*256*256*120;
3792
3793         if (s->dsp.fdct == ff_fdct_ifast)
3794             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3795         zero_distortion= dct_coeff*dct_coeff;
3796
3797         for(level_index=0; level_index < coeff_count[i]; level_index++){
3798             int distortion;
3799             int level= coeff[level_index][i];
3800             const int alevel= FFABS(level);
3801             int unquant_coeff;
3802
3803             av_assert2(level);
3804
3805             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3806                 unquant_coeff= alevel*qmul + qadd;
3807             }else{ //MPEG1
3808                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3809                 if(s->mb_intra){
3810                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3811                         unquant_coeff =   (unquant_coeff - 1) | 1;
3812                 }else{
3813                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3814                         unquant_coeff =   (unquant_coeff - 1) | 1;
3815                 }
3816                 unquant_coeff<<= 3;
3817             }
3818
3819             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3820             level+=64;
3821             if((level&(~127)) == 0){
3822                 for(j=survivor_count-1; j>=0; j--){
3823                     int run= i - survivor[j];
3824                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3825                     score += score_tab[i-run];
3826
3827                     if(score < best_score){
3828                         best_score= score;
3829                         run_tab[i+1]= run;
3830                         level_tab[i+1]= level-64;
3831                     }
3832                 }
3833
3834                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3835                     for(j=survivor_count-1; j>=0; j--){
3836                         int run= i - survivor[j];
3837                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3838                         score += score_tab[i-run];
3839                         if(score < last_score){
3840                             last_score= score;
3841                             last_run= run;
3842                             last_level= level-64;
3843                             last_i= i+1;
3844                         }
3845                     }
3846                 }
3847             }else{
3848                 distortion += esc_length*lambda;
3849                 for(j=survivor_count-1; j>=0; j--){
3850                     int run= i - survivor[j];
3851                     int score= distortion + score_tab[i-run];
3852
3853                     if(score < best_score){
3854                         best_score= score;
3855                         run_tab[i+1]= run;
3856                         level_tab[i+1]= level-64;
3857                     }
3858                 }
3859
3860                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3861                   for(j=survivor_count-1; j>=0; j--){
3862                         int run= i - survivor[j];
3863                         int score= distortion + score_tab[i-run];
3864                         if(score < last_score){
3865                             last_score= score;
3866                             last_run= run;
3867                             last_level= level-64;
3868                             last_i= i+1;
3869                         }
3870                     }
3871                 }
3872             }
3873         }
3874
3875         score_tab[i+1]= best_score;
3876
3877         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
3878         if(last_non_zero <= 27){
3879             for(; survivor_count; survivor_count--){
3880                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3881                     break;
3882             }
3883         }else{
3884             for(; survivor_count; survivor_count--){
3885                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3886                     break;
3887             }
3888         }
3889
3890         survivor[ survivor_count++ ]= i+1;
3891     }
3892
3893     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
3894         last_score= 256*256*256*120;
3895         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3896             int score= score_tab[i];
3897             if(i) score += lambda*2; //FIXME exacter?
3898
3899             if(score < last_score){
3900                 last_score= score;
3901                 last_i= i;
3902                 last_level= level_tab[i];
3903                 last_run= run_tab[i];
3904             }
3905         }
3906     }
3907
3908     s->coded_score[n] = last_score;
3909
3910     dc= FFABS(block[0]);
3911     last_non_zero= last_i - 1;
3912     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3913
3914     if(last_non_zero < start_i)
3915         return last_non_zero;
3916
3917     if(last_non_zero == 0 && start_i == 0){
3918         int best_level= 0;
3919         int best_score= dc * dc;
3920
3921         for(i=0; i<coeff_count[0]; i++){
3922             int level= coeff[i][0];
3923             int alevel= FFABS(level);
3924             int unquant_coeff, score, distortion;
3925
3926             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3927                     unquant_coeff= (alevel*qmul + qadd)>>3;
3928             }else{ //MPEG1
3929                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3930                     unquant_coeff =   (unquant_coeff - 1) | 1;
3931             }
3932             unquant_coeff = (unquant_coeff + 4) >> 3;
3933             unquant_coeff<<= 3 + 3;
3934
3935             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3936             level+=64;
3937             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3938             else                    score= distortion + esc_length*lambda;
3939
3940             if(score < best_score){
3941                 best_score= score;
3942                 best_level= level - 64;
3943             }
3944         }
3945         block[0]= best_level;
3946         s->coded_score[n] = best_score - dc*dc;
3947         if(best_level == 0) return -1;
3948         else                return last_non_zero;
3949     }
3950
3951     i= last_i;
3952     av_assert2(last_level);
3953
3954     block[ perm_scantable[last_non_zero] ]= last_level;
3955     i -= last_run + 1;
3956
3957     for(; i>start_i; i -= run_tab[i] + 1){
3958         block[ perm_scantable[i-1] ]= level_tab[i];
3959     }
3960
3961     return last_non_zero;
3962 }
3963
3964 //#define REFINE_STATS 1
3965 static int16_t basis[64][64];
3966
3967 static void build_basis(uint8_t *perm){
3968     int i, j, x, y;
3969     emms_c();
3970     for(i=0; i<8; i++){
3971         for(j=0; j<8; j++){
3972             for(y=0; y<8; y++){
3973                 for(x=0; x<8; x++){
3974                     double s= 0.25*(1<<BASIS_SHIFT);
3975                     int index= 8*i + j;
3976                     int perm_index= perm[index];
3977                     if(i==0) s*= sqrt(0.5);
3978                     if(j==0) s*= sqrt(0.5);
3979                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3980                 }
3981             }
3982         }
3983     }
3984 }
3985
3986 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3987                         int16_t *block, int16_t *weight, int16_t *orig,
3988                         int n, int qscale){
3989     int16_t rem[64];
3990     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3991     const uint8_t *scantable= s->intra_scantable.scantable;
3992     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3993 //    unsigned int threshold1, threshold2;
3994 //    int bias=0;
3995     int run_tab[65];
3996     int prev_run=0;
3997     int prev_level=0;
3998     int qmul, qadd, start_i, last_non_zero, i, dc;
3999     uint8_t * length;
4000     uint8_t * last_length;
4001     int lambda;
4002     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4003 #ifdef REFINE_STATS
4004 static int count=0;
4005 static int after_last=0;
4006 static int to_zero=0;
4007 static int from_zero=0;
4008 static int raise=0;
4009 static int lower=0;
4010 static int messed_sign=0;
4011 #endif
4012
4013     if(basis[0][0] == 0)
4014         build_basis(s->dsp.idct_permutation);
4015
4016     qmul= qscale*2;
4017     qadd= (qscale-1)|1;
4018     if (s->mb_intra) {
4019         if (!s->h263_aic) {
4020             if (n < 4)
4021                 q = s->y_dc_scale;
4022             else
4023                 q = s->c_dc_scale;
4024         } else{
4025             /* For AIC we skip quant/dequant of INTRADC */
4026             q = 1;
4027             qadd=0;
4028         }
4029         q <<= RECON_SHIFT-3;
4030         /* note: block[0] is assumed to be positive */
4031         dc= block[0]*q;
4032 //        block[0] = (block[0] + (q >> 1)) / q;
4033         start_i = 1;
4034 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4035 //            bias= 1<<(QMAT_SHIFT-1);
4036         length     = s->intra_ac_vlc_length;
4037         last_length= s->intra_ac_vlc_last_length;
4038     } else {
4039         dc= 0;
4040         start_i = 0;
4041         length     = s->inter_ac_vlc_length;
4042         last_length= s->inter_ac_vlc_last_length;
4043     }
4044     last_non_zero = s->block_last_index[n];
4045
4046 #ifdef REFINE_STATS
4047 {START_TIMER
4048 #endif
4049     dc += (1<<(RECON_SHIFT-1));
4050     for(i=0; i<64; i++){
4051         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
4052     }
4053 #ifdef REFINE_STATS
4054 STOP_TIMER("memset rem[]")}
4055 #endif
4056     sum=0;
4057     for(i=0; i<64; i++){
4058         int one= 36;
4059         int qns=4;
4060         int w;
4061
4062         w= FFABS(weight[i]) + qns*one;
4063         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4064
4065         weight[i] = w;
4066 //        w=weight[i] = (63*qns + (w/2)) / w;
4067
4068         av_assert2(w>0);
4069         av_assert2(w<(1<<6));
4070         sum += w*w;
4071     }
4072     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4073 #ifdef REFINE_STATS
4074 {START_TIMER
4075 #endif
4076     run=0;
4077     rle_index=0;
4078     for(i=start_i; i<=last_non_zero; i++){
4079         int j= perm_scantable[i];
4080         const int level= block[j];
4081         int coeff;
4082
4083         if(level){
4084             if(level<0) coeff= qmul*level - qadd;
4085             else        coeff= qmul*level + qadd;
4086             run_tab[rle_index++]=run;
4087             run=0;
4088
4089             s->dsp.add_8x8basis(rem, basis[j], coeff);
4090         }else{
4091             run++;
4092         }
4093     }
4094 #ifdef REFINE_STATS
4095 if(last_non_zero>0){
4096 STOP_TIMER("init rem[]")
4097 }
4098 }
4099
4100 {START_TIMER
4101 #endif
4102     for(;;){
4103         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
4104         int best_coeff=0;
4105         int best_change=0;
4106         int run2, best_unquant_change=0, analyze_gradient;
4107 #ifdef REFINE_STATS
4108 {START_TIMER
4109 #endif
4110         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4111
4112         if(analyze_gradient){
4113 #ifdef REFINE_STATS
4114 {START_TIMER
4115 #endif
4116             for(i=0; i<64; i++){
4117                 int w= weight[i];
4118
4119                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4120             }
4121 #ifdef REFINE_STATS
4122 STOP_TIMER("rem*w*w")}
4123 {START_TIMER
4124 #endif
4125             s->dsp.fdct(d1);
4126 #ifdef REFINE_STATS
4127 STOP_TIMER("dct")}
4128 #endif
4129         }
4130
4131         if(start_i){
4132             const int level= block[0];
4133             int change, old_coeff;
4134
4135             av_assert2(s->mb_intra);
4136
4137             old_coeff= q*level;
4138
4139             for(change=-1; change<=1; change+=2){
4140                 int new_level= level + change;
4141                 int score, new_coeff;
4142
4143                 new_coeff= q*new_level;
4144                 if(new_coeff >= 2048 || new_coeff < 0)
4145                     continue;
4146
4147                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
4148                 if(score<best_score){
4149                     best_score= score;
4150                     best_coeff= 0;
4151                     best_change= change;
4152                     best_unquant_change= new_coeff - old_coeff;
4153                 }
4154             }
4155         }
4156
4157         run=0;
4158         rle_index=0;
4159         run2= run_tab[rle_index++];
4160         prev_level=0;
4161         prev_run=0;
4162
4163         for(i=start_i; i<64; i++){
4164             int j= perm_scantable[i];
4165             const int level= block[j];
4166             int change, old_coeff;
4167
4168             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4169                 break;
4170
4171             if(level){
4172                 if(level<0) old_coeff= qmul*level - qadd;
4173                 else        old_coeff= qmul*level + qadd;
4174                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4175             }else{
4176                 old_coeff=0;
4177                 run2--;
4178                 av_assert2(run2>=0 || i >= last_non_zero );
4179             }
4180
4181             for(change=-1; change<=1; change+=2){
4182                 int new_level= level + change;
4183                 int score, new_coeff, unquant_change;
4184
4185                 score=0;
4186                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4187                    continue;
4188
4189                 if(new_level){
4190                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4191                     else            new_coeff= qmul*new_level + qadd;
4192                     if(new_coeff >= 2048 || new_coeff <= -2048)
4193                         continue;
4194                     //FIXME check for overflow
4195
4196                     if(level){
4197                         if(level < 63 && level > -63){
4198                             if(i < last_non_zero)
4199                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4200                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4201                             else
4202                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4203                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4204                         }
4205                     }else{
4206                         av_assert2(FFABS(new_level)==1);
4207
4208                         if(analyze_gradient){
4209                             int g= d1[ scantable[i] ];
4210                             if(g && (g^new_level) >= 0)
4211                                 continue;
4212                         }
4213
4214                         if(i < last_non_zero){
4215                             int next_i= i + run2 + 1;
4216                             int next_level= block[ perm_scantable[next_i] ] + 64;
4217
4218                             if(next_level&(~127))
4219                                 next_level= 0;
4220
4221                             if(next_i < last_non_zero)
4222                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4223                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4224                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4225                             else
4226                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4227                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4228                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4229                         }else{
4230                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4231                             if(prev_level){
4232                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4233                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4234                             }
4235                         }
4236                     }
4237                 }else{
4238                     new_coeff=0;
4239                     av_assert2(FFABS(level)==1);
4240
4241                     if(i < last_non_zero){
4242                         int next_i= i + run2 + 1;
4243                         int next_level= block[ perm_scantable[next_i] ] + 64;
4244
4245                         if(next_level&(~127))
4246                             next_level= 0;
4247
4248                         if(next_i < last_non_zero)
4249                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4250                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4251                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4252                         else
4253                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4254                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4255                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4256                     }else{
4257                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4258                         if(prev_level){
4259                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4260                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4261                         }
4262                     }
4263                 }
4264
4265                 score *= lambda;
4266
4267                 unquant_change= new_coeff - old_coeff;
4268                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4269
4270                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
4271                 if(score<best_score){
4272                     best_score= score;
4273                     best_coeff= i;
4274                     best_change= change;
4275                     best_unquant_change= unquant_change;
4276                 }
4277             }
4278             if(level){
4279                 prev_level= level + 64;
4280                 if(prev_level&(~127))
4281                     prev_level= 0;
4282                 prev_run= run;
4283                 run=0;
4284             }else{
4285                 run++;
4286             }
4287         }
4288 #ifdef REFINE_STATS
4289 STOP_TIMER("iterative step")}
4290 #endif
4291
4292         if(best_change){
4293             int j= perm_scantable[ best_coeff ];
4294
4295             block[j] += best_change;
4296
4297             if(best_coeff > last_non_zero){
4298                 last_non_zero= best_coeff;
4299                 av_assert2(block[j]);
4300 #ifdef REFINE_STATS
4301 after_last++;
4302 #endif
4303             }else{
4304 #ifdef REFINE_STATS
4305 if(block[j]){
4306     if(block[j] - best_change){
4307         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4308             raise++;
4309         }else{
4310             lower++;
4311         }
4312     }else{
4313         from_zero++;
4314     }
4315 }else{
4316     to_zero++;
4317 }
4318 #endif
4319                 for(; last_non_zero>=start_i; last_non_zero--){
4320                     if(block[perm_scantable[last_non_zero]])
4321                         break;
4322                 }
4323             }
4324 #ifdef REFINE_STATS
4325 count++;
4326 if(256*256*256*64 % count == 0){
4327     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4328 }
4329 #endif
4330             run=0;
4331             rle_index=0;
4332             for(i=start_i; i<=last_non_zero; i++){
4333                 int j= perm_scantable[i];
4334                 const int level= block[j];
4335
4336                  if(level){
4337                      run_tab[rle_index++]=run;
4338                      run=0;
4339                  }else{
4340                      run++;
4341                  }
4342             }
4343
4344             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4345         }else{
4346             break;
4347         }
4348     }
4349 #ifdef REFINE_STATS
4350 if(last_non_zero>0){
4351 STOP_TIMER("iterative search")
4352 }
4353 }
4354 #endif
4355
4356     return last_non_zero;
4357 }
4358
4359 int ff_dct_quantize_c(MpegEncContext *s,
4360                         int16_t *block, int n,
4361                         int qscale, int *overflow)
4362 {
4363     int i, j, level, last_non_zero, q, start_i;
4364     const int *qmat;
4365     const uint8_t *scantable= s->intra_scantable.scantable;
4366     int bias;
4367     int max=0;
4368     unsigned int threshold1, threshold2;
4369
4370     s->dsp.fdct (block);
4371
4372     if(s->dct_error_sum)
4373         s->denoise_dct(s, block);
4374
4375     if (s->mb_intra) {
4376         if (!s->h263_aic) {
4377             if (n < 4)
4378                 q = s->y_dc_scale;
4379             else
4380                 q = s->c_dc_scale;
4381             q = q << 3;
4382         } else
4383             /* For AIC we skip quant/dequant of INTRADC */
4384             q = 1 << 3;
4385
4386         /* note: block[0] is assumed to be positive */
4387         block[0] = (block[0] + (q >> 1)) / q;
4388         start_i = 1;
4389         last_non_zero = 0;
4390         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4391         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4392     } else {
4393         start_i = 0;
4394         last_non_zero = -1;
4395         qmat = s->q_inter_matrix[qscale];
4396         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4397     }
4398     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4399     threshold2= (threshold1<<1);
4400     for(i=63;i>=start_i;i--) {
4401         j = scantable[i];
4402         level = block[j] * qmat[j];
4403
4404         if(((unsigned)(level+threshold1))>threshold2){
4405             last_non_zero = i;
4406             break;
4407         }else{
4408             block[j]=0;
4409         }
4410     }
4411     for(i=start_i; i<=last_non_zero; i++) {
4412         j = scantable[i];
4413         level = block[j] * qmat[j];
4414
4415 //        if(   bias+level >= (1<<QMAT_SHIFT)
4416 //           || bias-level >= (1<<QMAT_SHIFT)){
4417         if(((unsigned)(level+threshold1))>threshold2){
4418             if(level>0){
4419                 level= (bias + level)>>QMAT_SHIFT;
4420                 block[j]= level;
4421             }else{
4422                 level= (bias - level)>>QMAT_SHIFT;
4423                 block[j]= -level;
4424             }
4425             max |=level;
4426         }else{
4427             block[j]=0;
4428         }
4429     }
4430     *overflow= s->max_qcoeff < max; //overflow might have happened
4431
4432     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4433     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4434         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4435
4436     return last_non_zero;
4437 }
4438
4439 #define OFFSET(x) offsetof(MpegEncContext, x)
4440 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4441 static const AVOption h263_options[] = {
4442     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4443     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4444     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4445     FF_MPV_COMMON_OPTS
4446     { NULL },
4447 };
4448
4449 static const AVClass h263_class = {
4450     .class_name = "H.263 encoder",
4451     .item_name  = av_default_item_name,
4452     .option     = h263_options,
4453     .version    = LIBAVUTIL_VERSION_INT,
4454 };
4455
4456 AVCodec ff_h263_encoder = {
4457     .name           = "h263",
4458     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4459     .type           = AVMEDIA_TYPE_VIDEO,
4460     .id             = AV_CODEC_ID_H263,
4461     .priv_data_size = sizeof(MpegEncContext),
4462     .init           = ff_MPV_encode_init,
4463     .encode2        = ff_MPV_encode_picture,
4464     .close          = ff_MPV_encode_end,
4465     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4466     .priv_class     = &h263_class,
4467 };
4468
4469 static const AVOption h263p_options[] = {
4470     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4471     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4472     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4473     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4474     FF_MPV_COMMON_OPTS
4475     { NULL },
4476 };
4477 static const AVClass h263p_class = {
4478     .class_name = "H.263p encoder",
4479     .item_name  = av_default_item_name,
4480     .option     = h263p_options,
4481     .version    = LIBAVUTIL_VERSION_INT,
4482 };
4483
4484 AVCodec ff_h263p_encoder = {
4485     .name           = "h263p",
4486     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4487     .type           = AVMEDIA_TYPE_VIDEO,
4488     .id             = AV_CODEC_ID_H263P,
4489     .priv_data_size = sizeof(MpegEncContext),
4490     .init           = ff_MPV_encode_init,
4491     .encode2        = ff_MPV_encode_picture,
4492     .close          = ff_MPV_encode_end,
4493     .capabilities   = CODEC_CAP_SLICE_THREADS,
4494     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4495     .priv_class     = &h263p_class,
4496 };
4497
4498 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4499
4500 AVCodec ff_msmpeg4v2_encoder = {
4501     .name           = "msmpeg4v2",
4502     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4503     .type           = AVMEDIA_TYPE_VIDEO,
4504     .id             = AV_CODEC_ID_MSMPEG4V2,
4505     .priv_data_size = sizeof(MpegEncContext),
4506     .init           = ff_MPV_encode_init,
4507     .encode2        = ff_MPV_encode_picture,
4508     .close          = ff_MPV_encode_end,
4509     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4510     .priv_class     = &msmpeg4v2_class,
4511 };
4512
4513 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4514
4515 AVCodec ff_msmpeg4v3_encoder = {
4516     .name           = "msmpeg4",
4517     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4518     .type           = AVMEDIA_TYPE_VIDEO,
4519     .id             = AV_CODEC_ID_MSMPEG4V3,
4520     .priv_data_size = sizeof(MpegEncContext),
4521     .init           = ff_MPV_encode_init,
4522     .encode2        = ff_MPV_encode_picture,
4523     .close          = ff_MPV_encode_end,
4524     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4525     .priv_class     = &msmpeg4v3_class,
4526 };
4527
4528 FF_MPV_GENERIC_CLASS(wmv1)
4529
4530 AVCodec ff_wmv1_encoder = {
4531     .name           = "wmv1",
4532     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4533     .type           = AVMEDIA_TYPE_VIDEO,
4534     .id             = AV_CODEC_ID_WMV1,
4535     .priv_data_size = sizeof(MpegEncContext),
4536     .init           = ff_MPV_encode_init,
4537     .encode2        = ff_MPV_encode_picture,
4538     .close          = ff_MPV_encode_end,
4539     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4540     .priv_class     = &wmv1_class,
4541 };