]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
Merge commit '85d805d5cccb2f57e7f2e2254d7a870f05926cfd'
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "dsputil.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "h261.h"
44 #include "h263.h"
45 #include "mathops.h"
46 #include "mpegutils.h"
47 #include "mjpegenc.h"
48 #include "msmpeg4.h"
49 #include "qpeldsp.h"
50 #include "faandct.h"
51 #include "thread.h"
52 #include "aandcttab.h"
53 #include "flv.h"
54 #include "mpeg4video.h"
55 #include "internal.h"
56 #include "bytestream.h"
57 #include <limits.h>
58 #include "sp5x.h"
59
60 static int encode_picture(MpegEncContext *s, int picture_number);
61 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
62 static int sse_mb(MpegEncContext *s);
63 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
64 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
65
66 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
67 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
68
69 const AVOption ff_mpv_generic_options[] = {
70     FF_MPV_COMMON_OPTS
71     { NULL },
72 };
73
74 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
75                        uint16_t (*qmat16)[2][64],
76                        const uint16_t *quant_matrix,
77                        int bias, int qmin, int qmax, int intra)
78 {
79     int qscale;
80     int shift = 0;
81
82     for (qscale = qmin; qscale <= qmax; qscale++) {
83         int i;
84         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
85             dsp->fdct == ff_jpeg_fdct_islow_10 ||
86             dsp->fdct == ff_faandct) {
87             for (i = 0; i < 64; i++) {
88                 const int j = dsp->idct_permutation[i];
89                 /* 16 <= qscale * quant_matrix[i] <= 7905
90                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
91                  *             19952 <=              x  <= 249205026
92                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
93                  *           3444240 >= (1 << 36) / (x) >= 275 */
94
95                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
96                                         (qscale * quant_matrix[j]));
97             }
98         } else if (dsp->fdct == ff_fdct_ifast) {
99             for (i = 0; i < 64; i++) {
100                 const int j = dsp->idct_permutation[i];
101                 /* 16 <= qscale * quant_matrix[i] <= 7905
102                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
103                  *             19952 <=              x  <= 249205026
104                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
105                  *           3444240 >= (1 << 36) / (x) >= 275 */
106
107                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
108                                         (ff_aanscales[i] * (int64_t)qscale * quant_matrix[j]));
109             }
110         } else {
111             for (i = 0; i < 64; i++) {
112                 const int j = dsp->idct_permutation[i];
113                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
114                  * Assume x = qscale * quant_matrix[i]
115                  * So             16 <=              x  <= 7905
116                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
117                  * so          32768 >= (1 << 19) / (x) >= 67 */
118                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
119                                         (qscale * quant_matrix[j]));
120                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
121                 //                    (qscale * quant_matrix[i]);
122                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
123                                        (qscale * quant_matrix[j]);
124
125                 if (qmat16[qscale][0][i] == 0 ||
126                     qmat16[qscale][0][i] == 128 * 256)
127                     qmat16[qscale][0][i] = 128 * 256 - 1;
128                 qmat16[qscale][1][i] =
129                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
130                                 qmat16[qscale][0][i]);
131             }
132         }
133
134         for (i = intra; i < 64; i++) {
135             int64_t max = 8191;
136             if (dsp->fdct == ff_fdct_ifast) {
137                 max = (8191LL * ff_aanscales[i]) >> 14;
138             }
139             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
140                 shift++;
141             }
142         }
143     }
144     if (shift) {
145         av_log(NULL, AV_LOG_INFO,
146                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
147                QMAT_SHIFT - shift);
148     }
149 }
150
151 static inline void update_qscale(MpegEncContext *s)
152 {
153     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
154                 (FF_LAMBDA_SHIFT + 7);
155     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
156
157     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
158                  FF_LAMBDA_SHIFT;
159 }
160
161 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
162 {
163     int i;
164
165     if (matrix) {
166         put_bits(pb, 1, 1);
167         for (i = 0; i < 64; i++) {
168             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
169         }
170     } else
171         put_bits(pb, 1, 0);
172 }
173
174 /**
175  * init s->current_picture.qscale_table from s->lambda_table
176  */
177 void ff_init_qscale_tab(MpegEncContext *s)
178 {
179     int8_t * const qscale_table = s->current_picture.qscale_table;
180     int i;
181
182     for (i = 0; i < s->mb_num; i++) {
183         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
184         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
185         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
186                                                   s->avctx->qmax);
187     }
188 }
189
190 static void update_duplicate_context_after_me(MpegEncContext *dst,
191                                               MpegEncContext *src)
192 {
193 #define COPY(a) dst->a= src->a
194     COPY(pict_type);
195     COPY(current_picture);
196     COPY(f_code);
197     COPY(b_code);
198     COPY(qscale);
199     COPY(lambda);
200     COPY(lambda2);
201     COPY(picture_in_gop_number);
202     COPY(gop_picture_number);
203     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
204     COPY(progressive_frame);    // FIXME don't set in encode_header
205     COPY(partitioned_frame);    // FIXME don't set in encode_header
206 #undef COPY
207 }
208
209 /**
210  * Set the given MpegEncContext to defaults for encoding.
211  * the changed fields will not depend upon the prior state of the MpegEncContext.
212  */
213 static void MPV_encode_defaults(MpegEncContext *s)
214 {
215     int i;
216     ff_MPV_common_defaults(s);
217
218     for (i = -16; i < 16; i++) {
219         default_fcode_tab[i + MAX_MV] = 1;
220     }
221     s->me.mv_penalty = default_mv_penalty;
222     s->fcode_tab     = default_fcode_tab;
223
224     s->input_picture_number  = 0;
225     s->picture_in_gop_number = 0;
226 }
227
228 av_cold int ff_dct_encode_init(MpegEncContext *s) {
229     if (ARCH_X86)
230         ff_dct_encode_init_x86(s);
231
232     if (CONFIG_H263_ENCODER)
233         ff_h263dsp_init(&s->h263dsp);
234     if (!s->dct_quantize)
235         s->dct_quantize = ff_dct_quantize_c;
236     if (!s->denoise_dct)
237         s->denoise_dct  = denoise_dct_c;
238     s->fast_dct_quantize = s->dct_quantize;
239     if (s->avctx->trellis)
240         s->dct_quantize  = dct_quantize_trellis_c;
241
242     return 0;
243 }
244
245 /* init video encoder */
246 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
247 {
248     MpegEncContext *s = avctx->priv_data;
249     int i, ret, format_supported;
250
251     MPV_encode_defaults(s);
252
253     switch (avctx->codec_id) {
254     case AV_CODEC_ID_MPEG2VIDEO:
255         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
256             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
257             av_log(avctx, AV_LOG_ERROR,
258                    "only YUV420 and YUV422 are supported\n");
259             return -1;
260         }
261         break;
262     case AV_CODEC_ID_MJPEG:
263     case AV_CODEC_ID_AMV:
264         format_supported = 0;
265         /* JPEG color space */
266         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
267             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
268             avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
269             (avctx->color_range == AVCOL_RANGE_JPEG &&
270              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
271               avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
272               avctx->pix_fmt == AV_PIX_FMT_YUV444P)))
273             format_supported = 1;
274         /* MPEG color space */
275         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
276                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
277                   avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
278                   avctx->pix_fmt == AV_PIX_FMT_YUV444P))
279             format_supported = 1;
280
281         if (!format_supported) {
282             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
283             return -1;
284         }
285         break;
286     default:
287         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
288             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
289             return -1;
290         }
291     }
292
293     switch (avctx->pix_fmt) {
294     case AV_PIX_FMT_YUVJ444P:
295     case AV_PIX_FMT_YUV444P:
296         s->chroma_format = CHROMA_444;
297         break;
298     case AV_PIX_FMT_YUVJ422P:
299     case AV_PIX_FMT_YUV422P:
300         s->chroma_format = CHROMA_422;
301         break;
302     case AV_PIX_FMT_YUVJ420P:
303     case AV_PIX_FMT_YUV420P:
304     default:
305         s->chroma_format = CHROMA_420;
306         break;
307     }
308
309     s->bit_rate = avctx->bit_rate;
310     s->width    = avctx->width;
311     s->height   = avctx->height;
312     if (avctx->gop_size > 600 &&
313         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
314         av_log(avctx, AV_LOG_WARNING,
315                "keyframe interval too large!, reducing it from %d to %d\n",
316                avctx->gop_size, 600);
317         avctx->gop_size = 600;
318     }
319     s->gop_size     = avctx->gop_size;
320     s->avctx        = avctx;
321     s->flags        = avctx->flags;
322     s->flags2       = avctx->flags2;
323     if (avctx->max_b_frames > MAX_B_FRAMES) {
324         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
325                "is %d.\n", MAX_B_FRAMES);
326         avctx->max_b_frames = MAX_B_FRAMES;
327     }
328     s->max_b_frames = avctx->max_b_frames;
329     s->codec_id     = avctx->codec->id;
330     s->strict_std_compliance = avctx->strict_std_compliance;
331     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
332     s->mpeg_quant         = avctx->mpeg_quant;
333     s->rtp_mode           = !!avctx->rtp_payload_size;
334     s->intra_dc_precision = avctx->intra_dc_precision;
335     s->user_specified_pts = AV_NOPTS_VALUE;
336
337     if (s->gop_size <= 1) {
338         s->intra_only = 1;
339         s->gop_size   = 12;
340     } else {
341         s->intra_only = 0;
342     }
343
344     s->me_method = avctx->me_method;
345
346     /* Fixed QSCALE */
347     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
348
349     s->adaptive_quant = (s->avctx->lumi_masking ||
350                          s->avctx->dark_masking ||
351                          s->avctx->temporal_cplx_masking ||
352                          s->avctx->spatial_cplx_masking  ||
353                          s->avctx->p_masking      ||
354                          s->avctx->border_masking ||
355                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
356                         !s->fixed_qscale;
357
358     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
359
360     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
361         switch(avctx->codec_id) {
362         case AV_CODEC_ID_MPEG1VIDEO:
363         case AV_CODEC_ID_MPEG2VIDEO:
364             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112L / 15000000 * 16384;
365             break;
366         case AV_CODEC_ID_MPEG4:
367         case AV_CODEC_ID_MSMPEG4V1:
368         case AV_CODEC_ID_MSMPEG4V2:
369         case AV_CODEC_ID_MSMPEG4V3:
370             if       (avctx->rc_max_rate >= 15000000) {
371                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000L) * (760-320) / (38400000 - 15000000);
372             } else if(avctx->rc_max_rate >=  2000000) {
373                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000L) * (320- 80) / (15000000 -  2000000);
374             } else if(avctx->rc_max_rate >=   384000) {
375                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000L) * ( 80- 40) / ( 2000000 -   384000);
376             } else
377                 avctx->rc_buffer_size = 40;
378             avctx->rc_buffer_size *= 16384;
379             break;
380         }
381         if (avctx->rc_buffer_size) {
382             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
383         }
384     }
385
386     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
387         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
388         if (avctx->rc_max_rate && !avctx->rc_buffer_size)
389             return -1;
390     }
391
392     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
393         av_log(avctx, AV_LOG_INFO,
394                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
395     }
396
397     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
398         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
399         return -1;
400     }
401
402     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
403         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
404         return -1;
405     }
406
407     if (avctx->rc_max_rate &&
408         avctx->rc_max_rate == avctx->bit_rate &&
409         avctx->rc_max_rate != avctx->rc_min_rate) {
410         av_log(avctx, AV_LOG_INFO,
411                "impossible bitrate constraints, this will fail\n");
412     }
413
414     if (avctx->rc_buffer_size &&
415         avctx->bit_rate * (int64_t)avctx->time_base.num >
416             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
417         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
418         return -1;
419     }
420
421     if (!s->fixed_qscale &&
422         avctx->bit_rate * av_q2d(avctx->time_base) >
423             avctx->bit_rate_tolerance) {
424         av_log(avctx, AV_LOG_WARNING,
425                "bitrate tolerance %d too small for bitrate %d, overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
426         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
427     }
428
429     if (s->avctx->rc_max_rate &&
430         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
431         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
432          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
433         90000LL * (avctx->rc_buffer_size - 1) >
434             s->avctx->rc_max_rate * 0xFFFFLL) {
435         av_log(avctx, AV_LOG_INFO,
436                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
437                "specified vbv buffer is too large for the given bitrate!\n");
438     }
439
440     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
441         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
442         s->codec_id != AV_CODEC_ID_FLV1) {
443         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
444         return -1;
445     }
446
447     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
448         av_log(avctx, AV_LOG_ERROR,
449                "OBMC is only supported with simple mb decision\n");
450         return -1;
451     }
452
453     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
454         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
455         return -1;
456     }
457
458     if (s->max_b_frames                    &&
459         s->codec_id != AV_CODEC_ID_MPEG4      &&
460         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
461         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
462         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
463         return -1;
464     }
465     if (s->max_b_frames < 0) {
466         av_log(avctx, AV_LOG_ERROR,
467                "max b frames must be 0 or positive for mpegvideo based encoders\n");
468         return -1;
469     }
470
471     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
472          s->codec_id == AV_CODEC_ID_H263  ||
473          s->codec_id == AV_CODEC_ID_H263P) &&
474         (avctx->sample_aspect_ratio.num > 255 ||
475          avctx->sample_aspect_ratio.den > 255)) {
476         av_log(avctx, AV_LOG_WARNING,
477                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
478                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
479         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
480                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
481     }
482
483     if ((s->codec_id == AV_CODEC_ID_H263  ||
484          s->codec_id == AV_CODEC_ID_H263P) &&
485         (avctx->width  > 2048 ||
486          avctx->height > 1152 )) {
487         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
488         return -1;
489     }
490     if ((s->codec_id == AV_CODEC_ID_H263  ||
491          s->codec_id == AV_CODEC_ID_H263P) &&
492         ((avctx->width &3) ||
493          (avctx->height&3) )) {
494         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
495         return -1;
496     }
497
498     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
499         (avctx->width  > 4095 ||
500          avctx->height > 4095 )) {
501         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
502         return -1;
503     }
504
505     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
506         (avctx->width  > 16383 ||
507          avctx->height > 16383 )) {
508         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
509         return -1;
510     }
511
512     if (s->codec_id == AV_CODEC_ID_RV10 &&
513         (avctx->width &15 ||
514          avctx->height&15 )) {
515         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
516         return AVERROR(EINVAL);
517     }
518
519     if (s->codec_id == AV_CODEC_ID_RV20 &&
520         (avctx->width &3 ||
521          avctx->height&3 )) {
522         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
523         return AVERROR(EINVAL);
524     }
525
526     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
527          s->codec_id == AV_CODEC_ID_WMV2) &&
528          avctx->width & 1) {
529          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
530          return -1;
531     }
532
533     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
534         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
535         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
536         return -1;
537     }
538
539     // FIXME mpeg2 uses that too
540     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
541                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
542         av_log(avctx, AV_LOG_ERROR,
543                "mpeg2 style quantization not supported by codec\n");
544         return -1;
545     }
546
547     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
548         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
549         return -1;
550     }
551
552     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
553         s->avctx->mb_decision != FF_MB_DECISION_RD) {
554         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
555         return -1;
556     }
557
558     if (s->avctx->scenechange_threshold < 1000000000 &&
559         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
560         av_log(avctx, AV_LOG_ERROR,
561                "closed gop with scene change detection are not supported yet, "
562                "set threshold to 1000000000\n");
563         return -1;
564     }
565
566     if (s->flags & CODEC_FLAG_LOW_DELAY) {
567         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
568             av_log(avctx, AV_LOG_ERROR,
569                   "low delay forcing is only available for mpeg2\n");
570             return -1;
571         }
572         if (s->max_b_frames != 0) {
573             av_log(avctx, AV_LOG_ERROR,
574                    "b frames cannot be used with low delay\n");
575             return -1;
576         }
577     }
578
579     if (s->q_scale_type == 1) {
580         if (avctx->qmax > 12) {
581             av_log(avctx, AV_LOG_ERROR,
582                    "non linear quant only supports qmax <= 12 currently\n");
583             return -1;
584         }
585     }
586
587     if (s->avctx->thread_count > 1         &&
588         s->codec_id != AV_CODEC_ID_MPEG4      &&
589         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
590         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
591         s->codec_id != AV_CODEC_ID_MJPEG      &&
592         (s->codec_id != AV_CODEC_ID_H263P)) {
593         av_log(avctx, AV_LOG_ERROR,
594                "multi threaded encoding not supported by codec\n");
595         return -1;
596     }
597
598     if (s->avctx->thread_count < 1) {
599         av_log(avctx, AV_LOG_ERROR,
600                "automatic thread number detection not supported by codec, "
601                "patch welcome\n");
602         return -1;
603     }
604
605     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
606         s->rtp_mode = 1;
607
608     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
609         s->h263_slice_structured = 1;
610
611     if (!avctx->time_base.den || !avctx->time_base.num) {
612         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
613         return -1;
614     }
615
616     i = (INT_MAX / 2 + 128) >> 8;
617     if (avctx->mb_threshold >= i) {
618         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
619                i - 1);
620         return -1;
621     }
622
623     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
624         av_log(avctx, AV_LOG_INFO,
625                "notice: b_frame_strategy only affects the first pass\n");
626         avctx->b_frame_strategy = 0;
627     }
628
629     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
630     if (i > 1) {
631         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
632         avctx->time_base.den /= i;
633         avctx->time_base.num /= i;
634         //return -1;
635     }
636
637     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
638         // (a + x * 3 / 8) / x
639         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
640         s->inter_quant_bias = 0;
641     } else {
642         s->intra_quant_bias = 0;
643         // (a - x / 4) / x
644         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
645     }
646
647     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
648         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
649         return AVERROR(EINVAL);
650     }
651
652     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
653         s->intra_quant_bias = avctx->intra_quant_bias;
654     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
655         s->inter_quant_bias = avctx->inter_quant_bias;
656
657     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
658
659     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
660         s->avctx->time_base.den > (1 << 16) - 1) {
661         av_log(avctx, AV_LOG_ERROR,
662                "timebase %d/%d not supported by MPEG 4 standard, "
663                "the maximum admitted value for the timebase denominator "
664                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
665                (1 << 16) - 1);
666         return -1;
667     }
668     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
669
670     switch (avctx->codec->id) {
671     case AV_CODEC_ID_MPEG1VIDEO:
672         s->out_format = FMT_MPEG1;
673         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
674         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
675         break;
676     case AV_CODEC_ID_MPEG2VIDEO:
677         s->out_format = FMT_MPEG1;
678         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
679         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
680         s->rtp_mode   = 1;
681         break;
682     case AV_CODEC_ID_MJPEG:
683     case AV_CODEC_ID_AMV:
684         s->out_format = FMT_MJPEG;
685         s->intra_only = 1; /* force intra only for jpeg */
686         if (!CONFIG_MJPEG_ENCODER ||
687             ff_mjpeg_encode_init(s) < 0)
688             return -1;
689         avctx->delay = 0;
690         s->low_delay = 1;
691         break;
692     case AV_CODEC_ID_H261:
693         if (!CONFIG_H261_ENCODER)
694             return -1;
695         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
696             av_log(avctx, AV_LOG_ERROR,
697                    "The specified picture size of %dx%d is not valid for the "
698                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
699                     s->width, s->height);
700             return -1;
701         }
702         s->out_format = FMT_H261;
703         avctx->delay  = 0;
704         s->low_delay  = 1;
705         break;
706     case AV_CODEC_ID_H263:
707         if (!CONFIG_H263_ENCODER)
708             return -1;
709         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
710                              s->width, s->height) == 8) {
711             av_log(avctx, AV_LOG_ERROR,
712                    "The specified picture size of %dx%d is not valid for "
713                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
714                    "352x288, 704x576, and 1408x1152. "
715                    "Try H.263+.\n", s->width, s->height);
716             return -1;
717         }
718         s->out_format = FMT_H263;
719         avctx->delay  = 0;
720         s->low_delay  = 1;
721         break;
722     case AV_CODEC_ID_H263P:
723         s->out_format = FMT_H263;
724         s->h263_plus  = 1;
725         /* Fx */
726         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
727         s->modified_quant  = s->h263_aic;
728         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
729         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
730
731         /* /Fx */
732         /* These are just to be sure */
733         avctx->delay = 0;
734         s->low_delay = 1;
735         break;
736     case AV_CODEC_ID_FLV1:
737         s->out_format      = FMT_H263;
738         s->h263_flv        = 2; /* format = 1; 11-bit codes */
739         s->unrestricted_mv = 1;
740         s->rtp_mode  = 0; /* don't allow GOB */
741         avctx->delay = 0;
742         s->low_delay = 1;
743         break;
744     case AV_CODEC_ID_RV10:
745         s->out_format = FMT_H263;
746         avctx->delay  = 0;
747         s->low_delay  = 1;
748         break;
749     case AV_CODEC_ID_RV20:
750         s->out_format      = FMT_H263;
751         avctx->delay       = 0;
752         s->low_delay       = 1;
753         s->modified_quant  = 1;
754         s->h263_aic        = 1;
755         s->h263_plus       = 1;
756         s->loop_filter     = 1;
757         s->unrestricted_mv = 0;
758         break;
759     case AV_CODEC_ID_MPEG4:
760         s->out_format      = FMT_H263;
761         s->h263_pred       = 1;
762         s->unrestricted_mv = 1;
763         s->low_delay       = s->max_b_frames ? 0 : 1;
764         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
765         break;
766     case AV_CODEC_ID_MSMPEG4V2:
767         s->out_format      = FMT_H263;
768         s->h263_pred       = 1;
769         s->unrestricted_mv = 1;
770         s->msmpeg4_version = 2;
771         avctx->delay       = 0;
772         s->low_delay       = 1;
773         break;
774     case AV_CODEC_ID_MSMPEG4V3:
775         s->out_format        = FMT_H263;
776         s->h263_pred         = 1;
777         s->unrestricted_mv   = 1;
778         s->msmpeg4_version   = 3;
779         s->flipflop_rounding = 1;
780         avctx->delay         = 0;
781         s->low_delay         = 1;
782         break;
783     case AV_CODEC_ID_WMV1:
784         s->out_format        = FMT_H263;
785         s->h263_pred         = 1;
786         s->unrestricted_mv   = 1;
787         s->msmpeg4_version   = 4;
788         s->flipflop_rounding = 1;
789         avctx->delay         = 0;
790         s->low_delay         = 1;
791         break;
792     case AV_CODEC_ID_WMV2:
793         s->out_format        = FMT_H263;
794         s->h263_pred         = 1;
795         s->unrestricted_mv   = 1;
796         s->msmpeg4_version   = 5;
797         s->flipflop_rounding = 1;
798         avctx->delay         = 0;
799         s->low_delay         = 1;
800         break;
801     default:
802         return -1;
803     }
804
805     avctx->has_b_frames = !s->low_delay;
806
807     s->encoding = 1;
808
809     s->progressive_frame    =
810     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
811                                                 CODEC_FLAG_INTERLACED_ME) ||
812                                 s->alternate_scan);
813
814     /* init */
815     if (ff_MPV_common_init(s) < 0)
816         return -1;
817
818     ff_qpeldsp_init(&s->qdsp);
819
820     s->avctx->coded_frame = s->current_picture.f;
821
822     if (s->msmpeg4_version) {
823         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
824                           2 * 2 * (MAX_LEVEL + 1) *
825                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
826     }
827     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
828
829     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
830     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
831     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
832     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
833     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
834     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
835     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
836                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
837     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
838                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
839
840     if (s->avctx->noise_reduction) {
841         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
842                           2 * 64 * sizeof(uint16_t), fail);
843     }
844
845     ff_dct_encode_init(s);
846
847     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
848         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
849
850     s->quant_precision = 5;
851
852     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
853     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
854
855     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
856         ff_h261_encode_init(s);
857     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
858         ff_h263_encode_init(s);
859     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
860         ff_msmpeg4_encode_init(s);
861     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
862         && s->out_format == FMT_MPEG1)
863         ff_mpeg1_encode_init(s);
864
865     /* init q matrix */
866     for (i = 0; i < 64; i++) {
867         int j = s->dsp.idct_permutation[i];
868         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
869             s->mpeg_quant) {
870             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
871             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
872         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
873             s->intra_matrix[j] =
874             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
875         } else {
876             /* mpeg1/2 */
877             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
878             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
879         }
880         if (s->avctx->intra_matrix)
881             s->intra_matrix[j] = s->avctx->intra_matrix[i];
882         if (s->avctx->inter_matrix)
883             s->inter_matrix[j] = s->avctx->inter_matrix[i];
884     }
885
886     /* precompute matrix */
887     /* for mjpeg, we do include qscale in the matrix */
888     if (s->out_format != FMT_MJPEG) {
889         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
890                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
891                           31, 1);
892         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
893                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
894                           31, 0);
895     }
896
897     if (ff_rate_control_init(s) < 0)
898         return -1;
899
900 #if FF_API_ERROR_RATE
901     FF_DISABLE_DEPRECATION_WARNINGS
902     if (avctx->error_rate)
903         s->error_rate = avctx->error_rate;
904     FF_ENABLE_DEPRECATION_WARNINGS;
905 #endif
906
907 #if FF_API_NORMALIZE_AQP
908     FF_DISABLE_DEPRECATION_WARNINGS
909     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
910         s->mpv_flags |= FF_MPV_FLAG_NAQ;
911     FF_ENABLE_DEPRECATION_WARNINGS;
912 #endif
913
914 #if FF_API_MV0
915     FF_DISABLE_DEPRECATION_WARNINGS
916     if (avctx->flags & CODEC_FLAG_MV0)
917         s->mpv_flags |= FF_MPV_FLAG_MV0;
918     FF_ENABLE_DEPRECATION_WARNINGS
919 #endif
920
921     if (avctx->b_frame_strategy == 2) {
922         for (i = 0; i < s->max_b_frames + 2; i++) {
923             s->tmp_frames[i] = av_frame_alloc();
924             if (!s->tmp_frames[i])
925                 return AVERROR(ENOMEM);
926
927             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
928             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
929             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
930
931             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
932             if (ret < 0)
933                 return ret;
934         }
935     }
936
937     return 0;
938 fail:
939     ff_MPV_encode_end(avctx);
940     return AVERROR_UNKNOWN;
941 }
942
943 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
944 {
945     MpegEncContext *s = avctx->priv_data;
946     int i;
947
948     ff_rate_control_uninit(s);
949
950     ff_MPV_common_end(s);
951     if (CONFIG_MJPEG_ENCODER &&
952         s->out_format == FMT_MJPEG)
953         ff_mjpeg_encode_close(s);
954
955     av_freep(&avctx->extradata);
956
957     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
958         av_frame_free(&s->tmp_frames[i]);
959
960     ff_free_picture_tables(&s->new_picture);
961     ff_mpeg_unref_picture(s, &s->new_picture);
962
963     av_freep(&s->avctx->stats_out);
964     av_freep(&s->ac_stats);
965
966     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
967     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
968     s->q_chroma_intra_matrix=   NULL;
969     s->q_chroma_intra_matrix16= NULL;
970     av_freep(&s->q_intra_matrix);
971     av_freep(&s->q_inter_matrix);
972     av_freep(&s->q_intra_matrix16);
973     av_freep(&s->q_inter_matrix16);
974     av_freep(&s->input_picture);
975     av_freep(&s->reordered_input_picture);
976     av_freep(&s->dct_offset);
977
978     return 0;
979 }
980
/**
 * Sum of absolute errors of a 16x16 block against a constant value.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared with
 * @param stride distance in bytes between the starts of consecutive rows
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int line, col;

    for (line = 0; line < 16; line++) {
        const uint8_t *row = src + line * stride;

        for (col = 0; col < 16; col++) {
            const int diff = row[col] - ref;

            total += diff < 0 ? -diff : diff;
        }
    }

    return total;
}
994
995 static int get_intra_count(MpegEncContext *s, uint8_t *src,
996                            uint8_t *ref, int stride)
997 {
998     int x, y, w, h;
999     int acc = 0;
1000
1001     w = s->width  & ~15;
1002     h = s->height & ~15;
1003
1004     for (y = 0; y < h; y += 16) {
1005         for (x = 0; x < w; x += 16) {
1006             int offset = x + y * stride;
1007             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
1008                                      16);
1009             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
1010             int sae  = get_sae(src + offset, mean, stride);
1011
1012             acc += sae + 500 < sad;
1013         }
1014     }
1015     return acc;
1016 }
1017
1018
1019 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1020 {
1021     Picture *pic = NULL;
1022     int64_t pts;
1023     int i, display_picture_number = 0, ret;
1024     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
1025                                                  (s->low_delay ? 0 : 1);
1026     int direct = 1;
1027
1028     if (pic_arg) {
1029         pts = pic_arg->pts;
1030         display_picture_number = s->input_picture_number++;
1031
1032         if (pts != AV_NOPTS_VALUE) {
1033             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1034                 int64_t last = s->user_specified_pts;
1035
1036                 if (pts <= last) {
1037                     av_log(s->avctx, AV_LOG_ERROR,
1038                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1039                            pts, last);
1040                     return AVERROR(EINVAL);
1041                 }
1042
1043                 if (!s->low_delay && display_picture_number == 1)
1044                     s->dts_delta = pts - last;
1045             }
1046             s->user_specified_pts = pts;
1047         } else {
1048             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1049                 s->user_specified_pts =
1050                 pts = s->user_specified_pts + 1;
1051                 av_log(s->avctx, AV_LOG_INFO,
1052                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1053                        pts);
1054             } else {
1055                 pts = display_picture_number;
1056             }
1057         }
1058     }
1059
1060     if (pic_arg) {
1061         if (!pic_arg->buf[0])
1062             direct = 0;
1063         if (pic_arg->linesize[0] != s->linesize)
1064             direct = 0;
1065         if (pic_arg->linesize[1] != s->uvlinesize)
1066             direct = 0;
1067         if (pic_arg->linesize[2] != s->uvlinesize)
1068             direct = 0;
1069         if ((s->width & 15) || (s->height & 15))
1070             direct = 0;
1071         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1072             direct = 0;
1073         if (s->linesize & (STRIDE_ALIGN-1))
1074             direct = 0;
1075
1076         av_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1077                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1078
1079         if (direct) {
1080             i = ff_find_unused_picture(s, 1);
1081             if (i < 0)
1082                 return i;
1083
1084             pic = &s->picture[i];
1085             pic->reference = 3;
1086
1087             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1088                 return ret;
1089             if (ff_alloc_picture(s, pic, 1) < 0) {
1090                 return -1;
1091             }
1092         } else {
1093             i = ff_find_unused_picture(s, 0);
1094             if (i < 0)
1095                 return i;
1096
1097             pic = &s->picture[i];
1098             pic->reference = 3;
1099
1100             if (ff_alloc_picture(s, pic, 0) < 0) {
1101                 return -1;
1102             }
1103
1104             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1105                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1106                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1107                 // empty
1108             } else {
1109                 int h_chroma_shift, v_chroma_shift;
1110                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1111                                                  &h_chroma_shift,
1112                                                  &v_chroma_shift);
1113
1114                 for (i = 0; i < 3; i++) {
1115                     int src_stride = pic_arg->linesize[i];
1116                     int dst_stride = i ? s->uvlinesize : s->linesize;
1117                     int h_shift = i ? h_chroma_shift : 0;
1118                     int v_shift = i ? v_chroma_shift : 0;
1119                     int w = s->width  >> h_shift;
1120                     int h = s->height >> v_shift;
1121                     uint8_t *src = pic_arg->data[i];
1122                     uint8_t *dst = pic->f->data[i];
1123                     int vpad = 16;
1124
1125                     if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1126                         && !s->progressive_sequence)
1127                         vpad = 32;
1128
1129                     if (!s->avctx->rc_buffer_size)
1130                         dst += INPLACE_OFFSET;
1131
1132                     if (src_stride == dst_stride)
1133                         memcpy(dst, src, src_stride * h);
1134                     else {
1135                         int h2 = h;
1136                         uint8_t *dst2 = dst;
1137                         while (h2--) {
1138                             memcpy(dst2, src, w);
1139                             dst2 += dst_stride;
1140                             src += src_stride;
1141                         }
1142                     }
1143                     if ((s->width & 15) || (s->height & (vpad-1))) {
1144                         s->dsp.draw_edges(dst, dst_stride,
1145                                           w, h,
1146                                           16>>h_shift,
1147                                           vpad>>v_shift,
1148                                           EDGE_BOTTOM);
1149                     }
1150                 }
1151             }
1152         }
1153         ret = av_frame_copy_props(pic->f, pic_arg);
1154         if (ret < 0)
1155             return ret;
1156
1157         pic->f->display_picture_number = display_picture_number;
1158         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1159     }
1160
1161     /* shift buffer entries */
1162     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1163         s->input_picture[i - 1] = s->input_picture[i];
1164
1165     s->input_picture[encoding_delay] = (Picture*) pic;
1166
1167     return 0;
1168 }
1169
1170 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1171 {
1172     int x, y, plane;
1173     int score = 0;
1174     int64_t score64 = 0;
1175
1176     for (plane = 0; plane < 3; plane++) {
1177         const int stride = p->f->linesize[plane];
1178         const int bw = plane ? 1 : 2;
1179         for (y = 0; y < s->mb_height * bw; y++) {
1180             for (x = 0; x < s->mb_width * bw; x++) {
1181                 int off = p->shared ? 0 : 16;
1182                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1183                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1184                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1185
1186                 switch (FFABS(s->avctx->frame_skip_exp)) {
1187                 case 0: score    =  FFMAX(score, v);          break;
1188                 case 1: score   += FFABS(v);                  break;
1189                 case 2: score64 += v * (int64_t)v;                       break;
1190                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1191                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1192                 }
1193             }
1194         }
1195     }
1196     emms_c();
1197
1198     if (score)
1199         score64 = score;
1200     if (s->avctx->frame_skip_exp < 0)
1201         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1202                       -1.0/s->avctx->frame_skip_exp);
1203
1204     if (score64 < s->avctx->frame_skip_threshold)
1205         return 1;
1206     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1207         return 1;
1208     return 0;
1209 }
1210
1211 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1212 {
1213     AVPacket pkt = { 0 };
1214     int ret, got_output;
1215
1216     av_init_packet(&pkt);
1217     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1218     if (ret < 0)
1219         return ret;
1220
1221     ret = pkt.size;
1222     av_free_packet(&pkt);
1223     return ret;
1224 }
1225
1226 static int estimate_best_b_count(MpegEncContext *s)
1227 {
1228     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1229     AVCodecContext *c = avcodec_alloc_context3(NULL);
1230     const int scale = s->avctx->brd_scale;
1231     int i, j, out_size, p_lambda, b_lambda, lambda2;
1232     int64_t best_rd  = INT64_MAX;
1233     int best_b_count = -1;
1234
1235     av_assert0(scale >= 0 && scale <= 3);
1236
1237     //emms_c();
1238     //s->next_picture_ptr->quality;
1239     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1240     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1241     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1242     if (!b_lambda) // FIXME we should do this somewhere else
1243         b_lambda = p_lambda;
1244     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1245                FF_LAMBDA_SHIFT;
1246
1247     c->width        = s->width  >> scale;
1248     c->height       = s->height >> scale;
1249     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1250     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1251     c->mb_decision  = s->avctx->mb_decision;
1252     c->me_cmp       = s->avctx->me_cmp;
1253     c->mb_cmp       = s->avctx->mb_cmp;
1254     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1255     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1256     c->time_base    = s->avctx->time_base;
1257     c->max_b_frames = s->max_b_frames;
1258
1259     if (avcodec_open2(c, codec, NULL) < 0)
1260         return -1;
1261
1262     for (i = 0; i < s->max_b_frames + 2; i++) {
1263         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1264                                                 s->next_picture_ptr;
1265
1266         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1267             pre_input = *pre_input_ptr;
1268
1269             if (!pre_input.shared && i) {
1270                 pre_input.f->data[0] += INPLACE_OFFSET;
1271                 pre_input.f->data[1] += INPLACE_OFFSET;
1272                 pre_input.f->data[2] += INPLACE_OFFSET;
1273             }
1274
1275             s->dsp.shrink[scale](s->tmp_frames[i]->data[0], s->tmp_frames[i]->linesize[0],
1276                                  pre_input.f->data[0], pre_input.f->linesize[0],
1277                                  c->width,      c->height);
1278             s->dsp.shrink[scale](s->tmp_frames[i]->data[1], s->tmp_frames[i]->linesize[1],
1279                                  pre_input.f->data[1], pre_input.f->linesize[1],
1280                                  c->width >> 1, c->height >> 1);
1281             s->dsp.shrink[scale](s->tmp_frames[i]->data[2], s->tmp_frames[i]->linesize[2],
1282                                  pre_input.f->data[2], pre_input.f->linesize[2],
1283                                  c->width >> 1, c->height >> 1);
1284         }
1285     }
1286
1287     for (j = 0; j < s->max_b_frames + 1; j++) {
1288         int64_t rd = 0;
1289
1290         if (!s->input_picture[j])
1291             break;
1292
1293         c->error[0] = c->error[1] = c->error[2] = 0;
1294
1295         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1296         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1297
1298         out_size = encode_frame(c, s->tmp_frames[0]);
1299
1300         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1301
1302         for (i = 0; i < s->max_b_frames + 1; i++) {
1303             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1304
1305             s->tmp_frames[i + 1]->pict_type = is_p ?
1306                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1307             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1308
1309             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1310
1311             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1312         }
1313
1314         /* get the delayed frames */
1315         while (out_size) {
1316             out_size = encode_frame(c, NULL);
1317             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1318         }
1319
1320         rd += c->error[0] + c->error[1] + c->error[2];
1321
1322         if (rd < best_rd) {
1323             best_rd = rd;
1324             best_b_count = j;
1325         }
1326     }
1327
1328     avcodec_close(c);
1329     av_freep(&c);
1330
1331     return best_b_count;
1332 }
1333
1334 static int select_input_picture(MpegEncContext *s)
1335 {
1336     int i, ret;
1337
1338     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1339         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1340     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1341
1342     /* set next picture type & ordering */
1343     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1344         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1345             if (s->picture_in_gop_number < s->gop_size &&
1346                 s->next_picture_ptr &&
1347                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1348                 // FIXME check that te gop check above is +-1 correct
1349                 av_frame_unref(s->input_picture[0]->f);
1350
1351                 ff_vbv_update(s, 0);
1352
1353                 goto no_output_pic;
1354             }
1355         }
1356
1357         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1358             s->next_picture_ptr == NULL || s->intra_only) {
1359             s->reordered_input_picture[0] = s->input_picture[0];
1360             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1361             s->reordered_input_picture[0]->f->coded_picture_number =
1362                 s->coded_picture_number++;
1363         } else {
1364             int b_frames;
1365
1366             if (s->flags & CODEC_FLAG_PASS2) {
1367                 for (i = 0; i < s->max_b_frames + 1; i++) {
1368                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1369
1370                     if (pict_num >= s->rc_context.num_entries)
1371                         break;
1372                     if (!s->input_picture[i]) {
1373                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1374                         break;
1375                     }
1376
1377                     s->input_picture[i]->f->pict_type =
1378                         s->rc_context.entry[pict_num].new_pict_type;
1379                 }
1380             }
1381
1382             if (s->avctx->b_frame_strategy == 0) {
1383                 b_frames = s->max_b_frames;
1384                 while (b_frames && !s->input_picture[b_frames])
1385                     b_frames--;
1386             } else if (s->avctx->b_frame_strategy == 1) {
1387                 for (i = 1; i < s->max_b_frames + 1; i++) {
1388                     if (s->input_picture[i] &&
1389                         s->input_picture[i]->b_frame_score == 0) {
1390                         s->input_picture[i]->b_frame_score =
1391                             get_intra_count(s,
1392                                             s->input_picture[i    ]->f->data[0],
1393                                             s->input_picture[i - 1]->f->data[0],
1394                                             s->linesize) + 1;
1395                     }
1396                 }
1397                 for (i = 0; i < s->max_b_frames + 1; i++) {
1398                     if (s->input_picture[i] == NULL ||
1399                         s->input_picture[i]->b_frame_score - 1 >
1400                             s->mb_num / s->avctx->b_sensitivity)
1401                         break;
1402                 }
1403
1404                 b_frames = FFMAX(0, i - 1);
1405
1406                 /* reset scores */
1407                 for (i = 0; i < b_frames + 1; i++) {
1408                     s->input_picture[i]->b_frame_score = 0;
1409                 }
1410             } else if (s->avctx->b_frame_strategy == 2) {
1411                 b_frames = estimate_best_b_count(s);
1412             } else {
1413                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1414                 b_frames = 0;
1415             }
1416
1417             emms_c();
1418
1419             for (i = b_frames - 1; i >= 0; i--) {
1420                 int type = s->input_picture[i]->f->pict_type;
1421                 if (type && type != AV_PICTURE_TYPE_B)
1422                     b_frames = i;
1423             }
1424             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1425                 b_frames == s->max_b_frames) {
1426                 av_log(s->avctx, AV_LOG_ERROR,
1427                        "warning, too many b frames in a row\n");
1428             }
1429
1430             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1431                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1432                     s->gop_size > s->picture_in_gop_number) {
1433                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1434                 } else {
1435                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1436                         b_frames = 0;
1437                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1438                 }
1439             }
1440
1441             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1442                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1443                 b_frames--;
1444
1445             s->reordered_input_picture[0] = s->input_picture[b_frames];
1446             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1447                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1448             s->reordered_input_picture[0]->f->coded_picture_number =
1449                 s->coded_picture_number++;
1450             for (i = 0; i < b_frames; i++) {
1451                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1452                 s->reordered_input_picture[i + 1]->f->pict_type =
1453                     AV_PICTURE_TYPE_B;
1454                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1455                     s->coded_picture_number++;
1456             }
1457         }
1458     }
1459 no_output_pic:
1460     if (s->reordered_input_picture[0]) {
1461         s->reordered_input_picture[0]->reference =
1462            s->reordered_input_picture[0]->f->pict_type !=
1463                AV_PICTURE_TYPE_B ? 3 : 0;
1464
1465         ff_mpeg_unref_picture(s, &s->new_picture);
1466         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1467             return ret;
1468
1469         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1470             // input is a shared pix, so we can't modifiy it -> alloc a new
1471             // one & ensure that the shared one is reuseable
1472
1473             Picture *pic;
1474             int i = ff_find_unused_picture(s, 0);
1475             if (i < 0)
1476                 return i;
1477             pic = &s->picture[i];
1478
1479             pic->reference = s->reordered_input_picture[0]->reference;
1480             if (ff_alloc_picture(s, pic, 0) < 0) {
1481                 return -1;
1482             }
1483
1484             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1485             if (ret < 0)
1486                 return ret;
1487
1488             /* mark us unused / free shared pic */
1489             av_frame_unref(s->reordered_input_picture[0]->f);
1490             s->reordered_input_picture[0]->shared = 0;
1491
1492             s->current_picture_ptr = pic;
1493         } else {
1494             // input is not a shared pix -> reuse buffer for current_pix
1495             s->current_picture_ptr = s->reordered_input_picture[0];
1496             for (i = 0; i < 4; i++) {
1497                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1498             }
1499         }
1500         ff_mpeg_unref_picture(s, &s->current_picture);
1501         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1502                                        s->current_picture_ptr)) < 0)
1503             return ret;
1504
1505         s->picture_number = s->new_picture.f->display_picture_number;
1506     } else {
1507         ff_mpeg_unref_picture(s, &s->new_picture);
1508     }
1509     return 0;
1510 }
1511
1512 static void frame_end(MpegEncContext *s)
1513 {
1514     if (s->unrestricted_mv &&
1515         s->current_picture.reference &&
1516         !s->intra_only) {
1517         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1518         int hshift = desc->log2_chroma_w;
1519         int vshift = desc->log2_chroma_h;
1520         s->dsp.draw_edges(s->current_picture.f->data[0], s->current_picture.f->linesize[0],
1521                           s->h_edge_pos, s->v_edge_pos,
1522                           EDGE_WIDTH, EDGE_WIDTH,
1523                           EDGE_TOP | EDGE_BOTTOM);
1524         s->dsp.draw_edges(s->current_picture.f->data[1], s->current_picture.f->linesize[1],
1525                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1526                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1527                           EDGE_TOP | EDGE_BOTTOM);
1528         s->dsp.draw_edges(s->current_picture.f->data[2], s->current_picture.f->linesize[2],
1529                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1530                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1531                           EDGE_TOP | EDGE_BOTTOM);
1532     }
1533
1534     emms_c();
1535
1536     s->last_pict_type                 = s->pict_type;
1537     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1538     if (s->pict_type!= AV_PICTURE_TYPE_B)
1539         s->last_non_b_pict_type = s->pict_type;
1540
1541     s->avctx->coded_frame = s->current_picture_ptr->f;
1542
1543 }
1544
1545 static void update_noise_reduction(MpegEncContext *s)
1546 {
1547     int intra, i;
1548
1549     for (intra = 0; intra < 2; intra++) {
1550         if (s->dct_count[intra] > (1 << 16)) {
1551             for (i = 0; i < 64; i++) {
1552                 s->dct_error_sum[intra][i] >>= 1;
1553             }
1554             s->dct_count[intra] >>= 1;
1555         }
1556
1557         for (i = 0; i < 64; i++) {
1558             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1559                                        s->dct_count[intra] +
1560                                        s->dct_error_sum[intra][i] / 2) /
1561                                       (s->dct_error_sum[intra][i] + 1);
1562         }
1563     }
1564 }
1565
1566 static int frame_start(MpegEncContext *s)
1567 {
1568     int ret;
1569
1570     /* mark & release old frames */
1571     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1572         s->last_picture_ptr != s->next_picture_ptr &&
1573         s->last_picture_ptr->f->buf[0]) {
1574         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1575     }
1576
1577     s->current_picture_ptr->f->pict_type = s->pict_type;
1578     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1579
1580     ff_mpeg_unref_picture(s, &s->current_picture);
1581     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1582                                    s->current_picture_ptr)) < 0)
1583         return ret;
1584
1585     if (s->pict_type != AV_PICTURE_TYPE_B) {
1586         s->last_picture_ptr = s->next_picture_ptr;
1587         if (!s->droppable)
1588             s->next_picture_ptr = s->current_picture_ptr;
1589     }
1590
1591     if (s->last_picture_ptr) {
1592         ff_mpeg_unref_picture(s, &s->last_picture);
1593         if (s->last_picture_ptr->f->buf[0] &&
1594             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1595                                        s->last_picture_ptr)) < 0)
1596             return ret;
1597     }
1598     if (s->next_picture_ptr) {
1599         ff_mpeg_unref_picture(s, &s->next_picture);
1600         if (s->next_picture_ptr->f->buf[0] &&
1601             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1602                                        s->next_picture_ptr)) < 0)
1603             return ret;
1604     }
1605
1606     if (s->picture_structure!= PICT_FRAME) {
1607         int i;
1608         for (i = 0; i < 4; i++) {
1609             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1610                 s->current_picture.f->data[i] +=
1611                     s->current_picture.f->linesize[i];
1612             }
1613             s->current_picture.f->linesize[i] *= 2;
1614             s->last_picture.f->linesize[i]    *= 2;
1615             s->next_picture.f->linesize[i]    *= 2;
1616         }
1617     }
1618
1619     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1620         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1621         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1622     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1623         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1624         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1625     } else {
1626         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1627         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1628     }
1629
1630     if (s->dct_error_sum) {
1631         av_assert2(s->avctx->noise_reduction && s->encoding);
1632         update_noise_reduction(s);
1633     }
1634
1635     return 0;
1636 }
1637
1638 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1639                           const AVFrame *pic_arg, int *got_packet)
1640 {
1641     MpegEncContext *s = avctx->priv_data;
1642     int i, stuffing_count, ret;
1643     int context_count = s->slice_context_count;
1644
1645     s->picture_in_gop_number++;
1646
1647     if (load_input_picture(s, pic_arg) < 0)
1648         return -1;
1649
1650     if (select_input_picture(s) < 0) {
1651         return -1;
1652     }
1653
1654     /* output? */
1655     if (s->new_picture.f->data[0]) {
1656         if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1657             return ret;
1658         if (s->mb_info) {
1659             s->mb_info_ptr = av_packet_new_side_data(pkt,
1660                                  AV_PKT_DATA_H263_MB_INFO,
1661                                  s->mb_width*s->mb_height*12);
1662             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1663         }
1664
1665         for (i = 0; i < context_count; i++) {
1666             int start_y = s->thread_context[i]->start_mb_y;
1667             int   end_y = s->thread_context[i]->  end_mb_y;
1668             int h       = s->mb_height;
1669             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1670             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1671
1672             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1673         }
1674
1675         s->pict_type = s->new_picture.f->pict_type;
1676         //emms_c();
1677         ret = frame_start(s);
1678         if (ret < 0)
1679             return ret;
1680 vbv_retry:
1681         if (encode_picture(s, s->picture_number) < 0)
1682             return -1;
1683
1684         avctx->header_bits = s->header_bits;
1685         avctx->mv_bits     = s->mv_bits;
1686         avctx->misc_bits   = s->misc_bits;
1687         avctx->i_tex_bits  = s->i_tex_bits;
1688         avctx->p_tex_bits  = s->p_tex_bits;
1689         avctx->i_count     = s->i_count;
1690         // FIXME f/b_count in avctx
1691         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1692         avctx->skip_count  = s->skip_count;
1693
1694         frame_end(s);
1695
1696         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1697             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1698
1699         if (avctx->rc_buffer_size) {
1700             RateControlContext *rcc = &s->rc_context;
1701             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1702
1703             if (put_bits_count(&s->pb) > max_size &&
1704                 s->lambda < s->avctx->lmax) {
1705                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1706                                        (s->qscale + 1) / s->qscale);
1707                 if (s->adaptive_quant) {
1708                     int i;
1709                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1710                         s->lambda_table[i] =
1711                             FFMAX(s->lambda_table[i] + 1,
1712                                   s->lambda_table[i] * (s->qscale + 1) /
1713                                   s->qscale);
1714                 }
1715                 s->mb_skipped = 0;        // done in frame_start()
1716                 // done in encode_picture() so we must undo it
1717                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1718                     if (s->flipflop_rounding          ||
1719                         s->codec_id == AV_CODEC_ID_H263P ||
1720                         s->codec_id == AV_CODEC_ID_MPEG4)
1721                         s->no_rounding ^= 1;
1722                 }
1723                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1724                     s->time_base       = s->last_time_base;
1725                     s->last_non_b_time = s->time - s->pp_time;
1726                 }
1727                 for (i = 0; i < context_count; i++) {
1728                     PutBitContext *pb = &s->thread_context[i]->pb;
1729                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1730                 }
1731                 goto vbv_retry;
1732             }
1733
1734             av_assert0(s->avctx->rc_max_rate);
1735         }
1736
1737         if (s->flags & CODEC_FLAG_PASS1)
1738             ff_write_pass1_stats(s);
1739
1740         for (i = 0; i < 4; i++) {
1741             s->current_picture_ptr->f->error[i] =
1742             s->current_picture.f->error[i] =
1743                 s->current_picture.error[i];
1744             avctx->error[i] += s->current_picture_ptr->f->error[i];
1745         }
1746
1747         if (s->flags & CODEC_FLAG_PASS1)
1748             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1749                    avctx->i_tex_bits + avctx->p_tex_bits ==
1750                        put_bits_count(&s->pb));
1751         flush_put_bits(&s->pb);
1752         s->frame_bits  = put_bits_count(&s->pb);
1753
1754         stuffing_count = ff_vbv_update(s, s->frame_bits);
1755         s->stuffing_bits = 8*stuffing_count;
1756         if (stuffing_count) {
1757             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1758                     stuffing_count + 50) {
1759                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1760                 return -1;
1761             }
1762
1763             switch (s->codec_id) {
1764             case AV_CODEC_ID_MPEG1VIDEO:
1765             case AV_CODEC_ID_MPEG2VIDEO:
1766                 while (stuffing_count--) {
1767                     put_bits(&s->pb, 8, 0);
1768                 }
1769             break;
1770             case AV_CODEC_ID_MPEG4:
1771                 put_bits(&s->pb, 16, 0);
1772                 put_bits(&s->pb, 16, 0x1C3);
1773                 stuffing_count -= 4;
1774                 while (stuffing_count--) {
1775                     put_bits(&s->pb, 8, 0xFF);
1776                 }
1777             break;
1778             default:
1779                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1780             }
1781             flush_put_bits(&s->pb);
1782             s->frame_bits  = put_bits_count(&s->pb);
1783         }
1784
1785         /* update mpeg1/2 vbv_delay for CBR */
1786         if (s->avctx->rc_max_rate                          &&
1787             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1788             s->out_format == FMT_MPEG1                     &&
1789             90000LL * (avctx->rc_buffer_size - 1) <=
1790                 s->avctx->rc_max_rate * 0xFFFFLL) {
1791             int vbv_delay, min_delay;
1792             double inbits  = s->avctx->rc_max_rate *
1793                              av_q2d(s->avctx->time_base);
1794             int    minbits = s->frame_bits - 8 *
1795                              (s->vbv_delay_ptr - s->pb.buf - 1);
1796             double bits    = s->rc_context.buffer_index + minbits - inbits;
1797
1798             if (bits < 0)
1799                 av_log(s->avctx, AV_LOG_ERROR,
1800                        "Internal error, negative bits\n");
1801
1802             assert(s->repeat_first_field == 0);
1803
1804             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1805             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1806                         s->avctx->rc_max_rate;
1807
1808             vbv_delay = FFMAX(vbv_delay, min_delay);
1809
1810             av_assert0(vbv_delay < 0xFFFF);
1811
1812             s->vbv_delay_ptr[0] &= 0xF8;
1813             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1814             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1815             s->vbv_delay_ptr[2] &= 0x07;
1816             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1817             avctx->vbv_delay     = vbv_delay * 300;
1818         }
1819         s->total_bits     += s->frame_bits;
1820         avctx->frame_bits  = s->frame_bits;
1821
1822         pkt->pts = s->current_picture.f->pts;
1823         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1824             if (!s->current_picture.f->coded_picture_number)
1825                 pkt->dts = pkt->pts - s->dts_delta;
1826             else
1827                 pkt->dts = s->reordered_pts;
1828             s->reordered_pts = pkt->pts;
1829         } else
1830             pkt->dts = pkt->pts;
1831         if (s->current_picture.f->key_frame)
1832             pkt->flags |= AV_PKT_FLAG_KEY;
1833         if (s->mb_info)
1834             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1835     } else {
1836         s->frame_bits = 0;
1837     }
1838
1839     /* release non-reference frames */
1840     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1841         if (!s->picture[i].reference)
1842             ff_mpeg_unref_picture(s, &s->picture[i]);
1843     }
1844
1845     av_assert1((s->frame_bits & 7) == 0);
1846
1847     pkt->size = s->frame_bits / 8;
1848     *got_packet = !!pkt->size;
1849     return 0;
1850 }
1851
1852 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1853                                                 int n, int threshold)
1854 {
1855     static const char tab[64] = {
1856         3, 2, 2, 1, 1, 1, 1, 1,
1857         1, 1, 1, 1, 1, 1, 1, 1,
1858         1, 1, 1, 1, 1, 1, 1, 1,
1859         0, 0, 0, 0, 0, 0, 0, 0,
1860         0, 0, 0, 0, 0, 0, 0, 0,
1861         0, 0, 0, 0, 0, 0, 0, 0,
1862         0, 0, 0, 0, 0, 0, 0, 0,
1863         0, 0, 0, 0, 0, 0, 0, 0
1864     };
1865     int score = 0;
1866     int run = 0;
1867     int i;
1868     int16_t *block = s->block[n];
1869     const int last_index = s->block_last_index[n];
1870     int skip_dc;
1871
1872     if (threshold < 0) {
1873         skip_dc = 0;
1874         threshold = -threshold;
1875     } else
1876         skip_dc = 1;
1877
1878     /* Are all we could set to zero already zero? */
1879     if (last_index <= skip_dc - 1)
1880         return;
1881
1882     for (i = 0; i <= last_index; i++) {
1883         const int j = s->intra_scantable.permutated[i];
1884         const int level = FFABS(block[j]);
1885         if (level == 1) {
1886             if (skip_dc && i == 0)
1887                 continue;
1888             score += tab[run];
1889             run = 0;
1890         } else if (level > 1) {
1891             return;
1892         } else {
1893             run++;
1894         }
1895     }
1896     if (score >= threshold)
1897         return;
1898     for (i = skip_dc; i <= last_index; i++) {
1899         const int j = s->intra_scantable.permutated[i];
1900         block[j] = 0;
1901     }
1902     if (block[0])
1903         s->block_last_index[n] = 0;
1904     else
1905         s->block_last_index[n] = -1;
1906 }
1907
1908 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1909                                int last_index)
1910 {
1911     int i;
1912     const int maxlevel = s->max_qcoeff;
1913     const int minlevel = s->min_qcoeff;
1914     int overflow = 0;
1915
1916     if (s->mb_intra) {
1917         i = 1; // skip clipping of intra dc
1918     } else
1919         i = 0;
1920
1921     for (; i <= last_index; i++) {
1922         const int j = s->intra_scantable.permutated[i];
1923         int level = block[j];
1924
1925         if (level > maxlevel) {
1926             level = maxlevel;
1927             overflow++;
1928         } else if (level < minlevel) {
1929             level = minlevel;
1930             overflow++;
1931         }
1932
1933         block[j] = level;
1934     }
1935
1936     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1937         av_log(s->avctx, AV_LOG_INFO,
1938                "warning, clipping %d dct coefficients to %d..%d\n",
1939                overflow, minlevel, maxlevel);
1940 }
1941
/**
 * Compute a weight for each sample of an 8x8 block from its local activity.
 *
 * For every position the samples of the surrounding 3x3 window, cropped to
 * the 8x8 block, are gathered; with n samples, sum S and sum of squares Q
 * the weight is 36 * ff_sqrt(n * Q - S * S) / n.
 *
 * @param weight receives the 64 weights, row by row
 * @param ptr    top-left sample of the 8x8 block
 * @param stride distance in bytes between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;
    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            int total    = 0;
            int total_sq = 0;
            int samples  = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                for (nx = left; nx < right; nx++) {
                    const int v = ptr[nx + ny * stride];

                    total    += v;
                    total_sq += v * v;
                    samples++;
                }
            }

            weight[col + 8 * row] =
                (36 * ff_sqrt(samples * total_sq - total * total)) / samples;
        }
    }
}
1965
/**
 * Encode the macroblock at (s->mb_x, s->mb_y).
 *
 * Steps, in order:
 *  - with adaptive quantization (or the QP_RD flag) update s->qscale and
 *    s->dquant for this macroblock;
 *  - locate the source pixels in s->new_picture, going through the edge
 *    emulation buffer when the macroblock reaches beyond the picture;
 *  - intra: copy the source pixels into s->block[];
 *    inter: run motion compensation into s->dest[] and store the difference
 *    between source and prediction in s->block[];
 *    with CODEC_FLAG_INTERLACED_DCT, interlaced DCT is selected when the
 *    progressive score (after a bias has been subtracted) is positive and
 *    larger than the interlaced score;
 *  - inter only: flag blocks whose SAD against the prediction is small so
 *    that their DCT is skipped;
 *  - transform and quantize the remaining blocks, optionally refine them
 *    (quantizer noise shaping) and eliminate isolated coefficients;
 *  - hand s->block[] to the macroblock writer selected by s->codec_id.
 *
 * @param s               encoder context
 * @param motion_x        passed on to the codec specific macroblock writer
 *                        (the MJPEG/AMV writer does not take it)
 * @param motion_y        likewise
 * @param mb_block_height number of chroma rows per macroblock, used to
 *                        address the chroma planes
 * @param mb_block_width  number of chroma columns per macroblock
 * @param mb_block_count  number of 8x8 blocks of the macroblock that are
 *                        processed
 */
1966 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1967                                                 int motion_x, int motion_y,
1968                                                 int mb_block_height,
1969                                                 int mb_block_width,
1970                                                 int mb_block_count)
1971 {
1972     int16_t weight[12][64];
1973     int16_t orig[12][64];
1974     const int mb_x = s->mb_x;
1975     const int mb_y = s->mb_y;
1976     int i;
1977     int skip_dct[12];
1978     int dct_offset = s->linesize * 8; // default for progressive frames
1979     int uv_dct_offset = s->uvlinesize * 8;
1980     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1981     ptrdiff_t wrap_y, wrap_c;
1982
         /* Every block starts out with the context-wide skip-DCT setting. */
1983     for (i = 0; i < mb_block_count; i++)
1984         skip_dct[i] = s->skipdct;
1985
1986     if (s->adaptive_quant) {
1987         const int last_qp = s->qscale;
1988         const int mb_xy = mb_x + mb_y * s->mb_stride;
1989
1990         s->lambda = s->lambda_table[mb_xy];
1991         update_qscale(s);
1992
1993         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1994             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1995             s->dquant = s->qscale - last_qp;
1996
1997             if (s->out_format == FMT_H263) {
1998                 s->dquant = av_clip(s->dquant, -2, 2);
1999
2000                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2001                     if (!s->mb_intra) {
2002                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2003                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2004                                 s->dquant = 0;
2005                         }
2006                         if (s->mv_type == MV_TYPE_8X8)
2007                             s->dquant = 0;
2008                     }
2009                 }
2010             }
2011         }
2012         ff_set_qscale(s, last_qp + s->dquant);
2013     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2014         ff_set_qscale(s, s->qscale + s->dquant);
2015
         /* Source pixel pointers of this macroblock in the picture to code. */
2016     wrap_y = s->linesize;
2017     wrap_c = s->uvlinesize;
2018     ptr_y  = s->new_picture.f->data[0] +
2019              (mb_y * 16 * wrap_y)              + mb_x * 16;
2020     ptr_cb = s->new_picture.f->data[1] +
2021              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2022     ptr_cr = s->new_picture.f->data[2] +
2023              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2024
         /* The macroblock reaches beyond the picture (not handled for AMV):
          * build the three planes in the edge emulation buffer and read the
          * source from there instead. */
2025     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2026         uint8_t *ebuf = s->edge_emu_buffer + 32;
2027         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2028         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2029         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2030                                  wrap_y, wrap_y,
2031                                  16, 16, mb_x * 16, mb_y * 16,
2032                                  s->width, s->height);
2033         ptr_y = ebuf;
2034         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
2035                                  wrap_c, wrap_c,
2036                                  mb_block_width, mb_block_height,
2037                                  mb_x * mb_block_width, mb_y * mb_block_height,
2038                                  cw, ch);
2039         ptr_cb = ebuf + 18 * wrap_y;
2040         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 16, ptr_cr,
2041                                  wrap_c, wrap_c,
2042                                  mb_block_width, mb_block_height,
2043                                  mb_x * mb_block_width, mb_y * mb_block_height,
2044                                  cw, ch);
2045         ptr_cr = ebuf + 18 * wrap_y + 16;
2046     }
2047
2048     if (s->mb_intra) {
2049         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2050             int progressive_score, interlaced_score;
2051
2052             s->interlaced_dct = 0;
2053             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
2054                                                     NULL, wrap_y, 8) +
2055                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2056                                                     NULL, wrap_y, 8) - 400;
2057
2058             if (progressive_score > 0) {
2059                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
2060                                                        NULL, wrap_y * 2, 8) +
2061                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
2062                                                        NULL, wrap_y * 2, 8);
2063                 if (progressive_score > interlaced_score) {
2064                     s->interlaced_dct = 1;
2065
                         /* Interlaced DCT: the lower blocks start one line
                          * down and every block reads every second line. */
2066                     dct_offset = wrap_y;
2067                     uv_dct_offset = wrap_c;
2068                     wrap_y <<= 1;
2069                     if (s->chroma_format == CHROMA_422 ||
2070                         s->chroma_format == CHROMA_444)
2071                         wrap_c <<= 1;
2072                 }
2073             }
2074         }
2075
2076         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
2077         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
2078         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
2079         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
2080
2081         if (s->flags & CODEC_FLAG_GRAY) {
2082             skip_dct[4] = 1;
2083             skip_dct[5] = 1;
2084         } else {
2085             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2086             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2087             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2088                 s->dsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2089                 s->dsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2090             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2091                 s->dsp.get_pixels(s->block[6], ptr_cb + 8, wrap_c);
2092                 s->dsp.get_pixels(s->block[7], ptr_cr + 8, wrap_c);
2093                 s->dsp.get_pixels(s->block[8], ptr_cb + uv_dct_offset, wrap_c);
2094                 s->dsp.get_pixels(s->block[9], ptr_cr + uv_dct_offset, wrap_c);
2095                 s->dsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2096                 s->dsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2097             }
2098         }
2099     } else {
2100         op_pixels_func (*op_pix)[4];
2101         qpel_mc_func (*op_qpix)[16];
2102         uint8_t *dest_y, *dest_cb, *dest_cr;
2103
2104         dest_y  = s->dest[0];
2105         dest_cb = s->dest[1];
2106         dest_cr = s->dest[2];
2107
2108         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2109             op_pix  = s->hdsp.put_pixels_tab;
2110             op_qpix = s->qdsp.put_qpel_pixels_tab;
2111         } else {
2112             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2113             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2114         }
2115
             /* The forward prediction is written with the "put" functions;
              * a backward prediction that follows it uses "avg". */
2116         if (s->mv_dir & MV_DIR_FORWARD) {
2117             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
2118                           s->last_picture.f->data,
2119                           op_pix, op_qpix);
2120             op_pix  = s->hdsp.avg_pixels_tab;
2121             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2122         }
2123         if (s->mv_dir & MV_DIR_BACKWARD) {
2124             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
2125                           s->next_picture.f->data,
2126                           op_pix, op_qpix);
2127         }
2128
2129         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2130             int progressive_score, interlaced_score;
2131
2132             s->interlaced_dct = 0;
2133             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
2134                                                     ptr_y,              wrap_y,
2135                                                     8) +
2136                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
2137                                                     ptr_y + wrap_y * 8, wrap_y,
2138                                                     8) - 400;
2139
2140             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2141                 progressive_score -= 400;
2142
2143             if (progressive_score > 0) {
2144                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
2145                                                        ptr_y,
2146                                                        wrap_y * 2, 8) +
2147                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
2148                                                        ptr_y + wrap_y,
2149                                                        wrap_y * 2, 8);
2150
2151                 if (progressive_score > interlaced_score) {
2152                     s->interlaced_dct = 1;
2153
2154                     dct_offset = wrap_y;
2155                     uv_dct_offset = wrap_c;
2156                     wrap_y <<= 1;
2157                     if (s->chroma_format == CHROMA_422)
2158                         wrap_c <<= 1;
2159                 }
2160             }
2161         }
2162
             /* Residual = source - motion compensated prediction. */
2163         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2164         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2165         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2166                            dest_y + dct_offset, wrap_y);
2167         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2168                            dest_y + dct_offset + 8, wrap_y);
2169
             /* NOTE(review): in gray mode only blocks 4 and 5 are flagged as
              * skipped; with mb_block_count > 6 the entries skip_dct[6..]
              * keep their initial value although no residual is stored for
              * those blocks in this branch - confirm this is intended. */
2170         if (s->flags & CODEC_FLAG_GRAY) {
2171             skip_dct[4] = 1;
2172             skip_dct[5] = 1;
2173         } else {
2174             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2175             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2176             if (!s->chroma_y_shift) { /* 422 */
2177                 s->dsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2178                                    dest_cb + uv_dct_offset, wrap_c);
2179                 s->dsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2180                                    dest_cr + uv_dct_offset, wrap_c);
2181             }
2182         }
2183         /* pre quantization */
             /* Only when the stored mc_mb_var of this macroblock is below
              * 2 * qscale^2: each block whose SAD against the prediction is
              * below 20 * qscale is flagged so that its DCT is skipped. */
2184         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2185                 2 * s->qscale * s->qscale) {
2186             // FIXME optimize
2187             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
2188                               wrap_y, 8) < 20 * s->qscale)
2189                 skip_dct[0] = 1;
2190             if (s->dsp.sad[1](NULL, ptr_y + 8,
2191                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2192                 skip_dct[1] = 1;
2193             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
2194                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
2195                 skip_dct[2] = 1;
2196             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
2197                               dest_y + dct_offset + 8,
2198                               wrap_y, 8) < 20 * s->qscale)
2199                 skip_dct[3] = 1;
2200             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
2201                               wrap_c, 8) < 20 * s->qscale)
2202                 skip_dct[4] = 1;
2203             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
2204                               wrap_c, 8) < 20 * s->qscale)
2205                 skip_dct[5] = 1;
2206             if (!s->chroma_y_shift) { /* 422 */
2207                 if (s->dsp.sad[1](NULL, ptr_cb + uv_dct_offset,
2208                                   dest_cb + uv_dct_offset,
2209                                   wrap_c, 8) < 20 * s->qscale)
2210                     skip_dct[6] = 1;
2211                 if (s->dsp.sad[1](NULL, ptr_cr + uv_dct_offset,
2212                                   dest_cr + uv_dct_offset,
2213                                   wrap_c, 8) < 20 * s->qscale)
2214                     skip_dct[7] = 1;
2215             }
2216         }
2217     }
2218
         /* Noise shaping needs a visual weight per block and a copy of the
          * unquantized blocks. NOTE(review): weights are computed for blocks
          * 0..7 at most, while the refinement loop below runs over all
          * mb_block_count blocks - confirm for mb_block_count == 12. */
2219     if (s->quantizer_noise_shaping) {
2220         if (!skip_dct[0])
2221             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2222         if (!skip_dct[1])
2223             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2224         if (!skip_dct[2])
2225             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2226         if (!skip_dct[3])
2227             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2228         if (!skip_dct[4])
2229             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2230         if (!skip_dct[5])
2231             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2232         if (!s->chroma_y_shift) { /* 422 */
2233             if (!skip_dct[6])
2234                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2235                                   wrap_c);
2236             if (!skip_dct[7])
2237                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2238                                   wrap_c);
2239         }
2240         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2241     }
2242
2243     /* DCT & quantize */
2244     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2245     {
2246         for (i = 0; i < mb_block_count; i++) {
2247             if (!skip_dct[i]) {
2248                 int overflow;
2249                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2250                 // FIXME we could decide to change to quantizer instead of
2251                 // clipping
2252                 // JS: I don't think that would be a good idea it could lower
2253                 //     quality instead of improve it. Just INTRADC clipping
2254                 //     deserves changes in quantizer
2255                 if (overflow)
2256                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2257             } else
2258                 s->block_last_index[i] = -1;
2259         }
2260         if (s->quantizer_noise_shaping) {
2261             for (i = 0; i < mb_block_count; i++) {
2262                 if (!skip_dct[i]) {
2263                     s->block_last_index[i] =
2264                         dct_quantize_refine(s, s->block[i], weight[i],
2265                                             orig[i], i, s->qscale);
2266                 }
2267             }
2268         }
2269
             /* Coefficient elimination is applied to inter macroblocks only:
              * blocks 0..3 use the luma threshold, the rest the chroma one. */
2270         if (s->luma_elim_threshold && !s->mb_intra)
2271             for (i = 0; i < 4; i++)
2272                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2273         if (s->chroma_elim_threshold && !s->mb_intra)
2274             for (i = 4; i < mb_block_count; i++)
2275                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2276
2277         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2278             for (i = 0; i < mb_block_count; i++) {
2279                 if (s->block_last_index[i] == -1)
2280                     s->coded_score[i] = INT_MAX / 256;
2281             }
2282         }
2283     }
2284
         /* Gray intra macroblocks: every chroma block becomes a DC-only
          * block holding (1024 + c_dc_scale / 2) / c_dc_scale. */
2285     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2286         s->block_last_index[4] =
2287         s->block_last_index[5] = 0;
2288         s->block[4][0] =
2289         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2290         if (!s->chroma_y_shift) { /* 422 / 444 */
2291             for (i=6; i<12; i++) {
2292                 s->block_last_index[i] = 0;
2293                 s->block[i][0] = s->block[4][0];
2294             }
2295         }
2296     }
2297
2298     // non c quantize code returns incorrect block_last_index FIXME
         /* Recompute the last index by scanning backwards for the last
          * non-zero coefficient in scan order. */
2299     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2300         for (i = 0; i < mb_block_count; i++) {
2301             int j;
2302             if (s->block_last_index[i] > 0) {
2303                 for (j = 63; j > 0; j--) {
2304                     if (s->block[i][s->intra_scantable.permutated[j]])
2305                         break;
2306                 }
2307                 s->block_last_index[i] = j;
2308             }
2309         }
2310     }
2311
2312     /* huffman encode */
2313     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2314     case AV_CODEC_ID_MPEG1VIDEO:
2315     case AV_CODEC_ID_MPEG2VIDEO:
2316         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2317             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2318         break;
2319     case AV_CODEC_ID_MPEG4:
2320         if (CONFIG_MPEG4_ENCODER)
2321             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2322         break;
2323     case AV_CODEC_ID_MSMPEG4V2:
2324     case AV_CODEC_ID_MSMPEG4V3:
2325     case AV_CODEC_ID_WMV1:
2326         if (CONFIG_MSMPEG4_ENCODER)
2327             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2328         break;
2329     case AV_CODEC_ID_WMV2:
2330         if (CONFIG_WMV2_ENCODER)
2331             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2332         break;
2333     case AV_CODEC_ID_H261:
2334         if (CONFIG_H261_ENCODER)
2335             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2336         break;
2337     case AV_CODEC_ID_H263:
2338     case AV_CODEC_ID_H263P:
2339     case AV_CODEC_ID_FLV1:
2340     case AV_CODEC_ID_RV10:
2341     case AV_CODEC_ID_RV20:
2342         if (CONFIG_H263_ENCODER)
2343             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2344         break;
2345     case AV_CODEC_ID_MJPEG:
2346     case AV_CODEC_ID_AMV:
2347         if (CONFIG_MJPEG_ENCODER)
2348             ff_mjpeg_encode_mb(s, s->block);
2349         break;
2350     default:
2351         av_assert1(0);
2352     }
2353 }
2354
2355 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2356 {
2357     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2358     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2359     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2360 }
2361
2362 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2363     int i;
2364
2365     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2366
2367     /* mpeg1 */
2368     d->mb_skip_run= s->mb_skip_run;
2369     for(i=0; i<3; i++)
2370         d->last_dc[i] = s->last_dc[i];
2371
2372     /* statistics */
2373     d->mv_bits= s->mv_bits;
2374     d->i_tex_bits= s->i_tex_bits;
2375     d->p_tex_bits= s->p_tex_bits;
2376     d->i_count= s->i_count;
2377     d->f_count= s->f_count;
2378     d->b_count= s->b_count;
2379     d->skip_count= s->skip_count;
2380     d->misc_bits= s->misc_bits;
2381     d->last_bits= 0;
2382
2383     d->mb_skipped= 0;
2384     d->qscale= s->qscale;
2385     d->dquant= s->dquant;
2386
2387     d->esc3_level_length= s->esc3_level_length;
2388 }
2389
2390 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2391     int i;
2392
2393     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2394     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2395
2396     /* mpeg1 */
2397     d->mb_skip_run= s->mb_skip_run;
2398     for(i=0; i<3; i++)
2399         d->last_dc[i] = s->last_dc[i];
2400
2401     /* statistics */
2402     d->mv_bits= s->mv_bits;
2403     d->i_tex_bits= s->i_tex_bits;
2404     d->p_tex_bits= s->p_tex_bits;
2405     d->i_count= s->i_count;
2406     d->f_count= s->f_count;
2407     d->b_count= s->b_count;
2408     d->skip_count= s->skip_count;
2409     d->misc_bits= s->misc_bits;
2410
2411     d->mb_intra= s->mb_intra;
2412     d->mb_skipped= s->mb_skipped;
2413     d->mv_type= s->mv_type;
2414     d->mv_dir= s->mv_dir;
2415     d->pb= s->pb;
2416     if(s->data_partitioning){
2417         d->pb2= s->pb2;
2418         d->tex_pb= s->tex_pb;
2419     }
2420     d->block= s->block;
2421     for(i=0; i<8; i++)
2422         d->block_last_index[i]= s->block_last_index[i];
2423     d->interlaced_dct= s->interlaced_dct;
2424     d->qscale= s->qscale;
2425
2426     d->esc3_level_length= s->esc3_level_length;
2427 }
2428
2429 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2430                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2431                            int *dmin, int *next_block, int motion_x, int motion_y)
2432 {
2433     int score;
2434     uint8_t *dest_backup[3];
2435
2436     copy_context_before_encode(s, backup, type);
2437
2438     s->block= s->blocks[*next_block];
2439     s->pb= pb[*next_block];
2440     if(s->data_partitioning){
2441         s->pb2   = pb2   [*next_block];
2442         s->tex_pb= tex_pb[*next_block];
2443     }
2444
2445     if(*next_block){
2446         memcpy(dest_backup, s->dest, sizeof(s->dest));
2447         s->dest[0] = s->rd_scratchpad;
2448         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2449         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2450         av_assert0(s->linesize >= 32); //FIXME
2451     }
2452
2453     encode_mb(s, motion_x, motion_y);
2454
2455     score= put_bits_count(&s->pb);
2456     if(s->data_partitioning){
2457         score+= put_bits_count(&s->pb2);
2458         score+= put_bits_count(&s->tex_pb);
2459     }
2460
2461     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2462         ff_MPV_decode_mb(s, s->block);
2463
2464         score *= s->lambda2;
2465         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2466     }
2467
2468     if(*next_block){
2469         memcpy(s->dest, dest_backup, sizeof(s->dest));
2470     }
2471
2472     if(score<*dmin){
2473         *dmin= score;
2474         *next_block^=1;
2475
2476         copy_context_after_encode(best, s, type);
2477     }
2478 }
2479
2480 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2481     uint32_t *sq = ff_square_tab + 256;
2482     int acc=0;
2483     int x,y;
2484
2485     if(w==16 && h==16)
2486         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2487     else if(w==8 && h==8)
2488         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2489
2490     for(y=0; y<h; y++){
2491         for(x=0; x<w; x++){
2492             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2493         }
2494     }
2495
2496     av_assert2(acc>=0);
2497
2498     return acc;
2499 }
2500
2501 static int sse_mb(MpegEncContext *s){
2502     int w= 16;
2503     int h= 16;
2504
2505     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2506     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2507
2508     if(w==16 && h==16)
2509       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2510         return  s->dsp.nsse[0](s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2511                +s->dsp.nsse[1](s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2512                +s->dsp.nsse[1](s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2513       }else{
2514         return  s->dsp.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2515                +s->dsp.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2516                +s->dsp.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2517       }
2518     else
2519         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2520                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2521                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2522 }
2523
2524 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2525     MpegEncContext *s= *(void**)arg;
2526
2527
2528     s->me.pre_pass=1;
2529     s->me.dia_size= s->avctx->pre_dia_size;
2530     s->first_slice_line=1;
2531     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2532         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2533             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2534         }
2535         s->first_slice_line=0;
2536     }
2537
2538     s->me.pre_pass=0;
2539
2540     return 0;
2541 }
2542
2543 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2544     MpegEncContext *s= *(void**)arg;
2545
2546     ff_check_alignment();
2547
2548     s->me.dia_size= s->avctx->dia_size;
2549     s->first_slice_line=1;
2550     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2551         s->mb_x=0; //for block init below
2552         ff_init_block_index(s);
2553         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2554             s->block_index[0]+=2;
2555             s->block_index[1]+=2;
2556             s->block_index[2]+=2;
2557             s->block_index[3]+=2;
2558
2559             /* compute motion vector & mb_type and store in context */
2560             if(s->pict_type==AV_PICTURE_TYPE_B)
2561                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2562             else
2563                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2564         }
2565         s->first_slice_line=0;
2566     }
2567     return 0;
2568 }
2569
2570 static int mb_var_thread(AVCodecContext *c, void *arg){
2571     MpegEncContext *s= *(void**)arg;
2572     int mb_x, mb_y;
2573
2574     ff_check_alignment();
2575
2576     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2577         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2578             int xx = mb_x * 16;
2579             int yy = mb_y * 16;
2580             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2581             int varc;
2582             int sum = s->dsp.pix_sum(pix, s->linesize);
2583
2584             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2585
2586             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2587             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2588             s->me.mb_var_sum_temp    += varc;
2589         }
2590     }
2591     return 0;
2592 }
2593
2594 static void write_slice_end(MpegEncContext *s){
2595     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2596         if(s->partitioned_frame){
2597             ff_mpeg4_merge_partitions(s);
2598         }
2599
2600         ff_mpeg4_stuffing(&s->pb);
2601     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2602         ff_mjpeg_encode_stuffing(s);
2603     }
2604
2605     avpriv_align_put_bits(&s->pb);
2606     flush_put_bits(&s->pb);
2607
2608     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2609         s->misc_bits+= get_bits_diff(s);
2610 }
2611
2612 static void write_mb_info(MpegEncContext *s)
2613 {
2614     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2615     int offset = put_bits_count(&s->pb);
2616     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2617     int gobn = s->mb_y / s->gob_index;
2618     int pred_x, pred_y;
2619     if (CONFIG_H263_ENCODER)
2620         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2621     bytestream_put_le32(&ptr, offset);
2622     bytestream_put_byte(&ptr, s->qscale);
2623     bytestream_put_byte(&ptr, gobn);
2624     bytestream_put_le16(&ptr, mba);
2625     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2626     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2627     /* 4MV not implemented */
2628     bytestream_put_byte(&ptr, 0); /* hmv2 */
2629     bytestream_put_byte(&ptr, 0); /* vmv2 */
2630 }
2631
2632 static void update_mb_info(MpegEncContext *s, int startcode)
2633 {
2634     if (!s->mb_info)
2635         return;
2636     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2637         s->mb_info_size += 12;
2638         s->prev_mb_info = s->last_mb_info;
2639     }
2640     if (startcode) {
2641         s->prev_mb_info = put_bits_count(&s->pb)/8;
2642         /* This might have incremented mb_info_size above, and we return without
2643          * actually writing any info into that slot yet. But in that case,
2644          * this will be called again at the start of the after writing the
2645          * start code, actually writing the mb info. */
2646         return;
2647     }
2648
2649     s->last_mb_info = put_bits_count(&s->pb)/8;
2650     if (!s->mb_info_size)
2651         s->mb_info_size += 12;
2652     write_mb_info(s);
2653 }
2654
2655 static int encode_thread(AVCodecContext *c, void *arg){
2656     MpegEncContext *s= *(void**)arg;
2657     int mb_x, mb_y, pdif = 0;
2658     int chr_h= 16>>s->chroma_y_shift;
2659     int i, j;
2660     MpegEncContext best_s, backup_s;
2661     uint8_t bit_buf[2][MAX_MB_BYTES];
2662     uint8_t bit_buf2[2][MAX_MB_BYTES];
2663     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2664     PutBitContext pb[2], pb2[2], tex_pb[2];
2665
2666     ff_check_alignment();
2667
2668     for(i=0; i<2; i++){
2669         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2670         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2671         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2672     }
2673
2674     s->last_bits= put_bits_count(&s->pb);
2675     s->mv_bits=0;
2676     s->misc_bits=0;
2677     s->i_tex_bits=0;
2678     s->p_tex_bits=0;
2679     s->i_count=0;
2680     s->f_count=0;
2681     s->b_count=0;
2682     s->skip_count=0;
2683
2684     for(i=0; i<3; i++){
2685         /* init last dc values */
2686         /* note: quant matrix value (8) is implied here */
2687         s->last_dc[i] = 128 << s->intra_dc_precision;
2688
2689         s->current_picture.error[i] = 0;
2690     }
2691     if(s->codec_id==AV_CODEC_ID_AMV){
2692         s->last_dc[0] = 128*8/13;
2693         s->last_dc[1] = 128*8/14;
2694         s->last_dc[2] = 128*8/14;
2695     }
2696     s->mb_skip_run = 0;
2697     memset(s->last_mv, 0, sizeof(s->last_mv));
2698
2699     s->last_mv_dir = 0;
2700
2701     switch(s->codec_id){
2702     case AV_CODEC_ID_H263:
2703     case AV_CODEC_ID_H263P:
2704     case AV_CODEC_ID_FLV1:
2705         if (CONFIG_H263_ENCODER)
2706             s->gob_index = ff_h263_get_gob_height(s);
2707         break;
2708     case AV_CODEC_ID_MPEG4:
2709         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2710             ff_mpeg4_init_partitions(s);
2711         break;
2712     }
2713
2714     s->resync_mb_x=0;
2715     s->resync_mb_y=0;
2716     s->first_slice_line = 1;
2717     s->ptr_lastgob = s->pb.buf;
2718     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2719         s->mb_x=0;
2720         s->mb_y= mb_y;
2721
2722         ff_set_qscale(s, s->qscale);
2723         ff_init_block_index(s);
2724
2725         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2726             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2727             int mb_type= s->mb_type[xy];
2728 //            int d;
2729             int dmin= INT_MAX;
2730             int dir;
2731
2732             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2733                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2734                 return -1;
2735             }
2736             if(s->data_partitioning){
2737                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2738                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2739                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2740                     return -1;
2741                 }
2742             }
2743
2744             s->mb_x = mb_x;
2745             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2746             ff_update_block_index(s);
2747
2748             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2749                 ff_h261_reorder_mb_index(s);
2750                 xy= s->mb_y*s->mb_stride + s->mb_x;
2751                 mb_type= s->mb_type[xy];
2752             }
2753
2754             /* write gob / video packet header  */
2755             if(s->rtp_mode){
2756                 int current_packet_size, is_gob_start;
2757
2758                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2759
2760                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2761
2762                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2763
2764                 switch(s->codec_id){
2765                 case AV_CODEC_ID_H263:
2766                 case AV_CODEC_ID_H263P:
2767                     if(!s->h263_slice_structured)
2768                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2769                     break;
2770                 case AV_CODEC_ID_MPEG2VIDEO:
2771                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2772                 case AV_CODEC_ID_MPEG1VIDEO:
2773                     if(s->mb_skip_run) is_gob_start=0;
2774                     break;
2775                 case AV_CODEC_ID_MJPEG:
2776                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2777                     break;
2778                 }
2779
2780                 if(is_gob_start){
2781                     if(s->start_mb_y != mb_y || mb_x!=0){
2782                         write_slice_end(s);
2783
2784                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2785                             ff_mpeg4_init_partitions(s);
2786                         }
2787                     }
2788
2789                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2790                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2791
2792                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2793                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2794                         int d = 100 / s->error_rate;
2795                         if(r % d == 0){
2796                             current_packet_size=0;
2797                             s->pb.buf_ptr= s->ptr_lastgob;
2798                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2799                         }
2800                     }
2801
2802                     if (s->avctx->rtp_callback){
2803                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2804                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2805                     }
2806                     update_mb_info(s, 1);
2807
2808                     switch(s->codec_id){
2809                     case AV_CODEC_ID_MPEG4:
2810                         if (CONFIG_MPEG4_ENCODER) {
2811                             ff_mpeg4_encode_video_packet_header(s);
2812                             ff_mpeg4_clean_buffers(s);
2813                         }
2814                     break;
2815                     case AV_CODEC_ID_MPEG1VIDEO:
2816                     case AV_CODEC_ID_MPEG2VIDEO:
2817                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2818                             ff_mpeg1_encode_slice_header(s);
2819                             ff_mpeg1_clean_buffers(s);
2820                         }
2821                     break;
2822                     case AV_CODEC_ID_H263:
2823                     case AV_CODEC_ID_H263P:
2824                         if (CONFIG_H263_ENCODER)
2825                             ff_h263_encode_gob_header(s, mb_y);
2826                     break;
2827                     }
2828
2829                     if(s->flags&CODEC_FLAG_PASS1){
2830                         int bits= put_bits_count(&s->pb);
2831                         s->misc_bits+= bits - s->last_bits;
2832                         s->last_bits= bits;
2833                     }
2834
2835                     s->ptr_lastgob += current_packet_size;
2836                     s->first_slice_line=1;
2837                     s->resync_mb_x=mb_x;
2838                     s->resync_mb_y=mb_y;
2839                 }
2840             }
2841
2842             if(  (s->resync_mb_x   == s->mb_x)
2843                && s->resync_mb_y+1 == s->mb_y){
2844                 s->first_slice_line=0;
2845             }
2846
2847             s->mb_skipped=0;
2848             s->dquant=0; //only for QP_RD
2849
2850             update_mb_info(s, 0);
2851
2852             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2853                 int next_block=0;
2854                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2855
2856                 copy_context_before_encode(&backup_s, s, -1);
2857                 backup_s.pb= s->pb;
2858                 best_s.data_partitioning= s->data_partitioning;
2859                 best_s.partitioned_frame= s->partitioned_frame;
2860                 if(s->data_partitioning){
2861                     backup_s.pb2= s->pb2;
2862                     backup_s.tex_pb= s->tex_pb;
2863                 }
2864
2865                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2866                     s->mv_dir = MV_DIR_FORWARD;
2867                     s->mv_type = MV_TYPE_16X16;
2868                     s->mb_intra= 0;
2869                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2870                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2871                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2872                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2873                 }
2874                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2875                     s->mv_dir = MV_DIR_FORWARD;
2876                     s->mv_type = MV_TYPE_FIELD;
2877                     s->mb_intra= 0;
2878                     for(i=0; i<2; i++){
2879                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2880                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2881                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2882                     }
2883                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2884                                  &dmin, &next_block, 0, 0);
2885                 }
2886                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2887                     s->mv_dir = MV_DIR_FORWARD;
2888                     s->mv_type = MV_TYPE_16X16;
2889                     s->mb_intra= 0;
2890                     s->mv[0][0][0] = 0;
2891                     s->mv[0][0][1] = 0;
2892                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2893                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2894                 }
2895                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2896                     s->mv_dir = MV_DIR_FORWARD;
2897                     s->mv_type = MV_TYPE_8X8;
2898                     s->mb_intra= 0;
2899                     for(i=0; i<4; i++){
2900                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2901                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2902                     }
2903                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2904                                  &dmin, &next_block, 0, 0);
2905                 }
2906                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2907                     s->mv_dir = MV_DIR_FORWARD;
2908                     s->mv_type = MV_TYPE_16X16;
2909                     s->mb_intra= 0;
2910                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2911                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2912                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2913                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2914                 }
2915                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2916                     s->mv_dir = MV_DIR_BACKWARD;
2917                     s->mv_type = MV_TYPE_16X16;
2918                     s->mb_intra= 0;
2919                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2920                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2921                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2922                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2923                 }
2924                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2925                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2926                     s->mv_type = MV_TYPE_16X16;
2927                     s->mb_intra= 0;
2928                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2929                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2930                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2931                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2932                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2933                                  &dmin, &next_block, 0, 0);
2934                 }
2935                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2936                     s->mv_dir = MV_DIR_FORWARD;
2937                     s->mv_type = MV_TYPE_FIELD;
2938                     s->mb_intra= 0;
2939                     for(i=0; i<2; i++){
2940                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2941                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2942                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2943                     }
2944                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2945                                  &dmin, &next_block, 0, 0);
2946                 }
2947                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2948                     s->mv_dir = MV_DIR_BACKWARD;
2949                     s->mv_type = MV_TYPE_FIELD;
2950                     s->mb_intra= 0;
2951                     for(i=0; i<2; i++){
2952                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2953                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2954                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2955                     }
2956                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2957                                  &dmin, &next_block, 0, 0);
2958                 }
2959                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2960                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2961                     s->mv_type = MV_TYPE_FIELD;
2962                     s->mb_intra= 0;
2963                     for(dir=0; dir<2; dir++){
2964                         for(i=0; i<2; i++){
2965                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2966                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2967                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2968                         }
2969                     }
2970                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2971                                  &dmin, &next_block, 0, 0);
2972                 }
2973                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2974                     s->mv_dir = 0;
2975                     s->mv_type = MV_TYPE_16X16;
2976                     s->mb_intra= 1;
2977                     s->mv[0][0][0] = 0;
2978                     s->mv[0][0][1] = 0;
2979                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2980                                  &dmin, &next_block, 0, 0);
2981                     if(s->h263_pred || s->h263_aic){
2982                         if(best_s.mb_intra)
2983                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2984                         else
2985                             ff_clean_intra_table_entries(s); //old mode?
2986                     }
2987                 }
2988
2989                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2990                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2991                         const int last_qp= backup_s.qscale;
2992                         int qpi, qp, dc[6];
2993                         int16_t ac[6][16];
2994                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2995                         static const int dquant_tab[4]={-1,1,-2,2};
2996                         int storecoefs = s->mb_intra && s->dc_val[0];
2997
2998                         av_assert2(backup_s.dquant == 0);
2999
3000                         //FIXME intra
3001                         s->mv_dir= best_s.mv_dir;
3002                         s->mv_type = MV_TYPE_16X16;
3003                         s->mb_intra= best_s.mb_intra;
3004                         s->mv[0][0][0] = best_s.mv[0][0][0];
3005                         s->mv[0][0][1] = best_s.mv[0][0][1];
3006                         s->mv[1][0][0] = best_s.mv[1][0][0];
3007                         s->mv[1][0][1] = best_s.mv[1][0][1];
3008
3009                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3010                         for(; qpi<4; qpi++){
3011                             int dquant= dquant_tab[qpi];
3012                             qp= last_qp + dquant;
3013                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3014                                 continue;
3015                             backup_s.dquant= dquant;
3016                             if(storecoefs){
3017                                 for(i=0; i<6; i++){
3018                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3019                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3020                                 }
3021                             }
3022
3023                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3024                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3025                             if(best_s.qscale != qp){
3026                                 if(storecoefs){
3027                                     for(i=0; i<6; i++){
3028                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3029                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3030                                     }
3031                                 }
3032                             }
3033                         }
3034                     }
3035                 }
3036                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3037                     int mx= s->b_direct_mv_table[xy][0];
3038                     int my= s->b_direct_mv_table[xy][1];
3039
3040                     backup_s.dquant = 0;
3041                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3042                     s->mb_intra= 0;
3043                     ff_mpeg4_set_direct_mv(s, mx, my);
3044                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3045                                  &dmin, &next_block, mx, my);
3046                 }
3047                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3048                     backup_s.dquant = 0;
3049                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3050                     s->mb_intra= 0;
3051                     ff_mpeg4_set_direct_mv(s, 0, 0);
3052                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3053                                  &dmin, &next_block, 0, 0);
3054                 }
3055                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3056                     int coded=0;
3057                     for(i=0; i<6; i++)
3058                         coded |= s->block_last_index[i];
3059                     if(coded){
3060                         int mx,my;
3061                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3062                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3063                             mx=my=0; //FIXME find the one we actually used
3064                             ff_mpeg4_set_direct_mv(s, mx, my);
3065                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3066                             mx= s->mv[1][0][0];
3067                             my= s->mv[1][0][1];
3068                         }else{
3069                             mx= s->mv[0][0][0];
3070                             my= s->mv[0][0][1];
3071                         }
3072
3073                         s->mv_dir= best_s.mv_dir;
3074                         s->mv_type = best_s.mv_type;
3075                         s->mb_intra= 0;
3076 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3077                         s->mv[0][0][1] = best_s.mv[0][0][1];
3078                         s->mv[1][0][0] = best_s.mv[1][0][0];
3079                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3080                         backup_s.dquant= 0;
3081                         s->skipdct=1;
3082                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3083                                         &dmin, &next_block, mx, my);
3084                         s->skipdct=0;
3085                     }
3086                 }
3087
3088                 s->current_picture.qscale_table[xy] = best_s.qscale;
3089
3090                 copy_context_after_encode(s, &best_s, -1);
3091
3092                 pb_bits_count= put_bits_count(&s->pb);
3093                 flush_put_bits(&s->pb);
3094                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3095                 s->pb= backup_s.pb;
3096
3097                 if(s->data_partitioning){
3098                     pb2_bits_count= put_bits_count(&s->pb2);
3099                     flush_put_bits(&s->pb2);
3100                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3101                     s->pb2= backup_s.pb2;
3102
3103                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3104                     flush_put_bits(&s->tex_pb);
3105                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3106                     s->tex_pb= backup_s.tex_pb;
3107                 }
3108                 s->last_bits= put_bits_count(&s->pb);
3109
3110                 if (CONFIG_H263_ENCODER &&
3111                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3112                     ff_h263_update_motion_val(s);
3113
3114                 if(next_block==0){ //FIXME 16 vs linesize16
3115                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
3116                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3117                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3118                 }
3119
3120                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3121                     ff_MPV_decode_mb(s, s->block);
3122             } else {
3123                 int motion_x = 0, motion_y = 0;
3124                 s->mv_type=MV_TYPE_16X16;
3125                 // only one MB-Type possible
3126
3127                 switch(mb_type){
3128                 case CANDIDATE_MB_TYPE_INTRA:
3129                     s->mv_dir = 0;
3130                     s->mb_intra= 1;
3131                     motion_x= s->mv[0][0][0] = 0;
3132                     motion_y= s->mv[0][0][1] = 0;
3133                     break;
3134                 case CANDIDATE_MB_TYPE_INTER:
3135                     s->mv_dir = MV_DIR_FORWARD;
3136                     s->mb_intra= 0;
3137                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3138                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3139                     break;
3140                 case CANDIDATE_MB_TYPE_INTER_I:
3141                     s->mv_dir = MV_DIR_FORWARD;
3142                     s->mv_type = MV_TYPE_FIELD;
3143                     s->mb_intra= 0;
3144                     for(i=0; i<2; i++){
3145                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3146                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3147                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3148                     }
3149                     break;
3150                 case CANDIDATE_MB_TYPE_INTER4V:
3151                     s->mv_dir = MV_DIR_FORWARD;
3152                     s->mv_type = MV_TYPE_8X8;
3153                     s->mb_intra= 0;
3154                     for(i=0; i<4; i++){
3155                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3156                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3157                     }
3158                     break;
3159                 case CANDIDATE_MB_TYPE_DIRECT:
3160                     if (CONFIG_MPEG4_ENCODER) {
3161                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3162                         s->mb_intra= 0;
3163                         motion_x=s->b_direct_mv_table[xy][0];
3164                         motion_y=s->b_direct_mv_table[xy][1];
3165                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3166                     }
3167                     break;
3168                 case CANDIDATE_MB_TYPE_DIRECT0:
3169                     if (CONFIG_MPEG4_ENCODER) {
3170                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3171                         s->mb_intra= 0;
3172                         ff_mpeg4_set_direct_mv(s, 0, 0);
3173                     }
3174                     break;
3175                 case CANDIDATE_MB_TYPE_BIDIR:
3176                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3177                     s->mb_intra= 0;
3178                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3179                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3180                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3181                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3182                     break;
3183                 case CANDIDATE_MB_TYPE_BACKWARD:
3184                     s->mv_dir = MV_DIR_BACKWARD;
3185                     s->mb_intra= 0;
3186                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3187                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3188                     break;
3189                 case CANDIDATE_MB_TYPE_FORWARD:
3190                     s->mv_dir = MV_DIR_FORWARD;
3191                     s->mb_intra= 0;
3192                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3193                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3194                     break;
3195                 case CANDIDATE_MB_TYPE_FORWARD_I:
3196                     s->mv_dir = MV_DIR_FORWARD;
3197                     s->mv_type = MV_TYPE_FIELD;
3198                     s->mb_intra= 0;
3199                     for(i=0; i<2; i++){
3200                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3201                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3202                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3203                     }
3204                     break;
3205                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3206                     s->mv_dir = MV_DIR_BACKWARD;
3207                     s->mv_type = MV_TYPE_FIELD;
3208                     s->mb_intra= 0;
3209                     for(i=0; i<2; i++){
3210                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3211                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3212                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3213                     }
3214                     break;
3215                 case CANDIDATE_MB_TYPE_BIDIR_I:
3216                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3217                     s->mv_type = MV_TYPE_FIELD;
3218                     s->mb_intra= 0;
3219                     for(dir=0; dir<2; dir++){
3220                         for(i=0; i<2; i++){
3221                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3222                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3223                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3224                         }
3225                     }
3226                     break;
3227                 default:
3228                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3229                 }
3230
3231                 encode_mb(s, motion_x, motion_y);
3232
3233                 // RAL: Update last macroblock type
3234                 s->last_mv_dir = s->mv_dir;
3235
3236                 if (CONFIG_H263_ENCODER &&
3237                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3238                     ff_h263_update_motion_val(s);
3239
3240                 ff_MPV_decode_mb(s, s->block);
3241             }
3242
3243             /* clean the MV table in IPS frames for direct mode in B frames */
3244             if(s->mb_intra /* && I,P,S_TYPE */){
3245                 s->p_mv_table[xy][0]=0;
3246                 s->p_mv_table[xy][1]=0;
3247             }
3248
3249             if(s->flags&CODEC_FLAG_PSNR){
3250                 int w= 16;
3251                 int h= 16;
3252
3253                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3254                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3255
3256                 s->current_picture.error[0] += sse(
3257                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3258                     s->dest[0], w, h, s->linesize);
3259                 s->current_picture.error[1] += sse(
3260                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3261                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3262                 s->current_picture.error[2] += sse(
3263                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3264                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3265             }
3266             if(s->loop_filter){
3267                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3268                     ff_h263_loop_filter(s);
3269             }
3270             av_dlog(s->avctx, "MB %d %d bits\n",
3271                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3272         }
3273     }
3274
3275     //not beautiful here but we must write it before flushing so it has to be here
3276     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3277         ff_msmpeg4_encode_ext_header(s);
3278
3279     write_slice_end(s);
3280
3281     /* Send the last GOB if RTP */
3282     if (s->avctx->rtp_callback) {
3283         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3284         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3285         /* Call the RTP callback to send the last GOB */
3286         emms_c();
3287         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3288     }
3289
3290     return 0;
3291 }
3292
3293 #define MERGE(field) dst->field += src->field; src->field=0
3294 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3295     MERGE(me.scene_change_score);
3296     MERGE(me.mc_mb_var_sum_temp);
3297     MERGE(me.mb_var_sum_temp);
3298 }
3299
3300 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3301     int i;
3302
3303     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3304     MERGE(dct_count[1]);
3305     MERGE(mv_bits);
3306     MERGE(i_tex_bits);
3307     MERGE(p_tex_bits);
3308     MERGE(i_count);
3309     MERGE(f_count);
3310     MERGE(b_count);
3311     MERGE(skip_count);
3312     MERGE(misc_bits);
3313     MERGE(er.error_count);
3314     MERGE(padding_bug_score);
3315     MERGE(current_picture.error[0]);
3316     MERGE(current_picture.error[1]);
3317     MERGE(current_picture.error[2]);
3318
3319     if(dst->avctx->noise_reduction){
3320         for(i=0; i<64; i++){
3321             MERGE(dct_error_sum[0][i]);
3322             MERGE(dct_error_sum[1][i]);
3323         }
3324     }
3325
3326     assert(put_bits_count(&src->pb) % 8 ==0);
3327     assert(put_bits_count(&dst->pb) % 8 ==0);
3328     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3329     flush_put_bits(&dst->pb);
3330 }
3331
3332 static int estimate_qp(MpegEncContext *s, int dry_run){
3333     if (s->next_lambda){
3334         s->current_picture_ptr->f->quality =
3335         s->current_picture.f->quality = s->next_lambda;
3336         if(!dry_run) s->next_lambda= 0;
3337     } else if (!s->fixed_qscale) {
3338         s->current_picture_ptr->f->quality =
3339         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3340         if (s->current_picture.f->quality < 0)
3341             return -1;
3342     }
3343
3344     if(s->adaptive_quant){
3345         switch(s->codec_id){
3346         case AV_CODEC_ID_MPEG4:
3347             if (CONFIG_MPEG4_ENCODER)
3348                 ff_clean_mpeg4_qscales(s);
3349             break;
3350         case AV_CODEC_ID_H263:
3351         case AV_CODEC_ID_H263P:
3352         case AV_CODEC_ID_FLV1:
3353             if (CONFIG_H263_ENCODER)
3354                 ff_clean_h263_qscales(s);
3355             break;
3356         default:
3357             ff_init_qscale_tab(s);
3358         }
3359
3360         s->lambda= s->lambda_table[0];
3361         //FIXME broken
3362     }else
3363         s->lambda = s->current_picture.f->quality;
3364     update_qscale(s);
3365     return 0;
3366 }
3367
3368 /* must be called before writing the header */
3369 static void set_frame_distances(MpegEncContext * s){
3370     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3371     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3372
3373     if(s->pict_type==AV_PICTURE_TYPE_B){
3374         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3375         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3376     }else{
3377         s->pp_time= s->time - s->last_non_b_time;
3378         s->last_non_b_time= s->time;
3379         assert(s->picture_number==0 || s->pp_time > 0);
3380     }
3381 }
3382
3383 static int encode_picture(MpegEncContext *s, int picture_number)
3384 {
3385     int i, ret;
3386     int bits;
3387     int context_count = s->slice_context_count;
3388
3389     s->picture_number = picture_number;
3390
3391     /* Reset the average MB variance */
3392     s->me.mb_var_sum_temp    =
3393     s->me.mc_mb_var_sum_temp = 0;
3394
3395     /* we need to initialize some time vars before we can encode b-frames */
3396     // RAL: Condition added for MPEG1VIDEO
3397     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3398         set_frame_distances(s);
3399     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3400         ff_set_mpeg4_time(s);
3401
3402     s->me.scene_change_score=0;
3403
3404 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3405
3406     if(s->pict_type==AV_PICTURE_TYPE_I){
3407         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3408         else                        s->no_rounding=0;
3409     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3410         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3411             s->no_rounding ^= 1;
3412     }
3413
3414     if(s->flags & CODEC_FLAG_PASS2){
3415         if (estimate_qp(s,1) < 0)
3416             return -1;
3417         ff_get_2pass_fcode(s);
3418     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3419         if(s->pict_type==AV_PICTURE_TYPE_B)
3420             s->lambda= s->last_lambda_for[s->pict_type];
3421         else
3422             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3423         update_qscale(s);
3424     }
3425
3426     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3427         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3428         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3429         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3430         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3431     }
3432
3433     s->mb_intra=0; //for the rate distortion & bit compare functions
3434     for(i=1; i<context_count; i++){
3435         ret = ff_update_duplicate_context(s->thread_context[i], s);
3436         if (ret < 0)
3437             return ret;
3438     }
3439
3440     if(ff_init_me(s)<0)
3441         return -1;
3442
3443     /* Estimate motion for every MB */
3444     if(s->pict_type != AV_PICTURE_TYPE_I){
3445         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3446         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3447         if (s->pict_type != AV_PICTURE_TYPE_B) {
3448             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3449                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3450             }
3451         }
3452
3453         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3454     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3455         /* I-Frame */
3456         for(i=0; i<s->mb_stride*s->mb_height; i++)
3457             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3458
3459         if(!s->fixed_qscale){
3460             /* finding spatial complexity for I-frame rate control */
3461             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3462         }
3463     }
3464     for(i=1; i<context_count; i++){
3465         merge_context_after_me(s, s->thread_context[i]);
3466     }
3467     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3468     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3469     emms_c();
3470
3471     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3472         s->pict_type= AV_PICTURE_TYPE_I;
3473         for(i=0; i<s->mb_stride*s->mb_height; i++)
3474             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3475         if(s->msmpeg4_version >= 3)
3476             s->no_rounding=1;
3477         av_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3478                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3479     }
3480
3481     if(!s->umvplus){
3482         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3483             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3484
3485             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3486                 int a,b;
3487                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3488                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3489                 s->f_code= FFMAX3(s->f_code, a, b);
3490             }
3491
3492             ff_fix_long_p_mvs(s);
3493             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3494             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3495                 int j;
3496                 for(i=0; i<2; i++){
3497                     for(j=0; j<2; j++)
3498                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3499                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3500                 }
3501             }
3502         }
3503
3504         if(s->pict_type==AV_PICTURE_TYPE_B){
3505             int a, b;
3506
3507             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3508             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3509             s->f_code = FFMAX(a, b);
3510
3511             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3512             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3513             s->b_code = FFMAX(a, b);
3514
3515             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3516             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3517             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3518             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3519             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3520                 int dir, j;
3521                 for(dir=0; dir<2; dir++){
3522                     for(i=0; i<2; i++){
3523                         for(j=0; j<2; j++){
3524                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3525                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3526                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3527                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3528                         }
3529                     }
3530                 }
3531             }
3532         }
3533     }
3534
3535     if (estimate_qp(s, 0) < 0)
3536         return -1;
3537
3538     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3539         s->qscale= 3; //reduce clipping problems
3540
3541     if (s->out_format == FMT_MJPEG) {
3542         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3543         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3544
3545         if (s->avctx->intra_matrix) {
3546             chroma_matrix =
3547             luma_matrix = s->avctx->intra_matrix;
3548         }
3549         if (s->avctx->chroma_intra_matrix)
3550             chroma_matrix = s->avctx->chroma_intra_matrix;
3551
3552         /* for mjpeg, we do include qscale in the matrix */
3553         for(i=1;i<64;i++){
3554             int j= s->dsp.idct_permutation[i];
3555
3556             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3557             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3558         }
3559         s->y_dc_scale_table=
3560         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3561         s->chroma_intra_matrix[0] =
3562         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3563         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3564                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3565         ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3566                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3567         s->qscale= 8;
3568     }
3569     if(s->codec_id == AV_CODEC_ID_AMV){
3570         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3571         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3572         for(i=1;i<64;i++){
3573             int j= s->dsp.idct_permutation[ff_zigzag_direct[i]];
3574
3575             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3576             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3577         }
3578         s->y_dc_scale_table= y;
3579         s->c_dc_scale_table= c;
3580         s->intra_matrix[0] = 13;
3581         s->chroma_intra_matrix[0] = 14;
3582         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3583                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3584         ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3585                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3586         s->qscale= 8;
3587     }
3588
3589     //FIXME var duplication
3590     s->current_picture_ptr->f->key_frame =
3591     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3592     s->current_picture_ptr->f->pict_type =
3593     s->current_picture.f->pict_type = s->pict_type;
3594
3595     if (s->current_picture.f->key_frame)
3596         s->picture_in_gop_number=0;
3597
3598     s->mb_x = s->mb_y = 0;
3599     s->last_bits= put_bits_count(&s->pb);
3600     switch(s->out_format) {
3601     case FMT_MJPEG:
3602         if (CONFIG_MJPEG_ENCODER)
3603             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3604                                            s->intra_matrix, s->chroma_intra_matrix);
3605         break;
3606     case FMT_H261:
3607         if (CONFIG_H261_ENCODER)
3608             ff_h261_encode_picture_header(s, picture_number);
3609         break;
3610     case FMT_H263:
3611         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3612             ff_wmv2_encode_picture_header(s, picture_number);
3613         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3614             ff_msmpeg4_encode_picture_header(s, picture_number);
3615         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3616             ff_mpeg4_encode_picture_header(s, picture_number);
3617         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3618             ff_rv10_encode_picture_header(s, picture_number);
3619         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3620             ff_rv20_encode_picture_header(s, picture_number);
3621         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3622             ff_flv_encode_picture_header(s, picture_number);
3623         else if (CONFIG_H263_ENCODER)
3624             ff_h263_encode_picture_header(s, picture_number);
3625         break;
3626     case FMT_MPEG1:
3627         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3628             ff_mpeg1_encode_picture_header(s, picture_number);
3629         break;
3630     default:
3631         av_assert0(0);
3632     }
3633     bits= put_bits_count(&s->pb);
3634     s->header_bits= bits - s->last_bits;
3635
3636     for(i=1; i<context_count; i++){
3637         update_duplicate_context_after_me(s->thread_context[i], s);
3638     }
3639     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3640     for(i=1; i<context_count; i++){
3641         merge_context_after_encode(s, s->thread_context[i]);
3642     }
3643     emms_c();
3644     return 0;
3645 }
3646
3647 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3648     const int intra= s->mb_intra;
3649     int i;
3650
3651     s->dct_count[intra]++;
3652
3653     for(i=0; i<64; i++){
3654         int level= block[i];
3655
3656         if(level){
3657             if(level>0){
3658                 s->dct_error_sum[intra][i] += level;
3659                 level -= s->dct_offset[intra][i];
3660                 if(level<0) level=0;
3661             }else{
3662                 s->dct_error_sum[intra][i] -= level;
3663                 level += s->dct_offset[intra][i];
3664                 if(level>0) level=0;
3665             }
3666             block[i]= level;
3667         }
3668     }
3669 }
3670
3671 static int dct_quantize_trellis_c(MpegEncContext *s,
3672                                   int16_t *block, int n,
3673                                   int qscale, int *overflow){
3674     const int *qmat;
3675     const uint8_t *scantable= s->intra_scantable.scantable;
3676     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3677     int max=0;
3678     unsigned int threshold1, threshold2;
3679     int bias=0;
3680     int run_tab[65];
3681     int level_tab[65];
3682     int score_tab[65];
3683     int survivor[65];
3684     int survivor_count;
3685     int last_run=0;
3686     int last_level=0;
3687     int last_score= 0;
3688     int last_i;
3689     int coeff[2][64];
3690     int coeff_count[64];
3691     int qmul, qadd, start_i, last_non_zero, i, dc;
3692     const int esc_length= s->ac_esc_length;
3693     uint8_t * length;
3694     uint8_t * last_length;
3695     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3696
3697     s->dsp.fdct (block);
3698
3699     if(s->dct_error_sum)
3700         s->denoise_dct(s, block);
3701     qmul= qscale*16;
3702     qadd= ((qscale-1)|1)*8;
3703
3704     if (s->mb_intra) {
3705         int q;
3706         if (!s->h263_aic) {
3707             if (n < 4)
3708                 q = s->y_dc_scale;
3709             else
3710                 q = s->c_dc_scale;
3711             q = q << 3;
3712         } else{
3713             /* For AIC we skip quant/dequant of INTRADC */
3714             q = 1 << 3;
3715             qadd=0;
3716         }
3717
3718         /* note: block[0] is assumed to be positive */
3719         block[0] = (block[0] + (q >> 1)) / q;
3720         start_i = 1;
3721         last_non_zero = 0;
3722         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3723         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3724             bias= 1<<(QMAT_SHIFT-1);
3725         length     = s->intra_ac_vlc_length;
3726         last_length= s->intra_ac_vlc_last_length;
3727     } else {
3728         start_i = 0;
3729         last_non_zero = -1;
3730         qmat = s->q_inter_matrix[qscale];
3731         length     = s->inter_ac_vlc_length;
3732         last_length= s->inter_ac_vlc_last_length;
3733     }
3734     last_i= start_i;
3735
3736     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3737     threshold2= (threshold1<<1);
3738
3739     for(i=63; i>=start_i; i--) {
3740         const int j = scantable[i];
3741         int level = block[j] * qmat[j];
3742
3743         if(((unsigned)(level+threshold1))>threshold2){
3744             last_non_zero = i;
3745             break;
3746         }
3747     }
3748
3749     for(i=start_i; i<=last_non_zero; i++) {
3750         const int j = scantable[i];
3751         int level = block[j] * qmat[j];
3752
3753 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3754 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3755         if(((unsigned)(level+threshold1))>threshold2){
3756             if(level>0){
3757                 level= (bias + level)>>QMAT_SHIFT;
3758                 coeff[0][i]= level;
3759                 coeff[1][i]= level-1;
3760 //                coeff[2][k]= level-2;
3761             }else{
3762                 level= (bias - level)>>QMAT_SHIFT;
3763                 coeff[0][i]= -level;
3764                 coeff[1][i]= -level+1;
3765 //                coeff[2][k]= -level+2;
3766             }
3767             coeff_count[i]= FFMIN(level, 2);
3768             av_assert2(coeff_count[i]);
3769             max |=level;
3770         }else{
3771             coeff[0][i]= (level>>31)|1;
3772             coeff_count[i]= 1;
3773         }
3774     }
3775
3776     *overflow= s->max_qcoeff < max; //overflow might have happened
3777
3778     if(last_non_zero < start_i){
3779         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3780         return last_non_zero;
3781     }
3782
3783     score_tab[start_i]= 0;
3784     survivor[0]= start_i;
3785     survivor_count= 1;
3786
3787     for(i=start_i; i<=last_non_zero; i++){
3788         int level_index, j, zero_distortion;
3789         int dct_coeff= FFABS(block[ scantable[i] ]);
3790         int best_score=256*256*256*120;
3791
3792         if (s->dsp.fdct == ff_fdct_ifast)
3793             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3794         zero_distortion= dct_coeff*dct_coeff;
3795
3796         for(level_index=0; level_index < coeff_count[i]; level_index++){
3797             int distortion;
3798             int level= coeff[level_index][i];
3799             const int alevel= FFABS(level);
3800             int unquant_coeff;
3801
3802             av_assert2(level);
3803
3804             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3805                 unquant_coeff= alevel*qmul + qadd;
3806             }else{ //MPEG1
3807                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3808                 if(s->mb_intra){
3809                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3810                         unquant_coeff =   (unquant_coeff - 1) | 1;
3811                 }else{
3812                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3813                         unquant_coeff =   (unquant_coeff - 1) | 1;
3814                 }
3815                 unquant_coeff<<= 3;
3816             }
3817
3818             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3819             level+=64;
3820             if((level&(~127)) == 0){
3821                 for(j=survivor_count-1; j>=0; j--){
3822                     int run= i - survivor[j];
3823                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3824                     score += score_tab[i-run];
3825
3826                     if(score < best_score){
3827                         best_score= score;
3828                         run_tab[i+1]= run;
3829                         level_tab[i+1]= level-64;
3830                     }
3831                 }
3832
3833                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3834                     for(j=survivor_count-1; j>=0; j--){
3835                         int run= i - survivor[j];
3836                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3837                         score += score_tab[i-run];
3838                         if(score < last_score){
3839                             last_score= score;
3840                             last_run= run;
3841                             last_level= level-64;
3842                             last_i= i+1;
3843                         }
3844                     }
3845                 }
3846             }else{
3847                 distortion += esc_length*lambda;
3848                 for(j=survivor_count-1; j>=0; j--){
3849                     int run= i - survivor[j];
3850                     int score= distortion + score_tab[i-run];
3851
3852                     if(score < best_score){
3853                         best_score= score;
3854                         run_tab[i+1]= run;
3855                         level_tab[i+1]= level-64;
3856                     }
3857                 }
3858
3859                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3860                   for(j=survivor_count-1; j>=0; j--){
3861                         int run= i - survivor[j];
3862                         int score= distortion + score_tab[i-run];
3863                         if(score < last_score){
3864                             last_score= score;
3865                             last_run= run;
3866                             last_level= level-64;
3867                             last_i= i+1;
3868                         }
3869                     }
3870                 }
3871             }
3872         }
3873
3874         score_tab[i+1]= best_score;
3875
3876         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
3877         if(last_non_zero <= 27){
3878             for(; survivor_count; survivor_count--){
3879                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3880                     break;
3881             }
3882         }else{
3883             for(; survivor_count; survivor_count--){
3884                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3885                     break;
3886             }
3887         }
3888
3889         survivor[ survivor_count++ ]= i+1;
3890     }
3891
3892     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
3893         last_score= 256*256*256*120;
3894         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3895             int score= score_tab[i];
3896             if(i) score += lambda*2; //FIXME exacter?
3897
3898             if(score < last_score){
3899                 last_score= score;
3900                 last_i= i;
3901                 last_level= level_tab[i];
3902                 last_run= run_tab[i];
3903             }
3904         }
3905     }
3906
3907     s->coded_score[n] = last_score;
3908
3909     dc= FFABS(block[0]);
3910     last_non_zero= last_i - 1;
3911     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3912
3913     if(last_non_zero < start_i)
3914         return last_non_zero;
3915
3916     if(last_non_zero == 0 && start_i == 0){
3917         int best_level= 0;
3918         int best_score= dc * dc;
3919
3920         for(i=0; i<coeff_count[0]; i++){
3921             int level= coeff[i][0];
3922             int alevel= FFABS(level);
3923             int unquant_coeff, score, distortion;
3924
3925             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3926                     unquant_coeff= (alevel*qmul + qadd)>>3;
3927             }else{ //MPEG1
3928                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3929                     unquant_coeff =   (unquant_coeff - 1) | 1;
3930             }
3931             unquant_coeff = (unquant_coeff + 4) >> 3;
3932             unquant_coeff<<= 3 + 3;
3933
3934             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3935             level+=64;
3936             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3937             else                    score= distortion + esc_length*lambda;
3938
3939             if(score < best_score){
3940                 best_score= score;
3941                 best_level= level - 64;
3942             }
3943         }
3944         block[0]= best_level;
3945         s->coded_score[n] = best_score - dc*dc;
3946         if(best_level == 0) return -1;
3947         else                return last_non_zero;
3948     }
3949
3950     i= last_i;
3951     av_assert2(last_level);
3952
3953     block[ perm_scantable[last_non_zero] ]= last_level;
3954     i -= last_run + 1;
3955
3956     for(; i>start_i; i -= run_tab[i] + 1){
3957         block[ perm_scantable[i-1] ]= level_tab[i];
3958     }
3959
3960     return last_non_zero;
3961 }
3962
/* Uncomment to compile the REFINE_STATS timing/statistics code that is
 * guarded by #ifdef REFINE_STATS inside dct_quantize_refine(). */
//#define REFINE_STATS 1
/*
 * Lookup table filled lazily by build_basis().
 * First index: coefficient index after the IDCT permutation passed to
 * build_basis(); second index: position 8*x + y inside the 8x8 block.
 * Each entry is a rounded product of two cosines scaled by
 * 0.25 * (1 << BASIS_SHIFT) (with an extra sqrt(0.5) per zero frequency).
 * dct_quantize_refine() treats basis[0][0] == 0 as "not built yet".
 */
static int16_t basis[64][64];
3965
3966 static void build_basis(uint8_t *perm){
3967     int i, j, x, y;
3968     emms_c();
3969     for(i=0; i<8; i++){
3970         for(j=0; j<8; j++){
3971             for(y=0; y<8; y++){
3972                 for(x=0; x<8; x++){
3973                     double s= 0.25*(1<<BASIS_SHIFT);
3974                     int index= 8*i + j;
3975                     int perm_index= perm[index];
3976                     if(i==0) s*= sqrt(0.5);
3977                     if(j==0) s*= sqrt(0.5);
3978                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3979                 }
3980             }
3981         }
3982     }
3983 }
3984
3985 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3986                         int16_t *block, int16_t *weight, int16_t *orig,
3987                         int n, int qscale){
3988     int16_t rem[64];
3989     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3990     const uint8_t *scantable= s->intra_scantable.scantable;
3991     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3992 //    unsigned int threshold1, threshold2;
3993 //    int bias=0;
3994     int run_tab[65];
3995     int prev_run=0;
3996     int prev_level=0;
3997     int qmul, qadd, start_i, last_non_zero, i, dc;
3998     uint8_t * length;
3999     uint8_t * last_length;
4000     int lambda;
4001     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4002 #ifdef REFINE_STATS
4003 static int count=0;
4004 static int after_last=0;
4005 static int to_zero=0;
4006 static int from_zero=0;
4007 static int raise=0;
4008 static int lower=0;
4009 static int messed_sign=0;
4010 #endif
4011
4012     if(basis[0][0] == 0)
4013         build_basis(s->dsp.idct_permutation);
4014
4015     qmul= qscale*2;
4016     qadd= (qscale-1)|1;
4017     if (s->mb_intra) {
4018         if (!s->h263_aic) {
4019             if (n < 4)
4020                 q = s->y_dc_scale;
4021             else
4022                 q = s->c_dc_scale;
4023         } else{
4024             /* For AIC we skip quant/dequant of INTRADC */
4025             q = 1;
4026             qadd=0;
4027         }
4028         q <<= RECON_SHIFT-3;
4029         /* note: block[0] is assumed to be positive */
4030         dc= block[0]*q;
4031 //        block[0] = (block[0] + (q >> 1)) / q;
4032         start_i = 1;
4033 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4034 //            bias= 1<<(QMAT_SHIFT-1);
4035         length     = s->intra_ac_vlc_length;
4036         last_length= s->intra_ac_vlc_last_length;
4037     } else {
4038         dc= 0;
4039         start_i = 0;
4040         length     = s->inter_ac_vlc_length;
4041         last_length= s->inter_ac_vlc_last_length;
4042     }
4043     last_non_zero = s->block_last_index[n];
4044
4045 #ifdef REFINE_STATS
4046 {START_TIMER
4047 #endif
4048     dc += (1<<(RECON_SHIFT-1));
4049     for(i=0; i<64; i++){
4050         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
4051     }
4052 #ifdef REFINE_STATS
4053 STOP_TIMER("memset rem[]")}
4054 #endif
4055     sum=0;
4056     for(i=0; i<64; i++){
4057         int one= 36;
4058         int qns=4;
4059         int w;
4060
4061         w= FFABS(weight[i]) + qns*one;
4062         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4063
4064         weight[i] = w;
4065 //        w=weight[i] = (63*qns + (w/2)) / w;
4066
4067         av_assert2(w>0);
4068         av_assert2(w<(1<<6));
4069         sum += w*w;
4070     }
4071     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4072 #ifdef REFINE_STATS
4073 {START_TIMER
4074 #endif
4075     run=0;
4076     rle_index=0;
4077     for(i=start_i; i<=last_non_zero; i++){
4078         int j= perm_scantable[i];
4079         const int level= block[j];
4080         int coeff;
4081
4082         if(level){
4083             if(level<0) coeff= qmul*level - qadd;
4084             else        coeff= qmul*level + qadd;
4085             run_tab[rle_index++]=run;
4086             run=0;
4087
4088             s->dsp.add_8x8basis(rem, basis[j], coeff);
4089         }else{
4090             run++;
4091         }
4092     }
4093 #ifdef REFINE_STATS
4094 if(last_non_zero>0){
4095 STOP_TIMER("init rem[]")
4096 }
4097 }
4098
4099 {START_TIMER
4100 #endif
4101     for(;;){
4102         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
4103         int best_coeff=0;
4104         int best_change=0;
4105         int run2, best_unquant_change=0, analyze_gradient;
4106 #ifdef REFINE_STATS
4107 {START_TIMER
4108 #endif
4109         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4110
4111         if(analyze_gradient){
4112 #ifdef REFINE_STATS
4113 {START_TIMER
4114 #endif
4115             for(i=0; i<64; i++){
4116                 int w= weight[i];
4117
4118                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4119             }
4120 #ifdef REFINE_STATS
4121 STOP_TIMER("rem*w*w")}
4122 {START_TIMER
4123 #endif
4124             s->dsp.fdct(d1);
4125 #ifdef REFINE_STATS
4126 STOP_TIMER("dct")}
4127 #endif
4128         }
4129
4130         if(start_i){
4131             const int level= block[0];
4132             int change, old_coeff;
4133
4134             av_assert2(s->mb_intra);
4135
4136             old_coeff= q*level;
4137
4138             for(change=-1; change<=1; change+=2){
4139                 int new_level= level + change;
4140                 int score, new_coeff;
4141
4142                 new_coeff= q*new_level;
4143                 if(new_coeff >= 2048 || new_coeff < 0)
4144                     continue;
4145
4146                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
4147                 if(score<best_score){
4148                     best_score= score;
4149                     best_coeff= 0;
4150                     best_change= change;
4151                     best_unquant_change= new_coeff - old_coeff;
4152                 }
4153             }
4154         }
4155
4156         run=0;
4157         rle_index=0;
4158         run2= run_tab[rle_index++];
4159         prev_level=0;
4160         prev_run=0;
4161
4162         for(i=start_i; i<64; i++){
4163             int j= perm_scantable[i];
4164             const int level= block[j];
4165             int change, old_coeff;
4166
4167             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4168                 break;
4169
4170             if(level){
4171                 if(level<0) old_coeff= qmul*level - qadd;
4172                 else        old_coeff= qmul*level + qadd;
4173                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4174             }else{
4175                 old_coeff=0;
4176                 run2--;
4177                 av_assert2(run2>=0 || i >= last_non_zero );
4178             }
4179
4180             for(change=-1; change<=1; change+=2){
4181                 int new_level= level + change;
4182                 int score, new_coeff, unquant_change;
4183
4184                 score=0;
4185                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4186                    continue;
4187
4188                 if(new_level){
4189                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4190                     else            new_coeff= qmul*new_level + qadd;
4191                     if(new_coeff >= 2048 || new_coeff <= -2048)
4192                         continue;
4193                     //FIXME check for overflow
4194
4195                     if(level){
4196                         if(level < 63 && level > -63){
4197                             if(i < last_non_zero)
4198                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4199                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4200                             else
4201                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4202                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4203                         }
4204                     }else{
4205                         av_assert2(FFABS(new_level)==1);
4206
4207                         if(analyze_gradient){
4208                             int g= d1[ scantable[i] ];
4209                             if(g && (g^new_level) >= 0)
4210                                 continue;
4211                         }
4212
4213                         if(i < last_non_zero){
4214                             int next_i= i + run2 + 1;
4215                             int next_level= block[ perm_scantable[next_i] ] + 64;
4216
4217                             if(next_level&(~127))
4218                                 next_level= 0;
4219
4220                             if(next_i < last_non_zero)
4221                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4222                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4223                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4224                             else
4225                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4226                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4227                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4228                         }else{
4229                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4230                             if(prev_level){
4231                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4232                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4233                             }
4234                         }
4235                     }
4236                 }else{
4237                     new_coeff=0;
4238                     av_assert2(FFABS(level)==1);
4239
4240                     if(i < last_non_zero){
4241                         int next_i= i + run2 + 1;
4242                         int next_level= block[ perm_scantable[next_i] ] + 64;
4243
4244                         if(next_level&(~127))
4245                             next_level= 0;
4246
4247                         if(next_i < last_non_zero)
4248                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4249                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4250                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4251                         else
4252                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4253                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4254                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4255                     }else{
4256                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4257                         if(prev_level){
4258                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4259                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4260                         }
4261                     }
4262                 }
4263
4264                 score *= lambda;
4265
4266                 unquant_change= new_coeff - old_coeff;
4267                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4268
4269                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
4270                 if(score<best_score){
4271                     best_score= score;
4272                     best_coeff= i;
4273                     best_change= change;
4274                     best_unquant_change= unquant_change;
4275                 }
4276             }
4277             if(level){
4278                 prev_level= level + 64;
4279                 if(prev_level&(~127))
4280                     prev_level= 0;
4281                 prev_run= run;
4282                 run=0;
4283             }else{
4284                 run++;
4285             }
4286         }
4287 #ifdef REFINE_STATS
4288 STOP_TIMER("iterative step")}
4289 #endif
4290
4291         if(best_change){
4292             int j= perm_scantable[ best_coeff ];
4293
4294             block[j] += best_change;
4295
4296             if(best_coeff > last_non_zero){
4297                 last_non_zero= best_coeff;
4298                 av_assert2(block[j]);
4299 #ifdef REFINE_STATS
4300 after_last++;
4301 #endif
4302             }else{
4303 #ifdef REFINE_STATS
4304 if(block[j]){
4305     if(block[j] - best_change){
4306         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4307             raise++;
4308         }else{
4309             lower++;
4310         }
4311     }else{
4312         from_zero++;
4313     }
4314 }else{
4315     to_zero++;
4316 }
4317 #endif
4318                 for(; last_non_zero>=start_i; last_non_zero--){
4319                     if(block[perm_scantable[last_non_zero]])
4320                         break;
4321                 }
4322             }
4323 #ifdef REFINE_STATS
4324 count++;
4325 if(256*256*256*64 % count == 0){
4326     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4327 }
4328 #endif
4329             run=0;
4330             rle_index=0;
4331             for(i=start_i; i<=last_non_zero; i++){
4332                 int j= perm_scantable[i];
4333                 const int level= block[j];
4334
4335                  if(level){
4336                      run_tab[rle_index++]=run;
4337                      run=0;
4338                  }else{
4339                      run++;
4340                  }
4341             }
4342
4343             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4344         }else{
4345             break;
4346         }
4347     }
4348 #ifdef REFINE_STATS
4349 if(last_non_zero>0){
4350 STOP_TIMER("iterative search")
4351 }
4352 }
4353 #endif
4354
4355     return last_non_zero;
4356 }
4357
4358 int ff_dct_quantize_c(MpegEncContext *s,
4359                         int16_t *block, int n,
4360                         int qscale, int *overflow)
4361 {
4362     int i, j, level, last_non_zero, q, start_i;
4363     const int *qmat;
4364     const uint8_t *scantable= s->intra_scantable.scantable;
4365     int bias;
4366     int max=0;
4367     unsigned int threshold1, threshold2;
4368
4369     s->dsp.fdct (block);
4370
4371     if(s->dct_error_sum)
4372         s->denoise_dct(s, block);
4373
4374     if (s->mb_intra) {
4375         if (!s->h263_aic) {
4376             if (n < 4)
4377                 q = s->y_dc_scale;
4378             else
4379                 q = s->c_dc_scale;
4380             q = q << 3;
4381         } else
4382             /* For AIC we skip quant/dequant of INTRADC */
4383             q = 1 << 3;
4384
4385         /* note: block[0] is assumed to be positive */
4386         block[0] = (block[0] + (q >> 1)) / q;
4387         start_i = 1;
4388         last_non_zero = 0;
4389         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4390         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4391     } else {
4392         start_i = 0;
4393         last_non_zero = -1;
4394         qmat = s->q_inter_matrix[qscale];
4395         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4396     }
4397     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4398     threshold2= (threshold1<<1);
4399     for(i=63;i>=start_i;i--) {
4400         j = scantable[i];
4401         level = block[j] * qmat[j];
4402
4403         if(((unsigned)(level+threshold1))>threshold2){
4404             last_non_zero = i;
4405             break;
4406         }else{
4407             block[j]=0;
4408         }
4409     }
4410     for(i=start_i; i<=last_non_zero; i++) {
4411         j = scantable[i];
4412         level = block[j] * qmat[j];
4413
4414 //        if(   bias+level >= (1<<QMAT_SHIFT)
4415 //           || bias-level >= (1<<QMAT_SHIFT)){
4416         if(((unsigned)(level+threshold1))>threshold2){
4417             if(level>0){
4418                 level= (bias + level)>>QMAT_SHIFT;
4419                 block[j]= level;
4420             }else{
4421                 level= (bias - level)>>QMAT_SHIFT;
4422                 block[j]= -level;
4423             }
4424             max |=level;
4425         }else{
4426             block[j]=0;
4427         }
4428     }
4429     *overflow= s->max_qcoeff < max; //overflow might have happened
4430
4431     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4432     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4433         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4434
4435     return last_non_zero;
4436 }
4437
/* Byte offset of field x inside MpegEncContext, for the AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Flags attached to every option below. Parenthesized so that the macro
 * stays a single operand when combined with higher-precedence operators. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
/*
 * Private options of the H.263 encoder. Each entry stores an integer into
 * the named MpegEncContext field; "obmc" and "structured_slices" are 0/1
 * switches defaulting to 0, "mb_info" accepts 0..INT_MAX.
 * FF_MPV_COMMON_OPTS is defined elsewhere; presumably it appends the options
 * shared by all encoders built on MpegEncContext.
 */
static const AVOption h263_options[] = {
    { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
    { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
    { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
    FF_MPV_COMMON_OPTS
    { NULL }, /* table terminator */
};
4447
/* AVClass of the H.263 encoder's private context; exposes h263_options. */
static const AVClass h263_class = {
    .class_name = "H.263 encoder",
    .item_name  = av_default_item_name,
    .option     = h263_options,
    .version    = LIBAVUTIL_VERSION_INT,
};
4454
4455 AVCodec ff_h263_encoder = {
4456     .name           = "h263",
4457     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4458     .type           = AVMEDIA_TYPE_VIDEO,
4459     .id             = AV_CODEC_ID_H263,
4460     .priv_data_size = sizeof(MpegEncContext),
4461     .init           = ff_MPV_encode_init,
4462     .encode2        = ff_MPV_encode_picture,
4463     .close          = ff_MPV_encode_end,
4464     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4465     .priv_class     = &h263_class,
4466 };
4467
/*
 * Private options of the H.263+ encoder. All four entries are 0/1 integer
 * switches defaulting to 0 that are stored into the named MpegEncContext
 * field. FF_MPV_COMMON_OPTS is defined elsewhere; presumably it appends the
 * options shared by all encoders built on MpegEncContext.
 */
static const AVOption h263p_options[] = {
    { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
    { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
    { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
    { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
    FF_MPV_COMMON_OPTS
    { NULL }, /* table terminator */
};
/* AVClass of the H.263+ encoder's private context; exposes h263p_options. */
static const AVClass h263p_class = {
    .class_name = "H.263p encoder",
    .item_name  = av_default_item_name,
    .option     = h263p_options,
    .version    = LIBAVUTIL_VERSION_INT,
};
4482
/*
 * H.263+ encoder registration. Same generic ff_MPV_* entry points and
 * YUV 4:2:0 planar input as the other encoders defined here; additionally
 * declares CODEC_CAP_SLICE_THREADS and exposes its options through
 * h263p_class.
 */
AVCodec ff_h263p_encoder = {
    .name           = "h263p",
    .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_H263P,
    .priv_data_size = sizeof(MpegEncContext),
    .init           = ff_MPV_encode_init,
    .encode2        = ff_MPV_encode_picture,
    .close          = ff_MPV_encode_end,
    .capabilities   = CODEC_CAP_SLICE_THREADS,
    .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
    .priv_class     = &h263p_class,
};
4496
/* Macro defined elsewhere; presumably declares msmpeg4v2_class, which the
 * encoder below references as its private class. */
FF_MPV_GENERIC_CLASS(msmpeg4v2)

/*
 * MS-MPEG4 version 2 encoder registration: generic ff_MPV_* entry points,
 * MpegEncContext private data, YUV 4:2:0 planar input only.
 */
AVCodec ff_msmpeg4v2_encoder = {
    .name           = "msmpeg4v2",
    .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_MSMPEG4V2,
    .priv_data_size = sizeof(MpegEncContext),
    .init           = ff_MPV_encode_init,
    .encode2        = ff_MPV_encode_picture,
    .close          = ff_MPV_encode_end,
    .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
    .priv_class     = &msmpeg4v2_class,
};
4511
/* Macro defined elsewhere; presumably declares msmpeg4v3_class, which the
 * encoder below references as its private class. */
FF_MPV_GENERIC_CLASS(msmpeg4v3)

/*
 * MS-MPEG4 version 3 encoder registration (codec name "msmpeg4"): generic
 * ff_MPV_* entry points, MpegEncContext private data, YUV 4:2:0 planar
 * input only.
 */
AVCodec ff_msmpeg4v3_encoder = {
    .name           = "msmpeg4",
    .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_MSMPEG4V3,
    .priv_data_size = sizeof(MpegEncContext),
    .init           = ff_MPV_encode_init,
    .encode2        = ff_MPV_encode_picture,
    .close          = ff_MPV_encode_end,
    .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
    .priv_class     = &msmpeg4v3_class,
};
4526
/* Macro defined elsewhere; presumably declares wmv1_class, which the
 * encoder below references as its private class. */
FF_MPV_GENERIC_CLASS(wmv1)

/*
 * Windows Media Video 7 encoder registration: generic ff_MPV_* entry points,
 * MpegEncContext private data, YUV 4:2:0 planar input only.
 */
AVCodec ff_wmv1_encoder = {
    .name           = "wmv1",
    .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_WMV1,
    .priv_data_size = sizeof(MpegEncContext),
    .init           = ff_MPV_encode_init,
    .encode2        = ff_MPV_encode_picture,
    .close          = ff_MPV_encode_end,
    .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
    .priv_class     = &wmv1_class,
};