]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
avcodec/pthread_slice: Use av_malloc(z)_array()
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "dsputil.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "h261.h"
44 #include "h263.h"
45 #include "mathops.h"
46 #include "mpegutils.h"
47 #include "mjpegenc.h"
48 #include "msmpeg4.h"
49 #include "qpeldsp.h"
50 #include "faandct.h"
51 #include "thread.h"
52 #include "aandcttab.h"
53 #include "flv.h"
54 #include "mpeg4video.h"
55 #include "internal.h"
56 #include "bytestream.h"
57 #include <limits.h>
58 #include "sp5x.h"
59
60 static int encode_picture(MpegEncContext *s, int picture_number);
61 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
62 static int sse_mb(MpegEncContext *s);
63 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
64 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
65
66 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
67 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
68
69 const AVOption ff_mpv_generic_options[] = {
70     FF_MPV_COMMON_OPTS
71     { NULL },
72 };
73
74 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
75                        uint16_t (*qmat16)[2][64],
76                        const uint16_t *quant_matrix,
77                        int bias, int qmin, int qmax, int intra)
78 {
79     int qscale;
80     int shift = 0;
81
82     for (qscale = qmin; qscale <= qmax; qscale++) {
83         int i;
84         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
85             dsp->fdct == ff_jpeg_fdct_islow_10 ||
86             dsp->fdct == ff_faandct) {
87             for (i = 0; i < 64; i++) {
88                 const int j = dsp->idct_permutation[i];
89                 /* 16 <= qscale * quant_matrix[i] <= 7905
90                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
91                  *             19952 <=              x  <= 249205026
92                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
93                  *           3444240 >= (1 << 36) / (x) >= 275 */
94
95                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
96                                         (qscale * quant_matrix[j]));
97             }
98         } else if (dsp->fdct == ff_fdct_ifast) {
99             for (i = 0; i < 64; i++) {
100                 const int j = dsp->idct_permutation[i];
101                 /* 16 <= qscale * quant_matrix[i] <= 7905
102                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
103                  *             19952 <=              x  <= 249205026
104                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
105                  *           3444240 >= (1 << 36) / (x) >= 275 */
106
107                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
108                                         (ff_aanscales[i] * (int64_t)qscale * quant_matrix[j]));
109             }
110         } else {
111             for (i = 0; i < 64; i++) {
112                 const int j = dsp->idct_permutation[i];
113                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
114                  * Assume x = qscale * quant_matrix[i]
115                  * So             16 <=              x  <= 7905
116                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
117                  * so          32768 >= (1 << 19) / (x) >= 67 */
118                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
119                                         (qscale * quant_matrix[j]));
120                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
121                 //                    (qscale * quant_matrix[i]);
122                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
123                                        (qscale * quant_matrix[j]);
124
125                 if (qmat16[qscale][0][i] == 0 ||
126                     qmat16[qscale][0][i] == 128 * 256)
127                     qmat16[qscale][0][i] = 128 * 256 - 1;
128                 qmat16[qscale][1][i] =
129                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
130                                 qmat16[qscale][0][i]);
131             }
132         }
133
134         for (i = intra; i < 64; i++) {
135             int64_t max = 8191;
136             if (dsp->fdct == ff_fdct_ifast) {
137                 max = (8191LL * ff_aanscales[i]) >> 14;
138             }
139             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
140                 shift++;
141             }
142         }
143     }
144     if (shift) {
145         av_log(NULL, AV_LOG_INFO,
146                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
147                QMAT_SHIFT - shift);
148     }
149 }
150
151 static inline void update_qscale(MpegEncContext *s)
152 {
153     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
154                 (FF_LAMBDA_SHIFT + 7);
155     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
156
157     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
158                  FF_LAMBDA_SHIFT;
159 }
160
161 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
162 {
163     int i;
164
165     if (matrix) {
166         put_bits(pb, 1, 1);
167         for (i = 0; i < 64; i++) {
168             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
169         }
170     } else
171         put_bits(pb, 1, 0);
172 }
173
174 /**
175  * init s->current_picture.qscale_table from s->lambda_table
176  */
177 void ff_init_qscale_tab(MpegEncContext *s)
178 {
179     int8_t * const qscale_table = s->current_picture.qscale_table;
180     int i;
181
182     for (i = 0; i < s->mb_num; i++) {
183         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
184         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
185         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
186                                                   s->avctx->qmax);
187     }
188 }
189
190 static void update_duplicate_context_after_me(MpegEncContext *dst,
191                                               MpegEncContext *src)
192 {
193 #define COPY(a) dst->a= src->a
194     COPY(pict_type);
195     COPY(current_picture);
196     COPY(f_code);
197     COPY(b_code);
198     COPY(qscale);
199     COPY(lambda);
200     COPY(lambda2);
201     COPY(picture_in_gop_number);
202     COPY(gop_picture_number);
203     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
204     COPY(progressive_frame);    // FIXME don't set in encode_header
205     COPY(partitioned_frame);    // FIXME don't set in encode_header
206 #undef COPY
207 }
208
209 /**
210  * Set the given MpegEncContext to defaults for encoding.
211  * the changed fields will not depend upon the prior state of the MpegEncContext.
212  */
213 static void MPV_encode_defaults(MpegEncContext *s)
214 {
215     int i;
216     ff_MPV_common_defaults(s);
217
218     for (i = -16; i < 16; i++) {
219         default_fcode_tab[i + MAX_MV] = 1;
220     }
221     s->me.mv_penalty = default_mv_penalty;
222     s->fcode_tab     = default_fcode_tab;
223
224     s->input_picture_number  = 0;
225     s->picture_in_gop_number = 0;
226 }
227
228 av_cold int ff_dct_encode_init(MpegEncContext *s) {
229     if (ARCH_X86)
230         ff_dct_encode_init_x86(s);
231
232     if (CONFIG_H263_ENCODER)
233         ff_h263dsp_init(&s->h263dsp);
234     if (!s->dct_quantize)
235         s->dct_quantize = ff_dct_quantize_c;
236     if (!s->denoise_dct)
237         s->denoise_dct  = denoise_dct_c;
238     s->fast_dct_quantize = s->dct_quantize;
239     if (s->avctx->trellis)
240         s->dct_quantize  = dct_quantize_trellis_c;
241
242     return 0;
243 }
244
245 /* init video encoder */
246 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
247 {
248     MpegEncContext *s = avctx->priv_data;
249     int i, ret;
250
251     MPV_encode_defaults(s);
252
253     switch (avctx->codec_id) {
254     case AV_CODEC_ID_MPEG2VIDEO:
255         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
256             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
257             av_log(avctx, AV_LOG_ERROR,
258                    "only YUV420 and YUV422 are supported\n");
259             return -1;
260         }
261         break;
262     case AV_CODEC_ID_MJPEG:
263     case AV_CODEC_ID_AMV:
264         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
265             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
266             avctx->pix_fmt != AV_PIX_FMT_YUVJ444P &&
267             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
268               avctx->pix_fmt != AV_PIX_FMT_YUV422P &&
269               avctx->pix_fmt != AV_PIX_FMT_YUV444P) ||
270              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
271             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
272             return -1;
273         }
274         break;
275     default:
276         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
277             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
278             return -1;
279         }
280     }
281
282     switch (avctx->pix_fmt) {
283     case AV_PIX_FMT_YUVJ444P:
284     case AV_PIX_FMT_YUV444P:
285         s->chroma_format = CHROMA_444;
286         break;
287     case AV_PIX_FMT_YUVJ422P:
288     case AV_PIX_FMT_YUV422P:
289         s->chroma_format = CHROMA_422;
290         break;
291     case AV_PIX_FMT_YUVJ420P:
292     case AV_PIX_FMT_YUV420P:
293     default:
294         s->chroma_format = CHROMA_420;
295         break;
296     }
297
298     s->bit_rate = avctx->bit_rate;
299     s->width    = avctx->width;
300     s->height   = avctx->height;
301     if (avctx->gop_size > 600 &&
302         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
303         av_log(avctx, AV_LOG_WARNING,
304                "keyframe interval too large!, reducing it from %d to %d\n",
305                avctx->gop_size, 600);
306         avctx->gop_size = 600;
307     }
308     s->gop_size     = avctx->gop_size;
309     s->avctx        = avctx;
310     s->flags        = avctx->flags;
311     s->flags2       = avctx->flags2;
312     if (avctx->max_b_frames > MAX_B_FRAMES) {
313         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
314                "is %d.\n", MAX_B_FRAMES);
315         avctx->max_b_frames = MAX_B_FRAMES;
316     }
317     s->max_b_frames = avctx->max_b_frames;
318     s->codec_id     = avctx->codec->id;
319     s->strict_std_compliance = avctx->strict_std_compliance;
320     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
321     s->mpeg_quant         = avctx->mpeg_quant;
322     s->rtp_mode           = !!avctx->rtp_payload_size;
323     s->intra_dc_precision = avctx->intra_dc_precision;
324     s->user_specified_pts = AV_NOPTS_VALUE;
325
326     if (s->gop_size <= 1) {
327         s->intra_only = 1;
328         s->gop_size   = 12;
329     } else {
330         s->intra_only = 0;
331     }
332
333     s->me_method = avctx->me_method;
334
335     /* Fixed QSCALE */
336     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
337
338     s->adaptive_quant = (s->avctx->lumi_masking ||
339                          s->avctx->dark_masking ||
340                          s->avctx->temporal_cplx_masking ||
341                          s->avctx->spatial_cplx_masking  ||
342                          s->avctx->p_masking      ||
343                          s->avctx->border_masking ||
344                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
345                         !s->fixed_qscale;
346
347     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
348
349     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
350         switch(avctx->codec_id) {
351         case AV_CODEC_ID_MPEG1VIDEO:
352         case AV_CODEC_ID_MPEG2VIDEO:
353             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112L / 15000000 * 16384;
354             break;
355         case AV_CODEC_ID_MPEG4:
356         case AV_CODEC_ID_MSMPEG4V1:
357         case AV_CODEC_ID_MSMPEG4V2:
358         case AV_CODEC_ID_MSMPEG4V3:
359             if       (avctx->rc_max_rate >= 15000000) {
360                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000L) * (760-320) / (38400000 - 15000000);
361             } else if(avctx->rc_max_rate >=  2000000) {
362                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000L) * (320- 80) / (15000000 -  2000000);
363             } else if(avctx->rc_max_rate >=   384000) {
364                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000L) * ( 80- 40) / ( 2000000 -   384000);
365             } else
366                 avctx->rc_buffer_size = 40;
367             avctx->rc_buffer_size *= 16384;
368             break;
369         }
370         if (avctx->rc_buffer_size) {
371             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
372         }
373     }
374
375     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
376         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
377         if (avctx->rc_max_rate && !avctx->rc_buffer_size)
378             return -1;
379     }
380
381     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
382         av_log(avctx, AV_LOG_INFO,
383                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
384     }
385
386     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
387         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
388         return -1;
389     }
390
391     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
392         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
393         return -1;
394     }
395
396     if (avctx->rc_max_rate &&
397         avctx->rc_max_rate == avctx->bit_rate &&
398         avctx->rc_max_rate != avctx->rc_min_rate) {
399         av_log(avctx, AV_LOG_INFO,
400                "impossible bitrate constraints, this will fail\n");
401     }
402
403     if (avctx->rc_buffer_size &&
404         avctx->bit_rate * (int64_t)avctx->time_base.num >
405             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
406         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
407         return -1;
408     }
409
410     if (!s->fixed_qscale &&
411         avctx->bit_rate * av_q2d(avctx->time_base) >
412             avctx->bit_rate_tolerance) {
413         av_log(avctx, AV_LOG_WARNING,
414                "bitrate tolerance %d too small for bitrate %d, overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
415         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
416     }
417
418     if (s->avctx->rc_max_rate &&
419         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
420         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
421          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
422         90000LL * (avctx->rc_buffer_size - 1) >
423             s->avctx->rc_max_rate * 0xFFFFLL) {
424         av_log(avctx, AV_LOG_INFO,
425                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
426                "specified vbv buffer is too large for the given bitrate!\n");
427     }
428
429     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
430         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
431         s->codec_id != AV_CODEC_ID_FLV1) {
432         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
433         return -1;
434     }
435
436     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
437         av_log(avctx, AV_LOG_ERROR,
438                "OBMC is only supported with simple mb decision\n");
439         return -1;
440     }
441
442     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
443         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
444         return -1;
445     }
446
447     if (s->max_b_frames                    &&
448         s->codec_id != AV_CODEC_ID_MPEG4      &&
449         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
450         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
451         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
452         return -1;
453     }
454     if (s->max_b_frames < 0) {
455         av_log(avctx, AV_LOG_ERROR,
456                "max b frames must be 0 or positive for mpegvideo based encoders\n");
457         return -1;
458     }
459
460     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
461          s->codec_id == AV_CODEC_ID_H263  ||
462          s->codec_id == AV_CODEC_ID_H263P) &&
463         (avctx->sample_aspect_ratio.num > 255 ||
464          avctx->sample_aspect_ratio.den > 255)) {
465         av_log(avctx, AV_LOG_WARNING,
466                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
467                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
468         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
469                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
470     }
471
472     if ((s->codec_id == AV_CODEC_ID_H263  ||
473          s->codec_id == AV_CODEC_ID_H263P) &&
474         (avctx->width  > 2048 ||
475          avctx->height > 1152 )) {
476         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
477         return -1;
478     }
479     if ((s->codec_id == AV_CODEC_ID_H263  ||
480          s->codec_id == AV_CODEC_ID_H263P) &&
481         ((avctx->width &3) ||
482          (avctx->height&3) )) {
483         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
484         return -1;
485     }
486
487     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
488         (avctx->width  > 4095 ||
489          avctx->height > 4095 )) {
490         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
491         return -1;
492     }
493
494     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
495         (avctx->width  > 16383 ||
496          avctx->height > 16383 )) {
497         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
498         return -1;
499     }
500
501     if (s->codec_id == AV_CODEC_ID_RV10 &&
502         (avctx->width &15 ||
503          avctx->height&15 )) {
504         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
505         return AVERROR(EINVAL);
506     }
507
508     if (s->codec_id == AV_CODEC_ID_RV20 &&
509         (avctx->width &3 ||
510          avctx->height&3 )) {
511         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
512         return AVERROR(EINVAL);
513     }
514
515     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
516          s->codec_id == AV_CODEC_ID_WMV2) &&
517          avctx->width & 1) {
518          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
519          return -1;
520     }
521
522     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
523         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
524         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
525         return -1;
526     }
527
528     // FIXME mpeg2 uses that too
529     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
530                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
531         av_log(avctx, AV_LOG_ERROR,
532                "mpeg2 style quantization not supported by codec\n");
533         return -1;
534     }
535
536     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
537         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
538         return -1;
539     }
540
541     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
542         s->avctx->mb_decision != FF_MB_DECISION_RD) {
543         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
544         return -1;
545     }
546
547     if (s->avctx->scenechange_threshold < 1000000000 &&
548         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
549         av_log(avctx, AV_LOG_ERROR,
550                "closed gop with scene change detection are not supported yet, "
551                "set threshold to 1000000000\n");
552         return -1;
553     }
554
555     if (s->flags & CODEC_FLAG_LOW_DELAY) {
556         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
557             av_log(avctx, AV_LOG_ERROR,
558                   "low delay forcing is only available for mpeg2\n");
559             return -1;
560         }
561         if (s->max_b_frames != 0) {
562             av_log(avctx, AV_LOG_ERROR,
563                    "b frames cannot be used with low delay\n");
564             return -1;
565         }
566     }
567
568     if (s->q_scale_type == 1) {
569         if (avctx->qmax > 12) {
570             av_log(avctx, AV_LOG_ERROR,
571                    "non linear quant only supports qmax <= 12 currently\n");
572             return -1;
573         }
574     }
575
576     if (s->avctx->thread_count > 1         &&
577         s->codec_id != AV_CODEC_ID_MPEG4      &&
578         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
579         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
580         s->codec_id != AV_CODEC_ID_MJPEG      &&
581         (s->codec_id != AV_CODEC_ID_H263P)) {
582         av_log(avctx, AV_LOG_ERROR,
583                "multi threaded encoding not supported by codec\n");
584         return -1;
585     }
586
587     if (s->avctx->thread_count < 1) {
588         av_log(avctx, AV_LOG_ERROR,
589                "automatic thread number detection not supported by codec, "
590                "patch welcome\n");
591         return -1;
592     }
593
594     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
595         s->rtp_mode = 1;
596
597     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
598         s->h263_slice_structured = 1;
599
600     if (!avctx->time_base.den || !avctx->time_base.num) {
601         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
602         return -1;
603     }
604
605     i = (INT_MAX / 2 + 128) >> 8;
606     if (avctx->mb_threshold >= i) {
607         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
608                i - 1);
609         return -1;
610     }
611
612     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
613         av_log(avctx, AV_LOG_INFO,
614                "notice: b_frame_strategy only affects the first pass\n");
615         avctx->b_frame_strategy = 0;
616     }
617
618     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
619     if (i > 1) {
620         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
621         avctx->time_base.den /= i;
622         avctx->time_base.num /= i;
623         //return -1;
624     }
625
626     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
627         // (a + x * 3 / 8) / x
628         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
629         s->inter_quant_bias = 0;
630     } else {
631         s->intra_quant_bias = 0;
632         // (a - x / 4) / x
633         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
634     }
635
636     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
637         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
638         return AVERROR(EINVAL);
639     }
640
641     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
642         s->intra_quant_bias = avctx->intra_quant_bias;
643     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
644         s->inter_quant_bias = avctx->inter_quant_bias;
645
646     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
647
648     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
649         s->avctx->time_base.den > (1 << 16) - 1) {
650         av_log(avctx, AV_LOG_ERROR,
651                "timebase %d/%d not supported by MPEG 4 standard, "
652                "the maximum admitted value for the timebase denominator "
653                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
654                (1 << 16) - 1);
655         return -1;
656     }
657     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
658
659     switch (avctx->codec->id) {
660     case AV_CODEC_ID_MPEG1VIDEO:
661         s->out_format = FMT_MPEG1;
662         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
663         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
664         break;
665     case AV_CODEC_ID_MPEG2VIDEO:
666         s->out_format = FMT_MPEG1;
667         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
668         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
669         s->rtp_mode   = 1;
670         break;
671     case AV_CODEC_ID_MJPEG:
672     case AV_CODEC_ID_AMV:
673         s->out_format = FMT_MJPEG;
674         s->intra_only = 1; /* force intra only for jpeg */
675         if (!CONFIG_MJPEG_ENCODER ||
676             ff_mjpeg_encode_init(s) < 0)
677             return -1;
678         avctx->delay = 0;
679         s->low_delay = 1;
680         break;
681     case AV_CODEC_ID_H261:
682         if (!CONFIG_H261_ENCODER)
683             return -1;
684         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
685             av_log(avctx, AV_LOG_ERROR,
686                    "The specified picture size of %dx%d is not valid for the "
687                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
688                     s->width, s->height);
689             return -1;
690         }
691         s->out_format = FMT_H261;
692         avctx->delay  = 0;
693         s->low_delay  = 1;
694         break;
695     case AV_CODEC_ID_H263:
696         if (!CONFIG_H263_ENCODER)
697             return -1;
698         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
699                              s->width, s->height) == 8) {
700             av_log(avctx, AV_LOG_ERROR,
701                    "The specified picture size of %dx%d is not valid for "
702                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
703                    "352x288, 704x576, and 1408x1152. "
704                    "Try H.263+.\n", s->width, s->height);
705             return -1;
706         }
707         s->out_format = FMT_H263;
708         avctx->delay  = 0;
709         s->low_delay  = 1;
710         break;
711     case AV_CODEC_ID_H263P:
712         s->out_format = FMT_H263;
713         s->h263_plus  = 1;
714         /* Fx */
715         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
716         s->modified_quant  = s->h263_aic;
717         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
718         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
719
720         /* /Fx */
721         /* These are just to be sure */
722         avctx->delay = 0;
723         s->low_delay = 1;
724         break;
725     case AV_CODEC_ID_FLV1:
726         s->out_format      = FMT_H263;
727         s->h263_flv        = 2; /* format = 1; 11-bit codes */
728         s->unrestricted_mv = 1;
729         s->rtp_mode  = 0; /* don't allow GOB */
730         avctx->delay = 0;
731         s->low_delay = 1;
732         break;
733     case AV_CODEC_ID_RV10:
734         s->out_format = FMT_H263;
735         avctx->delay  = 0;
736         s->low_delay  = 1;
737         break;
738     case AV_CODEC_ID_RV20:
739         s->out_format      = FMT_H263;
740         avctx->delay       = 0;
741         s->low_delay       = 1;
742         s->modified_quant  = 1;
743         s->h263_aic        = 1;
744         s->h263_plus       = 1;
745         s->loop_filter     = 1;
746         s->unrestricted_mv = 0;
747         break;
748     case AV_CODEC_ID_MPEG4:
749         s->out_format      = FMT_H263;
750         s->h263_pred       = 1;
751         s->unrestricted_mv = 1;
752         s->low_delay       = s->max_b_frames ? 0 : 1;
753         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
754         break;
755     case AV_CODEC_ID_MSMPEG4V2:
756         s->out_format      = FMT_H263;
757         s->h263_pred       = 1;
758         s->unrestricted_mv = 1;
759         s->msmpeg4_version = 2;
760         avctx->delay       = 0;
761         s->low_delay       = 1;
762         break;
763     case AV_CODEC_ID_MSMPEG4V3:
764         s->out_format        = FMT_H263;
765         s->h263_pred         = 1;
766         s->unrestricted_mv   = 1;
767         s->msmpeg4_version   = 3;
768         s->flipflop_rounding = 1;
769         avctx->delay         = 0;
770         s->low_delay         = 1;
771         break;
772     case AV_CODEC_ID_WMV1:
773         s->out_format        = FMT_H263;
774         s->h263_pred         = 1;
775         s->unrestricted_mv   = 1;
776         s->msmpeg4_version   = 4;
777         s->flipflop_rounding = 1;
778         avctx->delay         = 0;
779         s->low_delay         = 1;
780         break;
781     case AV_CODEC_ID_WMV2:
782         s->out_format        = FMT_H263;
783         s->h263_pred         = 1;
784         s->unrestricted_mv   = 1;
785         s->msmpeg4_version   = 5;
786         s->flipflop_rounding = 1;
787         avctx->delay         = 0;
788         s->low_delay         = 1;
789         break;
790     default:
791         return -1;
792     }
793
794     avctx->has_b_frames = !s->low_delay;
795
796     s->encoding = 1;
797
798     s->progressive_frame    =
799     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
800                                                 CODEC_FLAG_INTERLACED_ME) ||
801                                 s->alternate_scan);
802
803     /* init */
804     if (ff_MPV_common_init(s) < 0)
805         return -1;
806
807     ff_qpeldsp_init(&s->qdsp);
808
809     s->avctx->coded_frame = s->current_picture.f;
810
811     if (s->msmpeg4_version) {
812         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
813                           2 * 2 * (MAX_LEVEL + 1) *
814                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
815     }
816     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
817
818     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
819     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
820     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
821     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
822     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
823     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
824     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
825                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
826     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
827                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
828
829     if (s->avctx->noise_reduction) {
830         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
831                           2 * 64 * sizeof(uint16_t), fail);
832     }
833
834     ff_dct_encode_init(s);
835
836     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
837         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
838
839     s->quant_precision = 5;
840
841     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
842     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
843
844     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
845         ff_h261_encode_init(s);
846     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
847         ff_h263_encode_init(s);
848     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
849         ff_msmpeg4_encode_init(s);
850     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
851         && s->out_format == FMT_MPEG1)
852         ff_mpeg1_encode_init(s);
853
854     /* init q matrix */
855     for (i = 0; i < 64; i++) {
856         int j = s->dsp.idct_permutation[i];
857         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
858             s->mpeg_quant) {
859             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
860             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
861         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
862             s->intra_matrix[j] =
863             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
864         } else {
865             /* mpeg1/2 */
866             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
867             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
868         }
869         if (s->avctx->intra_matrix)
870             s->intra_matrix[j] = s->avctx->intra_matrix[i];
871         if (s->avctx->inter_matrix)
872             s->inter_matrix[j] = s->avctx->inter_matrix[i];
873     }
874
875     /* precompute matrix */
876     /* for mjpeg, we do include qscale in the matrix */
877     if (s->out_format != FMT_MJPEG) {
878         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
879                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
880                           31, 1);
881         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
882                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
883                           31, 0);
884     }
885
886     if (ff_rate_control_init(s) < 0)
887         return -1;
888
889 #if FF_API_ERROR_RATE
890     FF_DISABLE_DEPRECATION_WARNINGS
891     if (avctx->error_rate)
892         s->error_rate = avctx->error_rate;
893     FF_ENABLE_DEPRECATION_WARNINGS;
894 #endif
895
896 #if FF_API_NORMALIZE_AQP
897     FF_DISABLE_DEPRECATION_WARNINGS
898     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
899         s->mpv_flags |= FF_MPV_FLAG_NAQ;
900     FF_ENABLE_DEPRECATION_WARNINGS;
901 #endif
902
903 #if FF_API_MV0
904     FF_DISABLE_DEPRECATION_WARNINGS
905     if (avctx->flags & CODEC_FLAG_MV0)
906         s->mpv_flags |= FF_MPV_FLAG_MV0;
907     FF_ENABLE_DEPRECATION_WARNINGS
908 #endif
909
910     if (avctx->b_frame_strategy == 2) {
911         for (i = 0; i < s->max_b_frames + 2; i++) {
912             s->tmp_frames[i] = av_frame_alloc();
913             if (!s->tmp_frames[i])
914                 return AVERROR(ENOMEM);
915
916             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
917             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
918             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
919
920             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
921             if (ret < 0)
922                 return ret;
923         }
924     }
925
926     return 0;
927 fail:
928     ff_MPV_encode_end(avctx);
929     return AVERROR_UNKNOWN;
930 }
931
932 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
933 {
934     MpegEncContext *s = avctx->priv_data;
935     int i;
936
937     ff_rate_control_uninit(s);
938
939     ff_MPV_common_end(s);
940     if (CONFIG_MJPEG_ENCODER &&
941         s->out_format == FMT_MJPEG)
942         ff_mjpeg_encode_close(s);
943
944     av_freep(&avctx->extradata);
945
946     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
947         av_frame_free(&s->tmp_frames[i]);
948
949     ff_free_picture_tables(&s->new_picture);
950     ff_mpeg_unref_picture(s, &s->new_picture);
951
952     av_freep(&s->avctx->stats_out);
953     av_freep(&s->ac_stats);
954
955     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
956     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
957     s->q_chroma_intra_matrix=   NULL;
958     s->q_chroma_intra_matrix16= NULL;
959     av_freep(&s->q_intra_matrix);
960     av_freep(&s->q_inter_matrix);
961     av_freep(&s->q_intra_matrix16);
962     av_freep(&s->q_inter_matrix16);
963     av_freep(&s->input_picture);
964     av_freep(&s->reordered_input_picture);
965     av_freep(&s->dct_offset);
966
967     return 0;
968 }
969
/**
 * Sum of absolute errors of a 16x16 block against a constant value.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared with
 * @param stride distance in bytes between two consecutive rows of src
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int row, col;
    int sum = 0;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            sum += diff >= 0 ? diff : -diff;
        }
    }

    return sum;
}
983
984 static int get_intra_count(MpegEncContext *s, uint8_t *src,
985                            uint8_t *ref, int stride)
986 {
987     int x, y, w, h;
988     int acc = 0;
989
990     w = s->width  & ~15;
991     h = s->height & ~15;
992
993     for (y = 0; y < h; y += 16) {
994         for (x = 0; x < w; x += 16) {
995             int offset = x + y * stride;
996             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
997                                      16);
998             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
999             int sae  = get_sae(src + offset, mean, stride);
1000
1001             acc += sae + 500 < sad;
1002         }
1003     }
1004     return acc;
1005 }
1006
1007
1008 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1009 {
1010     Picture *pic = NULL;
1011     int64_t pts;
1012     int i, display_picture_number = 0, ret;
1013     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
1014                                                  (s->low_delay ? 0 : 1);
1015     int direct = 1;
1016
1017     if (pic_arg) {
1018         pts = pic_arg->pts;
1019         display_picture_number = s->input_picture_number++;
1020
1021         if (pts != AV_NOPTS_VALUE) {
1022             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1023                 int64_t last = s->user_specified_pts;
1024
1025                 if (pts <= last) {
1026                     av_log(s->avctx, AV_LOG_ERROR,
1027                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1028                            pts, last);
1029                     return AVERROR(EINVAL);
1030                 }
1031
1032                 if (!s->low_delay && display_picture_number == 1)
1033                     s->dts_delta = pts - last;
1034             }
1035             s->user_specified_pts = pts;
1036         } else {
1037             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1038                 s->user_specified_pts =
1039                 pts = s->user_specified_pts + 1;
1040                 av_log(s->avctx, AV_LOG_INFO,
1041                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1042                        pts);
1043             } else {
1044                 pts = display_picture_number;
1045             }
1046         }
1047     }
1048
1049     if (pic_arg) {
1050         if (!pic_arg->buf[0])
1051             direct = 0;
1052         if (pic_arg->linesize[0] != s->linesize)
1053             direct = 0;
1054         if (pic_arg->linesize[1] != s->uvlinesize)
1055             direct = 0;
1056         if (pic_arg->linesize[2] != s->uvlinesize)
1057             direct = 0;
1058         if ((s->width & 15) || (s->height & 15))
1059             direct = 0;
1060         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1061             direct = 0;
1062         if (s->linesize & (STRIDE_ALIGN-1))
1063             direct = 0;
1064
1065         av_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1066                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1067
1068         if (direct) {
1069             i = ff_find_unused_picture(s, 1);
1070             if (i < 0)
1071                 return i;
1072
1073             pic = &s->picture[i];
1074             pic->reference = 3;
1075
1076             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1077                 return ret;
1078             if (ff_alloc_picture(s, pic, 1) < 0) {
1079                 return -1;
1080             }
1081         } else {
1082             i = ff_find_unused_picture(s, 0);
1083             if (i < 0)
1084                 return i;
1085
1086             pic = &s->picture[i];
1087             pic->reference = 3;
1088
1089             if (ff_alloc_picture(s, pic, 0) < 0) {
1090                 return -1;
1091             }
1092
1093             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1094                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1095                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1096                 // empty
1097             } else {
1098                 int h_chroma_shift, v_chroma_shift;
1099                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1100                                                  &h_chroma_shift,
1101                                                  &v_chroma_shift);
1102
1103                 for (i = 0; i < 3; i++) {
1104                     int src_stride = pic_arg->linesize[i];
1105                     int dst_stride = i ? s->uvlinesize : s->linesize;
1106                     int h_shift = i ? h_chroma_shift : 0;
1107                     int v_shift = i ? v_chroma_shift : 0;
1108                     int w = s->width  >> h_shift;
1109                     int h = s->height >> v_shift;
1110                     uint8_t *src = pic_arg->data[i];
1111                     uint8_t *dst = pic->f->data[i];
1112
1113                     if (!s->avctx->rc_buffer_size)
1114                         dst += INPLACE_OFFSET;
1115
1116                     if (src_stride == dst_stride)
1117                         memcpy(dst, src, src_stride * h);
1118                     else {
1119                         int h2 = h;
1120                         uint8_t *dst2 = dst;
1121                         while (h2--) {
1122                             memcpy(dst2, src, w);
1123                             dst2 += dst_stride;
1124                             src += src_stride;
1125                         }
1126                     }
1127                     if ((s->width & 15) || (s->height & 15)) {
1128                         s->dsp.draw_edges(dst, dst_stride,
1129                                           w, h,
1130                                           16>>h_shift,
1131                                           16>>v_shift,
1132                                           EDGE_BOTTOM);
1133                     }
1134                 }
1135             }
1136         }
1137         ret = av_frame_copy_props(pic->f, pic_arg);
1138         if (ret < 0)
1139             return ret;
1140
1141         pic->f->display_picture_number = display_picture_number;
1142         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1143     }
1144
1145     /* shift buffer entries */
1146     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1147         s->input_picture[i - 1] = s->input_picture[i];
1148
1149     s->input_picture[encoding_delay] = (Picture*) pic;
1150
1151     return 0;
1152 }
1153
1154 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1155 {
1156     int x, y, plane;
1157     int score = 0;
1158     int64_t score64 = 0;
1159
1160     for (plane = 0; plane < 3; plane++) {
1161         const int stride = p->f->linesize[plane];
1162         const int bw = plane ? 1 : 2;
1163         for (y = 0; y < s->mb_height * bw; y++) {
1164             for (x = 0; x < s->mb_width * bw; x++) {
1165                 int off = p->shared ? 0 : 16;
1166                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1167                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1168                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1169
1170                 switch (FFABS(s->avctx->frame_skip_exp)) {
1171                 case 0: score    =  FFMAX(score, v);          break;
1172                 case 1: score   += FFABS(v);                  break;
1173                 case 2: score64 += v * (int64_t)v;                       break;
1174                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1175                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1176                 }
1177             }
1178         }
1179     }
1180     emms_c();
1181
1182     if (score)
1183         score64 = score;
1184     if (s->avctx->frame_skip_exp < 0)
1185         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1186                       -1.0/s->avctx->frame_skip_exp);
1187
1188     if (score64 < s->avctx->frame_skip_threshold)
1189         return 1;
1190     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1191         return 1;
1192     return 0;
1193 }
1194
1195 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1196 {
1197     AVPacket pkt = { 0 };
1198     int ret, got_output;
1199
1200     av_init_packet(&pkt);
1201     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1202     if (ret < 0)
1203         return ret;
1204
1205     ret = pkt.size;
1206     av_free_packet(&pkt);
1207     return ret;
1208 }
1209
1210 static int estimate_best_b_count(MpegEncContext *s)
1211 {
1212     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1213     AVCodecContext *c = avcodec_alloc_context3(NULL);
1214     const int scale = s->avctx->brd_scale;
1215     int i, j, out_size, p_lambda, b_lambda, lambda2;
1216     int64_t best_rd  = INT64_MAX;
1217     int best_b_count = -1;
1218
1219     av_assert0(scale >= 0 && scale <= 3);
1220
1221     //emms_c();
1222     //s->next_picture_ptr->quality;
1223     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1224     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1225     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1226     if (!b_lambda) // FIXME we should do this somewhere else
1227         b_lambda = p_lambda;
1228     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1229                FF_LAMBDA_SHIFT;
1230
1231     c->width        = s->width  >> scale;
1232     c->height       = s->height >> scale;
1233     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1234     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1235     c->mb_decision  = s->avctx->mb_decision;
1236     c->me_cmp       = s->avctx->me_cmp;
1237     c->mb_cmp       = s->avctx->mb_cmp;
1238     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1239     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1240     c->time_base    = s->avctx->time_base;
1241     c->max_b_frames = s->max_b_frames;
1242
1243     if (avcodec_open2(c, codec, NULL) < 0)
1244         return -1;
1245
1246     for (i = 0; i < s->max_b_frames + 2; i++) {
1247         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1248                                                 s->next_picture_ptr;
1249
1250         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1251             pre_input = *pre_input_ptr;
1252
1253             if (!pre_input.shared && i) {
1254                 pre_input.f->data[0] += INPLACE_OFFSET;
1255                 pre_input.f->data[1] += INPLACE_OFFSET;
1256                 pre_input.f->data[2] += INPLACE_OFFSET;
1257             }
1258
1259             s->dsp.shrink[scale](s->tmp_frames[i]->data[0], s->tmp_frames[i]->linesize[0],
1260                                  pre_input.f->data[0], pre_input.f->linesize[0],
1261                                  c->width,      c->height);
1262             s->dsp.shrink[scale](s->tmp_frames[i]->data[1], s->tmp_frames[i]->linesize[1],
1263                                  pre_input.f->data[1], pre_input.f->linesize[1],
1264                                  c->width >> 1, c->height >> 1);
1265             s->dsp.shrink[scale](s->tmp_frames[i]->data[2], s->tmp_frames[i]->linesize[2],
1266                                  pre_input.f->data[2], pre_input.f->linesize[2],
1267                                  c->width >> 1, c->height >> 1);
1268         }
1269     }
1270
1271     for (j = 0; j < s->max_b_frames + 1; j++) {
1272         int64_t rd = 0;
1273
1274         if (!s->input_picture[j])
1275             break;
1276
1277         c->error[0] = c->error[1] = c->error[2] = 0;
1278
1279         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1280         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1281
1282         out_size = encode_frame(c, s->tmp_frames[0]);
1283
1284         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1285
1286         for (i = 0; i < s->max_b_frames + 1; i++) {
1287             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1288
1289             s->tmp_frames[i + 1]->pict_type = is_p ?
1290                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1291             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1292
1293             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1294
1295             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1296         }
1297
1298         /* get the delayed frames */
1299         while (out_size) {
1300             out_size = encode_frame(c, NULL);
1301             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1302         }
1303
1304         rd += c->error[0] + c->error[1] + c->error[2];
1305
1306         if (rd < best_rd) {
1307             best_rd = rd;
1308             best_b_count = j;
1309         }
1310     }
1311
1312     avcodec_close(c);
1313     av_freep(&c);
1314
1315     return best_b_count;
1316 }
1317
1318 static int select_input_picture(MpegEncContext *s)
1319 {
1320     int i, ret;
1321
1322     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1323         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1324     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1325
1326     /* set next picture type & ordering */
1327     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1328         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1329             if (s->picture_in_gop_number < s->gop_size &&
1330                 s->next_picture_ptr &&
1331                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1332                 // FIXME check that te gop check above is +-1 correct
1333                 av_frame_unref(s->input_picture[0]->f);
1334
1335                 ff_vbv_update(s, 0);
1336
1337                 goto no_output_pic;
1338             }
1339         }
1340
1341         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1342             s->next_picture_ptr == NULL || s->intra_only) {
1343             s->reordered_input_picture[0] = s->input_picture[0];
1344             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1345             s->reordered_input_picture[0]->f->coded_picture_number =
1346                 s->coded_picture_number++;
1347         } else {
1348             int b_frames;
1349
1350             if (s->flags & CODEC_FLAG_PASS2) {
1351                 for (i = 0; i < s->max_b_frames + 1; i++) {
1352                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1353
1354                     if (pict_num >= s->rc_context.num_entries)
1355                         break;
1356                     if (!s->input_picture[i]) {
1357                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1358                         break;
1359                     }
1360
1361                     s->input_picture[i]->f->pict_type =
1362                         s->rc_context.entry[pict_num].new_pict_type;
1363                 }
1364             }
1365
1366             if (s->avctx->b_frame_strategy == 0) {
1367                 b_frames = s->max_b_frames;
1368                 while (b_frames && !s->input_picture[b_frames])
1369                     b_frames--;
1370             } else if (s->avctx->b_frame_strategy == 1) {
1371                 for (i = 1; i < s->max_b_frames + 1; i++) {
1372                     if (s->input_picture[i] &&
1373                         s->input_picture[i]->b_frame_score == 0) {
1374                         s->input_picture[i]->b_frame_score =
1375                             get_intra_count(s,
1376                                             s->input_picture[i    ]->f->data[0],
1377                                             s->input_picture[i - 1]->f->data[0],
1378                                             s->linesize) + 1;
1379                     }
1380                 }
1381                 for (i = 0; i < s->max_b_frames + 1; i++) {
1382                     if (s->input_picture[i] == NULL ||
1383                         s->input_picture[i]->b_frame_score - 1 >
1384                             s->mb_num / s->avctx->b_sensitivity)
1385                         break;
1386                 }
1387
1388                 b_frames = FFMAX(0, i - 1);
1389
1390                 /* reset scores */
1391                 for (i = 0; i < b_frames + 1; i++) {
1392                     s->input_picture[i]->b_frame_score = 0;
1393                 }
1394             } else if (s->avctx->b_frame_strategy == 2) {
1395                 b_frames = estimate_best_b_count(s);
1396             } else {
1397                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1398                 b_frames = 0;
1399             }
1400
1401             emms_c();
1402
1403             for (i = b_frames - 1; i >= 0; i--) {
1404                 int type = s->input_picture[i]->f->pict_type;
1405                 if (type && type != AV_PICTURE_TYPE_B)
1406                     b_frames = i;
1407             }
1408             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1409                 b_frames == s->max_b_frames) {
1410                 av_log(s->avctx, AV_LOG_ERROR,
1411                        "warning, too many b frames in a row\n");
1412             }
1413
1414             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1415                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1416                     s->gop_size > s->picture_in_gop_number) {
1417                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1418                 } else {
1419                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1420                         b_frames = 0;
1421                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1422                 }
1423             }
1424
1425             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1426                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1427                 b_frames--;
1428
1429             s->reordered_input_picture[0] = s->input_picture[b_frames];
1430             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1431                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1432             s->reordered_input_picture[0]->f->coded_picture_number =
1433                 s->coded_picture_number++;
1434             for (i = 0; i < b_frames; i++) {
1435                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1436                 s->reordered_input_picture[i + 1]->f->pict_type =
1437                     AV_PICTURE_TYPE_B;
1438                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1439                     s->coded_picture_number++;
1440             }
1441         }
1442     }
1443 no_output_pic:
1444     if (s->reordered_input_picture[0]) {
1445         s->reordered_input_picture[0]->reference =
1446            s->reordered_input_picture[0]->f->pict_type !=
1447                AV_PICTURE_TYPE_B ? 3 : 0;
1448
1449         ff_mpeg_unref_picture(s, &s->new_picture);
1450         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1451             return ret;
1452
1453         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1454             // input is a shared pix, so we can't modifiy it -> alloc a new
1455             // one & ensure that the shared one is reuseable
1456
1457             Picture *pic;
1458             int i = ff_find_unused_picture(s, 0);
1459             if (i < 0)
1460                 return i;
1461             pic = &s->picture[i];
1462
1463             pic->reference = s->reordered_input_picture[0]->reference;
1464             if (ff_alloc_picture(s, pic, 0) < 0) {
1465                 return -1;
1466             }
1467
1468             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1469             if (ret < 0)
1470                 return ret;
1471
1472             /* mark us unused / free shared pic */
1473             av_frame_unref(s->reordered_input_picture[0]->f);
1474             s->reordered_input_picture[0]->shared = 0;
1475
1476             s->current_picture_ptr = pic;
1477         } else {
1478             // input is not a shared pix -> reuse buffer for current_pix
1479             s->current_picture_ptr = s->reordered_input_picture[0];
1480             for (i = 0; i < 4; i++) {
1481                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1482             }
1483         }
1484         ff_mpeg_unref_picture(s, &s->current_picture);
1485         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1486                                        s->current_picture_ptr)) < 0)
1487             return ret;
1488
1489         s->picture_number = s->new_picture.f->display_picture_number;
1490     } else {
1491         ff_mpeg_unref_picture(s, &s->new_picture);
1492     }
1493     return 0;
1494 }
1495
1496 static void frame_end(MpegEncContext *s)
1497 {
1498     if (s->unrestricted_mv &&
1499         s->current_picture.reference &&
1500         !s->intra_only) {
1501         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1502         int hshift = desc->log2_chroma_w;
1503         int vshift = desc->log2_chroma_h;
1504         s->dsp.draw_edges(s->current_picture.f->data[0], s->current_picture.f->linesize[0],
1505                           s->h_edge_pos, s->v_edge_pos,
1506                           EDGE_WIDTH, EDGE_WIDTH,
1507                           EDGE_TOP | EDGE_BOTTOM);
1508         s->dsp.draw_edges(s->current_picture.f->data[1], s->current_picture.f->linesize[1],
1509                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1510                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1511                           EDGE_TOP | EDGE_BOTTOM);
1512         s->dsp.draw_edges(s->current_picture.f->data[2], s->current_picture.f->linesize[2],
1513                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1514                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1515                           EDGE_TOP | EDGE_BOTTOM);
1516     }
1517
1518     emms_c();
1519
1520     s->last_pict_type                 = s->pict_type;
1521     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1522     if (s->pict_type!= AV_PICTURE_TYPE_B)
1523         s->last_non_b_pict_type = s->pict_type;
1524
1525     s->avctx->coded_frame = s->current_picture_ptr->f;
1526
1527 }
1528
1529 static void update_noise_reduction(MpegEncContext *s)
1530 {
1531     int intra, i;
1532
1533     for (intra = 0; intra < 2; intra++) {
1534         if (s->dct_count[intra] > (1 << 16)) {
1535             for (i = 0; i < 64; i++) {
1536                 s->dct_error_sum[intra][i] >>= 1;
1537             }
1538             s->dct_count[intra] >>= 1;
1539         }
1540
1541         for (i = 0; i < 64; i++) {
1542             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1543                                        s->dct_count[intra] +
1544                                        s->dct_error_sum[intra][i] / 2) /
1545                                       (s->dct_error_sum[intra][i] + 1);
1546         }
1547     }
1548 }
1549
1550 static int frame_start(MpegEncContext *s)
1551 {
1552     int ret;
1553
1554     /* mark & release old frames */
1555     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1556         s->last_picture_ptr != s->next_picture_ptr &&
1557         s->last_picture_ptr->f->buf[0]) {
1558         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1559     }
1560
1561     s->current_picture_ptr->f->pict_type = s->pict_type;
1562     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1563
1564     ff_mpeg_unref_picture(s, &s->current_picture);
1565     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1566                                    s->current_picture_ptr)) < 0)
1567         return ret;
1568
1569     if (s->pict_type != AV_PICTURE_TYPE_B) {
1570         s->last_picture_ptr = s->next_picture_ptr;
1571         if (!s->droppable)
1572             s->next_picture_ptr = s->current_picture_ptr;
1573     }
1574
1575     if (s->last_picture_ptr) {
1576         ff_mpeg_unref_picture(s, &s->last_picture);
1577         if (s->last_picture_ptr->f->buf[0] &&
1578             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1579                                        s->last_picture_ptr)) < 0)
1580             return ret;
1581     }
1582     if (s->next_picture_ptr) {
1583         ff_mpeg_unref_picture(s, &s->next_picture);
1584         if (s->next_picture_ptr->f->buf[0] &&
1585             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1586                                        s->next_picture_ptr)) < 0)
1587             return ret;
1588     }
1589
1590     if (s->picture_structure!= PICT_FRAME) {
1591         int i;
1592         for (i = 0; i < 4; i++) {
1593             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1594                 s->current_picture.f->data[i] +=
1595                     s->current_picture.f->linesize[i];
1596             }
1597             s->current_picture.f->linesize[i] *= 2;
1598             s->last_picture.f->linesize[i]    *= 2;
1599             s->next_picture.f->linesize[i]    *= 2;
1600         }
1601     }
1602
1603     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1604         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1605         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1606     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1607         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1608         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1609     } else {
1610         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1611         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1612     }
1613
1614     if (s->dct_error_sum) {
1615         av_assert2(s->avctx->noise_reduction && s->encoding);
1616         update_noise_reduction(s);
1617     }
1618
1619     return 0;
1620 }
1621
1622 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1623                           const AVFrame *pic_arg, int *got_packet)
1624 {
1625     MpegEncContext *s = avctx->priv_data;
1626     int i, stuffing_count, ret;
1627     int context_count = s->slice_context_count;
1628
1629     s->picture_in_gop_number++;
1630
1631     if (load_input_picture(s, pic_arg) < 0)
1632         return -1;
1633
1634     if (select_input_picture(s) < 0) {
1635         return -1;
1636     }
1637
1638     /* output? */
1639     if (s->new_picture.f->data[0]) {
1640         if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1641             return ret;
1642         if (s->mb_info) {
1643             s->mb_info_ptr = av_packet_new_side_data(pkt,
1644                                  AV_PKT_DATA_H263_MB_INFO,
1645                                  s->mb_width*s->mb_height*12);
1646             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1647         }
1648
1649         for (i = 0; i < context_count; i++) {
1650             int start_y = s->thread_context[i]->start_mb_y;
1651             int   end_y = s->thread_context[i]->  end_mb_y;
1652             int h       = s->mb_height;
1653             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1654             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1655
1656             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1657         }
1658
1659         s->pict_type = s->new_picture.f->pict_type;
1660         //emms_c();
1661         ret = frame_start(s);
1662         if (ret < 0)
1663             return ret;
1664 vbv_retry:
1665         if (encode_picture(s, s->picture_number) < 0)
1666             return -1;
1667
1668         avctx->header_bits = s->header_bits;
1669         avctx->mv_bits     = s->mv_bits;
1670         avctx->misc_bits   = s->misc_bits;
1671         avctx->i_tex_bits  = s->i_tex_bits;
1672         avctx->p_tex_bits  = s->p_tex_bits;
1673         avctx->i_count     = s->i_count;
1674         // FIXME f/b_count in avctx
1675         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1676         avctx->skip_count  = s->skip_count;
1677
1678         frame_end(s);
1679
1680         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1681             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1682
1683         if (avctx->rc_buffer_size) {
1684             RateControlContext *rcc = &s->rc_context;
1685             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1686
1687             if (put_bits_count(&s->pb) > max_size &&
1688                 s->lambda < s->avctx->lmax) {
1689                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1690                                        (s->qscale + 1) / s->qscale);
1691                 if (s->adaptive_quant) {
1692                     int i;
1693                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1694                         s->lambda_table[i] =
1695                             FFMAX(s->lambda_table[i] + 1,
1696                                   s->lambda_table[i] * (s->qscale + 1) /
1697                                   s->qscale);
1698                 }
1699                 s->mb_skipped = 0;        // done in frame_start()
1700                 // done in encode_picture() so we must undo it
1701                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1702                     if (s->flipflop_rounding          ||
1703                         s->codec_id == AV_CODEC_ID_H263P ||
1704                         s->codec_id == AV_CODEC_ID_MPEG4)
1705                         s->no_rounding ^= 1;
1706                 }
1707                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1708                     s->time_base       = s->last_time_base;
1709                     s->last_non_b_time = s->time - s->pp_time;
1710                 }
1711                 for (i = 0; i < context_count; i++) {
1712                     PutBitContext *pb = &s->thread_context[i]->pb;
1713                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1714                 }
1715                 goto vbv_retry;
1716             }
1717
1718             av_assert0(s->avctx->rc_max_rate);
1719         }
1720
1721         if (s->flags & CODEC_FLAG_PASS1)
1722             ff_write_pass1_stats(s);
1723
1724         for (i = 0; i < 4; i++) {
1725             s->current_picture_ptr->f->error[i] = s->current_picture.f->error[i];
1726             avctx->error[i] += s->current_picture_ptr->f->error[i];
1727         }
1728
1729         if (s->flags & CODEC_FLAG_PASS1)
1730             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1731                    avctx->i_tex_bits + avctx->p_tex_bits ==
1732                        put_bits_count(&s->pb));
1733         flush_put_bits(&s->pb);
1734         s->frame_bits  = put_bits_count(&s->pb);
1735
1736         stuffing_count = ff_vbv_update(s, s->frame_bits);
1737         s->stuffing_bits = 8*stuffing_count;
1738         if (stuffing_count) {
1739             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1740                     stuffing_count + 50) {
1741                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1742                 return -1;
1743             }
1744
1745             switch (s->codec_id) {
1746             case AV_CODEC_ID_MPEG1VIDEO:
1747             case AV_CODEC_ID_MPEG2VIDEO:
1748                 while (stuffing_count--) {
1749                     put_bits(&s->pb, 8, 0);
1750                 }
1751             break;
1752             case AV_CODEC_ID_MPEG4:
1753                 put_bits(&s->pb, 16, 0);
1754                 put_bits(&s->pb, 16, 0x1C3);
1755                 stuffing_count -= 4;
1756                 while (stuffing_count--) {
1757                     put_bits(&s->pb, 8, 0xFF);
1758                 }
1759             break;
1760             default:
1761                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1762             }
1763             flush_put_bits(&s->pb);
1764             s->frame_bits  = put_bits_count(&s->pb);
1765         }
1766
1767         /* update mpeg1/2 vbv_delay for CBR */
1768         if (s->avctx->rc_max_rate                          &&
1769             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1770             s->out_format == FMT_MPEG1                     &&
1771             90000LL * (avctx->rc_buffer_size - 1) <=
1772                 s->avctx->rc_max_rate * 0xFFFFLL) {
1773             int vbv_delay, min_delay;
1774             double inbits  = s->avctx->rc_max_rate *
1775                              av_q2d(s->avctx->time_base);
1776             int    minbits = s->frame_bits - 8 *
1777                              (s->vbv_delay_ptr - s->pb.buf - 1);
1778             double bits    = s->rc_context.buffer_index + minbits - inbits;
1779
1780             if (bits < 0)
1781                 av_log(s->avctx, AV_LOG_ERROR,
1782                        "Internal error, negative bits\n");
1783
1784             assert(s->repeat_first_field == 0);
1785
1786             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1787             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1788                         s->avctx->rc_max_rate;
1789
1790             vbv_delay = FFMAX(vbv_delay, min_delay);
1791
1792             av_assert0(vbv_delay < 0xFFFF);
1793
1794             s->vbv_delay_ptr[0] &= 0xF8;
1795             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1796             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1797             s->vbv_delay_ptr[2] &= 0x07;
1798             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1799             avctx->vbv_delay     = vbv_delay * 300;
1800         }
1801         s->total_bits     += s->frame_bits;
1802         avctx->frame_bits  = s->frame_bits;
1803
1804         pkt->pts = s->current_picture.f->pts;
1805         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1806             if (!s->current_picture.f->coded_picture_number)
1807                 pkt->dts = pkt->pts - s->dts_delta;
1808             else
1809                 pkt->dts = s->reordered_pts;
1810             s->reordered_pts = pkt->pts;
1811         } else
1812             pkt->dts = pkt->pts;
1813         if (s->current_picture.f->key_frame)
1814             pkt->flags |= AV_PKT_FLAG_KEY;
1815         if (s->mb_info)
1816             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1817     } else {
1818         s->frame_bits = 0;
1819     }
1820
1821     /* release non-reference frames */
1822     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1823         if (!s->picture[i].reference)
1824             ff_mpeg_unref_picture(s, &s->picture[i]);
1825     }
1826
1827     av_assert1((s->frame_bits & 7) == 0);
1828
1829     pkt->size = s->frame_bits / 8;
1830     *got_packet = !!pkt->size;
1831     return 0;
1832 }
1833
1834 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1835                                                 int n, int threshold)
1836 {
1837     static const char tab[64] = {
1838         3, 2, 2, 1, 1, 1, 1, 1,
1839         1, 1, 1, 1, 1, 1, 1, 1,
1840         1, 1, 1, 1, 1, 1, 1, 1,
1841         0, 0, 0, 0, 0, 0, 0, 0,
1842         0, 0, 0, 0, 0, 0, 0, 0,
1843         0, 0, 0, 0, 0, 0, 0, 0,
1844         0, 0, 0, 0, 0, 0, 0, 0,
1845         0, 0, 0, 0, 0, 0, 0, 0
1846     };
1847     int score = 0;
1848     int run = 0;
1849     int i;
1850     int16_t *block = s->block[n];
1851     const int last_index = s->block_last_index[n];
1852     int skip_dc;
1853
1854     if (threshold < 0) {
1855         skip_dc = 0;
1856         threshold = -threshold;
1857     } else
1858         skip_dc = 1;
1859
1860     /* Are all we could set to zero already zero? */
1861     if (last_index <= skip_dc - 1)
1862         return;
1863
1864     for (i = 0; i <= last_index; i++) {
1865         const int j = s->intra_scantable.permutated[i];
1866         const int level = FFABS(block[j]);
1867         if (level == 1) {
1868             if (skip_dc && i == 0)
1869                 continue;
1870             score += tab[run];
1871             run = 0;
1872         } else if (level > 1) {
1873             return;
1874         } else {
1875             run++;
1876         }
1877     }
1878     if (score >= threshold)
1879         return;
1880     for (i = skip_dc; i <= last_index; i++) {
1881         const int j = s->intra_scantable.permutated[i];
1882         block[j] = 0;
1883     }
1884     if (block[0])
1885         s->block_last_index[n] = 0;
1886     else
1887         s->block_last_index[n] = -1;
1888 }
1889
1890 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1891                                int last_index)
1892 {
1893     int i;
1894     const int maxlevel = s->max_qcoeff;
1895     const int minlevel = s->min_qcoeff;
1896     int overflow = 0;
1897
1898     if (s->mb_intra) {
1899         i = 1; // skip clipping of intra dc
1900     } else
1901         i = 0;
1902
1903     for (; i <= last_index; i++) {
1904         const int j = s->intra_scantable.permutated[i];
1905         int level = block[j];
1906
1907         if (level > maxlevel) {
1908             level = maxlevel;
1909             overflow++;
1910         } else if (level < minlevel) {
1911             level = minlevel;
1912             overflow++;
1913         }
1914
1915         block[j] = level;
1916     }
1917
1918     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1919         av_log(s->avctx, AV_LOG_INFO,
1920                "warning, clipping %d dct coefficients to %d..%d\n",
1921                overflow, minlevel, maxlevel);
1922 }
1923
/**
 * Fill an 8x8 weight table from the local activity of an 8x8 pixel block.
 *
 * For every pixel the neighbourhood of up to 3x3 samples (cut off at the
 * block border) is examined; with n samples, sum S and sum of squares Q the
 * weight is 36 * ff_sqrt(n * Q - S * S) / n.
 *
 * @param weight output table of 64 entries, row-major
 * @param ptr    top-left sample of the 8x8 block
 * @param stride distance between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            /* number of samples in the clipped window */
            const int n     = (bottom - top) * (right - left);
            int total    = 0;
            int total_sq = 0;
            int u, v;

            for (v = top; v < bottom; v++) {
                for (u = left; u < right; u++) {
                    const int px = ptr[u + v * stride];

                    total    += px;
                    total_sq += px * px;
                }
            }
            weight[col + 8 * row] =
                (36 * ff_sqrt(n * total_sq - total * total)) / n;
        }
    }
}
1947
/**
 * Encode the macroblock at (s->mb_x, s->mb_y).
 *
 * Steps, in order: select the quantizer, locate the source pixels (going
 * through the edge emulation buffer when the macroblock crosses the picture
 * border), fill s->block either from the source (intra) or from the
 * difference between the source and the motion compensated prediction in
 * s->dest (inter), optionally switch to interlaced DCT, quantize every
 * block, optionally refine and eliminate coefficients, and finally hand the
 * blocks to the codec specific macroblock writer.
 *
 * @param s               encoder context
 * @param motion_x        forwarded unchanged to the codec specific writer
 * @param motion_y        forwarded unchanged to the codec specific writer
 * @param mb_block_height height of one macroblock in the chroma planes
 * @param mb_block_width  width of one macroblock in the chroma planes
 * @param mb_block_count  number of 8x8 blocks in one macroblock
 */
1948 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1949                                                 int motion_x, int motion_y,
1950                                                 int mb_block_height,
1951                                                 int mb_block_width,
1952                                                 int mb_block_count)
1953 {
1954     int16_t weight[12][64];
1955     int16_t orig[12][64];
1956     const int mb_x = s->mb_x;
1957     const int mb_y = s->mb_y;
1958     int i;
1959     int skip_dct[12];
1960     int dct_offset = s->linesize * 8; // default for progressive frames
1961     int uv_dct_offset = s->uvlinesize * 8;
1962     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1963     ptrdiff_t wrap_y, wrap_c;
1964
1965     for (i = 0; i < mb_block_count; i++)
1966         skip_dct[i] = s->skipdct;
1967
         /* Per-macroblock quantizer selection. */
1968     if (s->adaptive_quant) {
1969         const int last_qp = s->qscale;
1970         const int mb_xy = mb_x + mb_y * s->mb_stride;
1971
1972         s->lambda = s->lambda_table[mb_xy];
1973         update_qscale(s);
1974
1975         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1976             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1977             s->dquant = s->qscale - last_qp;
1978
1979             if (s->out_format == FMT_H263) {
1980                 s->dquant = av_clip(s->dquant, -2, 2);
1981
1982                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1983                     if (!s->mb_intra) {
1984                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1985                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1986                                 s->dquant = 0;
1987                         }
1988                         if (s->mv_type == MV_TYPE_8X8)
1989                             s->dquant = 0;
1990                     }
1991                 }
1992             }
1993         }
1994         ff_set_qscale(s, last_qp + s->dquant);
1995     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1996         ff_set_qscale(s, s->qscale + s->dquant);
1997
1998     wrap_y = s->linesize;
1999     wrap_c = s->uvlinesize;
2000     ptr_y  = s->new_picture.f->data[0] +
2001              (mb_y * 16 * wrap_y)              + mb_x * 16;
2002     ptr_cb = s->new_picture.f->data[1] +
2003              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2004     ptr_cr = s->new_picture.f->data[2] +
2005              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2006
         /* The macroblock reaches past the right or bottom picture border:
          * route all three planes through s->vdsp.emulated_edge_mc() and read
          * the source from the edge emulation buffer instead. */
2007     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2008         uint8_t *ebuf = s->edge_emu_buffer + 32;
2009         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2010         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2011         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2012                                  wrap_y, wrap_y,
2013                                  16, 16, mb_x * 16, mb_y * 16,
2014                                  s->width, s->height);
2015         ptr_y = ebuf;
2016         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
2017                                  wrap_c, wrap_c,
2018                                  mb_block_width, mb_block_height,
2019                                  mb_x * mb_block_width, mb_y * mb_block_height,
2020                                  cw, ch);
2021         ptr_cb = ebuf + 18 * wrap_y;
2022         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 16, ptr_cr,
2023                                  wrap_c, wrap_c,
2024                                  mb_block_width, mb_block_height,
2025                                  mb_x * mb_block_width, mb_y * mb_block_height,
2026                                  cw, ch);
2027         ptr_cr = ebuf + 18 * wrap_y + 16;
2028     }
2029
         /* Intra: the blocks are loaded straight from the source with
          * get_pixels(). */
2030     if (s->mb_intra) {
2031         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2032             int progressive_score, interlaced_score;
2033
2034             s->interlaced_dct = 0;
2035             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
2036                                                     NULL, wrap_y, 8) +
2037                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2038                                                     NULL, wrap_y, 8) - 400;
2039
2040             if (progressive_score > 0) {
2041                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
2042                                                        NULL, wrap_y * 2, 8) +
2043                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
2044                                                        NULL, wrap_y * 2, 8);
2045                 if (progressive_score > interlaced_score) {
2046                     s->interlaced_dct = 1;
2047
                         /* Field based blocks: the lower blocks start one line
                          * down and every block steps over two lines. */
2048                     dct_offset = wrap_y;
2049                     uv_dct_offset = wrap_c;
2050                     wrap_y <<= 1;
2051                     if (s->chroma_format == CHROMA_422 ||
2052                         s->chroma_format == CHROMA_444)
2053                         wrap_c <<= 1;
2054                 }
2055             }
2056         }
2057
2058         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
2059         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
2060         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
2061         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
2062
2063         if (s->flags & CODEC_FLAG_GRAY) {
2064             skip_dct[4] = 1;
2065             skip_dct[5] = 1;
2066         } else {
2067             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2068             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2069             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2070                 s->dsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2071                 s->dsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2072             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2073                 s->dsp.get_pixels(s->block[6], ptr_cb + 8, wrap_c);
2074                 s->dsp.get_pixels(s->block[7], ptr_cr + 8, wrap_c);
2075                 s->dsp.get_pixels(s->block[8], ptr_cb + uv_dct_offset, wrap_c);
2076                 s->dsp.get_pixels(s->block[9], ptr_cr + uv_dct_offset, wrap_c);
2077                 s->dsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2078                 s->dsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2079             }
2080         }
2081     } else {
             /* Inter: build the prediction in s->dest with ff_MPV_motion() and
              * store source minus prediction in the blocks. */
2082         op_pixels_func (*op_pix)[4];
2083         qpel_mc_func (*op_qpix)[16];
2084         uint8_t *dest_y, *dest_cb, *dest_cr;
2085
2086         dest_y  = s->dest[0];
2087         dest_cb = s->dest[1];
2088         dest_cr = s->dest[2];
2089
2090         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2091             op_pix  = s->hdsp.put_pixels_tab;
2092             op_qpix = s->qdsp.put_qpel_pixels_tab;
2093         } else {
2094             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2095             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2096         }
2097
2098         if (s->mv_dir & MV_DIR_FORWARD) {
2099             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
2100                           s->last_picture.f->data,
2101                           op_pix, op_qpix);
                 /* A following backward prediction is applied with the avg
                  * function tables rather than the put ones. */
2102             op_pix  = s->hdsp.avg_pixels_tab;
2103             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2104         }
2105         if (s->mv_dir & MV_DIR_BACKWARD) {
2106             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
2107                           s->next_picture.f->data,
2108                           op_pix, op_qpix);
2109         }
2110
2111         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2112             int progressive_score, interlaced_score;
2113
2114             s->interlaced_dct = 0;
2115             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
2116                                                     ptr_y,              wrap_y,
2117                                                     8) +
2118                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
2119                                                     ptr_y + wrap_y * 8, wrap_y,
2120                                                     8) - 400;
2121
2122             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2123                 progressive_score -= 400;
2124
2125             if (progressive_score > 0) {
2126                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
2127                                                        ptr_y,
2128                                                        wrap_y * 2, 8) +
2129                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
2130                                                        ptr_y + wrap_y,
2131                                                        wrap_y * 2, 8);
2132
2133                 if (progressive_score > interlaced_score) {
2134                     s->interlaced_dct = 1;
2135
2136                     dct_offset = wrap_y;
2137                     uv_dct_offset = wrap_c;
2138                     wrap_y <<= 1;
2139                     if (s->chroma_format == CHROMA_422)
2140                         wrap_c <<= 1;
2141                 }
2142             }
2143         }
2144
2145         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2146         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2147         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2148                            dest_y + dct_offset, wrap_y);
2149         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2150                            dest_y + dct_offset + 8, wrap_y);
2151
2152         if (s->flags & CODEC_FLAG_GRAY) {
2153             skip_dct[4] = 1;
2154             skip_dct[5] = 1;
2155         } else {
2156             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2157             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2158             if (!s->chroma_y_shift) { /* 422 */
2159                 s->dsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2160                                    dest_cb + uv_dct_offset, wrap_c);
2161                 s->dsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2162                                    dest_cr + uv_dct_offset, wrap_c);
2163             }
2164         }
2165         /* pre quantization */
             /* When the stored mc_mb_var of this macroblock is small relative
              * to qscale, blocks whose SAD against the prediction is below
              * 20 * qscale are not transformed at all. */
2166         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2167                 2 * s->qscale * s->qscale) {
2168             // FIXME optimize
2169             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
2170                               wrap_y, 8) < 20 * s->qscale)
2171                 skip_dct[0] = 1;
2172             if (s->dsp.sad[1](NULL, ptr_y + 8,
2173                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2174                 skip_dct[1] = 1;
2175             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
2176                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
2177                 skip_dct[2] = 1;
2178             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
2179                               dest_y + dct_offset + 8,
2180                               wrap_y, 8) < 20 * s->qscale)
2181                 skip_dct[3] = 1;
2182             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
2183                               wrap_c, 8) < 20 * s->qscale)
2184                 skip_dct[4] = 1;
2185             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
2186                               wrap_c, 8) < 20 * s->qscale)
2187                 skip_dct[5] = 1;
2188             if (!s->chroma_y_shift) { /* 422 */
2189                 if (s->dsp.sad[1](NULL, ptr_cb + uv_dct_offset,
2190                                   dest_cb + uv_dct_offset,
2191                                   wrap_c, 8) < 20 * s->qscale)
2192                     skip_dct[6] = 1;
2193                 if (s->dsp.sad[1](NULL, ptr_cr + uv_dct_offset,
2194                                   dest_cr + uv_dct_offset,
2195                                   wrap_c, 8) < 20 * s->qscale)
2196                     skip_dct[7] = 1;
2197             }
2198         }
2199     }
2200
         /* Noise shaping needs per-block visual weights and a copy of the
          * unquantized blocks for dct_quantize_refine() below.
          * NOTE(review): only weight[0..7] are filled here, while the refine
          * loop runs up to mb_block_count; with 12 blocks weight[8..11] would
          * be read uninitialized - confirm noise shaping is never combined
          * with 12-block macroblocks. */
2201     if (s->quantizer_noise_shaping) {
2202         if (!skip_dct[0])
2203             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2204         if (!skip_dct[1])
2205             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2206         if (!skip_dct[2])
2207             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2208         if (!skip_dct[3])
2209             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2210         if (!skip_dct[4])
2211             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2212         if (!skip_dct[5])
2213             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2214         if (!s->chroma_y_shift) { /* 422 */
2215             if (!skip_dct[6])
2216                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2217                                   wrap_c);
2218             if (!skip_dct[7])
2219                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2220                                   wrap_c);
2221         }
2222         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2223     }
2224
2225     /* DCT & quantize */
2226     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2227     {
2228         for (i = 0; i < mb_block_count; i++) {
2229             if (!skip_dct[i]) {
2230                 int overflow;
2231                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2232                 // FIXME we could decide to change to quantizer instead of
2233                 // clipping
2234                 // JS: I don't think that would be a good idea it could lower
2235                 //     quality instead of improve it. Just INTRADC clipping
2236                 //     deserves changes in quantizer
2237                 if (overflow)
2238                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2239             } else
2240                 s->block_last_index[i] = -1;
2241         }
2242         if (s->quantizer_noise_shaping) {
2243             for (i = 0; i < mb_block_count; i++) {
2244                 if (!skip_dct[i]) {
2245                     s->block_last_index[i] =
2246                         dct_quantize_refine(s, s->block[i], weight[i],
2247                                             orig[i], i, s->qscale);
2248                 }
2249             }
2250         }
2251
             /* Coefficient elimination is applied to inter macroblocks only. */
2252         if (s->luma_elim_threshold && !s->mb_intra)
2253             for (i = 0; i < 4; i++)
2254                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2255         if (s->chroma_elim_threshold && !s->mb_intra)
2256             for (i = 4; i < mb_block_count; i++)
2257                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2258
2259         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2260             for (i = 0; i < mb_block_count; i++) {
2261                 if (s->block_last_index[i] == -1)
2262                     s->coded_score[i] = INT_MAX / 256;
2263             }
2264         }
2265     }
2266
         /* Gray-only intra macroblocks: the chroma blocks carry nothing but a
          * DC value of 1024 scaled by c_dc_scale. */
2267     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2268         s->block_last_index[4] =
2269         s->block_last_index[5] = 0;
2270         s->block[4][0] =
2271         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2272         if (!s->chroma_y_shift) { /* 422 / 444 */
2273             for (i=6; i<12; i++) {
2274                 s->block_last_index[i] = 0;
2275                 s->block[i][0] = s->block[4][0];
2276             }
2277         }
2278     }
2279
2280     // non c quantize code returns incorrect block_last_index FIXME
         /* Recompute the last index by scanning backwards for the last
          * non-zero coefficient in scan order. */
2281     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2282         for (i = 0; i < mb_block_count; i++) {
2283             int j;
2284             if (s->block_last_index[i] > 0) {
2285                 for (j = 63; j > 0; j--) {
2286                     if (s->block[i][s->intra_scantable.permutated[j]])
2287                         break;
2288                 }
2289                 s->block_last_index[i] = j;
2290             }
2291         }
2292     }
2293
2294     /* huffman encode */
2295     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2296     case AV_CODEC_ID_MPEG1VIDEO:
2297     case AV_CODEC_ID_MPEG2VIDEO:
2298         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2299             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2300         break;
2301     case AV_CODEC_ID_MPEG4:
2302         if (CONFIG_MPEG4_ENCODER)
2303             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2304         break;
2305     case AV_CODEC_ID_MSMPEG4V2:
2306     case AV_CODEC_ID_MSMPEG4V3:
2307     case AV_CODEC_ID_WMV1:
2308         if (CONFIG_MSMPEG4_ENCODER)
2309             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2310         break;
2311     case AV_CODEC_ID_WMV2:
2312         if (CONFIG_WMV2_ENCODER)
2313             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2314         break;
2315     case AV_CODEC_ID_H261:
2316         if (CONFIG_H261_ENCODER)
2317             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2318         break;
2319     case AV_CODEC_ID_H263:
2320     case AV_CODEC_ID_H263P:
2321     case AV_CODEC_ID_FLV1:
2322     case AV_CODEC_ID_RV10:
2323     case AV_CODEC_ID_RV20:
2324         if (CONFIG_H263_ENCODER)
2325             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2326         break;
2327     case AV_CODEC_ID_MJPEG:
2328     case AV_CODEC_ID_AMV:
2329         if (CONFIG_MJPEG_ENCODER)
2330             ff_mjpeg_encode_mb(s, s->block);
2331         break;
2332     default:
2333         av_assert1(0);
2334     }
2335 }
2336
2337 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2338 {
2339     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2340     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2341     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2342 }
2343
2344 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2345     int i;
2346
2347     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2348
2349     /* mpeg1 */
2350     d->mb_skip_run= s->mb_skip_run;
2351     for(i=0; i<3; i++)
2352         d->last_dc[i] = s->last_dc[i];
2353
2354     /* statistics */
2355     d->mv_bits= s->mv_bits;
2356     d->i_tex_bits= s->i_tex_bits;
2357     d->p_tex_bits= s->p_tex_bits;
2358     d->i_count= s->i_count;
2359     d->f_count= s->f_count;
2360     d->b_count= s->b_count;
2361     d->skip_count= s->skip_count;
2362     d->misc_bits= s->misc_bits;
2363     d->last_bits= 0;
2364
2365     d->mb_skipped= 0;
2366     d->qscale= s->qscale;
2367     d->dquant= s->dquant;
2368
2369     d->esc3_level_length= s->esc3_level_length;
2370 }
2371
2372 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2373     int i;
2374
2375     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2376     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2377
2378     /* mpeg1 */
2379     d->mb_skip_run= s->mb_skip_run;
2380     for(i=0; i<3; i++)
2381         d->last_dc[i] = s->last_dc[i];
2382
2383     /* statistics */
2384     d->mv_bits= s->mv_bits;
2385     d->i_tex_bits= s->i_tex_bits;
2386     d->p_tex_bits= s->p_tex_bits;
2387     d->i_count= s->i_count;
2388     d->f_count= s->f_count;
2389     d->b_count= s->b_count;
2390     d->skip_count= s->skip_count;
2391     d->misc_bits= s->misc_bits;
2392
2393     d->mb_intra= s->mb_intra;
2394     d->mb_skipped= s->mb_skipped;
2395     d->mv_type= s->mv_type;
2396     d->mv_dir= s->mv_dir;
2397     d->pb= s->pb;
2398     if(s->data_partitioning){
2399         d->pb2= s->pb2;
2400         d->tex_pb= s->tex_pb;
2401     }
2402     d->block= s->block;
2403     for(i=0; i<8; i++)
2404         d->block_last_index[i]= s->block_last_index[i];
2405     d->interlaced_dct= s->interlaced_dct;
2406     d->qscale= s->qscale;
2407
2408     d->esc3_level_length= s->esc3_level_length;
2409 }
2410
2411 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2412                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2413                            int *dmin, int *next_block, int motion_x, int motion_y)
2414 {
2415     int score;
2416     uint8_t *dest_backup[3];
2417
2418     copy_context_before_encode(s, backup, type);
2419
2420     s->block= s->blocks[*next_block];
2421     s->pb= pb[*next_block];
2422     if(s->data_partitioning){
2423         s->pb2   = pb2   [*next_block];
2424         s->tex_pb= tex_pb[*next_block];
2425     }
2426
2427     if(*next_block){
2428         memcpy(dest_backup, s->dest, sizeof(s->dest));
2429         s->dest[0] = s->rd_scratchpad;
2430         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2431         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2432         av_assert0(s->linesize >= 32); //FIXME
2433     }
2434
2435     encode_mb(s, motion_x, motion_y);
2436
2437     score= put_bits_count(&s->pb);
2438     if(s->data_partitioning){
2439         score+= put_bits_count(&s->pb2);
2440         score+= put_bits_count(&s->tex_pb);
2441     }
2442
2443     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2444         ff_MPV_decode_mb(s, s->block);
2445
2446         score *= s->lambda2;
2447         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2448     }
2449
2450     if(*next_block){
2451         memcpy(s->dest, dest_backup, sizeof(s->dest));
2452     }
2453
2454     if(score<*dmin){
2455         *dmin= score;
2456         *next_block^=1;
2457
2458         copy_context_after_encode(best, s, type);
2459     }
2460 }
2461
2462 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2463     uint32_t *sq = ff_square_tab + 256;
2464     int acc=0;
2465     int x,y;
2466
2467     if(w==16 && h==16)
2468         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2469     else if(w==8 && h==8)
2470         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2471
2472     for(y=0; y<h; y++){
2473         for(x=0; x<w; x++){
2474             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2475         }
2476     }
2477
2478     av_assert2(acc>=0);
2479
2480     return acc;
2481 }
2482
2483 static int sse_mb(MpegEncContext *s){
2484     int w= 16;
2485     int h= 16;
2486
2487     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2488     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2489
2490     if(w==16 && h==16)
2491       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2492         return  s->dsp.nsse[0](s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2493                +s->dsp.nsse[1](s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2494                +s->dsp.nsse[1](s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2495       }else{
2496         return  s->dsp.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2497                +s->dsp.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2498                +s->dsp.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2499       }
2500     else
2501         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2502                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2503                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2504 }
2505
2506 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2507     MpegEncContext *s= *(void**)arg;
2508
2509
2510     s->me.pre_pass=1;
2511     s->me.dia_size= s->avctx->pre_dia_size;
2512     s->first_slice_line=1;
2513     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2514         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2515             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2516         }
2517         s->first_slice_line=0;
2518     }
2519
2520     s->me.pre_pass=0;
2521
2522     return 0;
2523 }
2524
2525 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2526     MpegEncContext *s= *(void**)arg;
2527
2528     ff_check_alignment();
2529
2530     s->me.dia_size= s->avctx->dia_size;
2531     s->first_slice_line=1;
2532     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2533         s->mb_x=0; //for block init below
2534         ff_init_block_index(s);
2535         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2536             s->block_index[0]+=2;
2537             s->block_index[1]+=2;
2538             s->block_index[2]+=2;
2539             s->block_index[3]+=2;
2540
2541             /* compute motion vector & mb_type and store in context */
2542             if(s->pict_type==AV_PICTURE_TYPE_B)
2543                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2544             else
2545                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2546         }
2547         s->first_slice_line=0;
2548     }
2549     return 0;
2550 }
2551
2552 static int mb_var_thread(AVCodecContext *c, void *arg){
2553     MpegEncContext *s= *(void**)arg;
2554     int mb_x, mb_y;
2555
2556     ff_check_alignment();
2557
2558     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2559         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2560             int xx = mb_x * 16;
2561             int yy = mb_y * 16;
2562             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2563             int varc;
2564             int sum = s->dsp.pix_sum(pix, s->linesize);
2565
2566             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2567
2568             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2569             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2570             s->me.mb_var_sum_temp    += varc;
2571         }
2572     }
2573     return 0;
2574 }
2575
2576 static void write_slice_end(MpegEncContext *s){
2577     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2578         if(s->partitioned_frame){
2579             ff_mpeg4_merge_partitions(s);
2580         }
2581
2582         ff_mpeg4_stuffing(&s->pb);
2583     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2584         ff_mjpeg_encode_stuffing(s);
2585     }
2586
2587     avpriv_align_put_bits(&s->pb);
2588     flush_put_bits(&s->pb);
2589
2590     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2591         s->misc_bits+= get_bits_diff(s);
2592 }
2593
2594 static void write_mb_info(MpegEncContext *s)
2595 {
2596     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2597     int offset = put_bits_count(&s->pb);
2598     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2599     int gobn = s->mb_y / s->gob_index;
2600     int pred_x, pred_y;
2601     if (CONFIG_H263_ENCODER)
2602         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2603     bytestream_put_le32(&ptr, offset);
2604     bytestream_put_byte(&ptr, s->qscale);
2605     bytestream_put_byte(&ptr, gobn);
2606     bytestream_put_le16(&ptr, mba);
2607     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2608     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2609     /* 4MV not implemented */
2610     bytestream_put_byte(&ptr, 0); /* hmv2 */
2611     bytestream_put_byte(&ptr, 0); /* vmv2 */
2612 }
2613
2614 static void update_mb_info(MpegEncContext *s, int startcode)
2615 {
2616     if (!s->mb_info)
2617         return;
2618     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2619         s->mb_info_size += 12;
2620         s->prev_mb_info = s->last_mb_info;
2621     }
2622     if (startcode) {
2623         s->prev_mb_info = put_bits_count(&s->pb)/8;
2624         /* This might have incremented mb_info_size above, and we return without
2625          * actually writing any info into that slot yet. But in that case,
2626          * this will be called again at the start of the after writing the
2627          * start code, actually writing the mb info. */
2628         return;
2629     }
2630
2631     s->last_mb_info = put_bits_count(&s->pb)/8;
2632     if (!s->mb_info_size)
2633         s->mb_info_size += 12;
2634     write_mb_info(s);
2635 }
2636
2637 static int encode_thread(AVCodecContext *c, void *arg){
2638     MpegEncContext *s= *(void**)arg;
2639     int mb_x, mb_y, pdif = 0;
2640     int chr_h= 16>>s->chroma_y_shift;
2641     int i, j;
2642     MpegEncContext best_s, backup_s;
2643     uint8_t bit_buf[2][MAX_MB_BYTES];
2644     uint8_t bit_buf2[2][MAX_MB_BYTES];
2645     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2646     PutBitContext pb[2], pb2[2], tex_pb[2];
2647
2648     ff_check_alignment();
2649
2650     for(i=0; i<2; i++){
2651         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2652         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2653         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2654     }
2655
2656     s->last_bits= put_bits_count(&s->pb);
2657     s->mv_bits=0;
2658     s->misc_bits=0;
2659     s->i_tex_bits=0;
2660     s->p_tex_bits=0;
2661     s->i_count=0;
2662     s->f_count=0;
2663     s->b_count=0;
2664     s->skip_count=0;
2665
2666     for(i=0; i<3; i++){
2667         /* init last dc values */
2668         /* note: quant matrix value (8) is implied here */
2669         s->last_dc[i] = 128 << s->intra_dc_precision;
2670
2671         s->current_picture.f->error[i] = 0;
2672     }
2673     if(s->codec_id==AV_CODEC_ID_AMV){
2674         s->last_dc[0] = 128*8/13;
2675         s->last_dc[1] = 128*8/14;
2676         s->last_dc[2] = 128*8/14;
2677     }
2678     s->mb_skip_run = 0;
2679     memset(s->last_mv, 0, sizeof(s->last_mv));
2680
2681     s->last_mv_dir = 0;
2682
2683     switch(s->codec_id){
2684     case AV_CODEC_ID_H263:
2685     case AV_CODEC_ID_H263P:
2686     case AV_CODEC_ID_FLV1:
2687         if (CONFIG_H263_ENCODER)
2688             s->gob_index = ff_h263_get_gob_height(s);
2689         break;
2690     case AV_CODEC_ID_MPEG4:
2691         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2692             ff_mpeg4_init_partitions(s);
2693         break;
2694     }
2695
2696     s->resync_mb_x=0;
2697     s->resync_mb_y=0;
2698     s->first_slice_line = 1;
2699     s->ptr_lastgob = s->pb.buf;
2700     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2701         s->mb_x=0;
2702         s->mb_y= mb_y;
2703
2704         ff_set_qscale(s, s->qscale);
2705         ff_init_block_index(s);
2706
2707         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2708             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2709             int mb_type= s->mb_type[xy];
2710 //            int d;
2711             int dmin= INT_MAX;
2712             int dir;
2713
2714             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2715                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2716                 return -1;
2717             }
2718             if(s->data_partitioning){
2719                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2720                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2721                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2722                     return -1;
2723                 }
2724             }
2725
2726             s->mb_x = mb_x;
2727             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2728             ff_update_block_index(s);
2729
2730             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2731                 ff_h261_reorder_mb_index(s);
2732                 xy= s->mb_y*s->mb_stride + s->mb_x;
2733                 mb_type= s->mb_type[xy];
2734             }
2735
2736             /* write gob / video packet header  */
2737             if(s->rtp_mode){
2738                 int current_packet_size, is_gob_start;
2739
2740                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2741
2742                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2743
2744                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2745
2746                 switch(s->codec_id){
2747                 case AV_CODEC_ID_H263:
2748                 case AV_CODEC_ID_H263P:
2749                     if(!s->h263_slice_structured)
2750                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2751                     break;
2752                 case AV_CODEC_ID_MPEG2VIDEO:
2753                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2754                 case AV_CODEC_ID_MPEG1VIDEO:
2755                     if(s->mb_skip_run) is_gob_start=0;
2756                     break;
2757                 case AV_CODEC_ID_MJPEG:
2758                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2759                     break;
2760                 }
2761
2762                 if(is_gob_start){
2763                     if(s->start_mb_y != mb_y || mb_x!=0){
2764                         write_slice_end(s);
2765
2766                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2767                             ff_mpeg4_init_partitions(s);
2768                         }
2769                     }
2770
2771                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2772                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2773
2774                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2775                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2776                         int d = 100 / s->error_rate;
2777                         if(r % d == 0){
2778                             current_packet_size=0;
2779                             s->pb.buf_ptr= s->ptr_lastgob;
2780                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2781                         }
2782                     }
2783
2784                     if (s->avctx->rtp_callback){
2785                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2786                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2787                     }
2788                     update_mb_info(s, 1);
2789
2790                     switch(s->codec_id){
2791                     case AV_CODEC_ID_MPEG4:
2792                         if (CONFIG_MPEG4_ENCODER) {
2793                             ff_mpeg4_encode_video_packet_header(s);
2794                             ff_mpeg4_clean_buffers(s);
2795                         }
2796                     break;
2797                     case AV_CODEC_ID_MPEG1VIDEO:
2798                     case AV_CODEC_ID_MPEG2VIDEO:
2799                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2800                             ff_mpeg1_encode_slice_header(s);
2801                             ff_mpeg1_clean_buffers(s);
2802                         }
2803                     break;
2804                     case AV_CODEC_ID_H263:
2805                     case AV_CODEC_ID_H263P:
2806                         if (CONFIG_H263_ENCODER)
2807                             ff_h263_encode_gob_header(s, mb_y);
2808                     break;
2809                     }
2810
2811                     if(s->flags&CODEC_FLAG_PASS1){
2812                         int bits= put_bits_count(&s->pb);
2813                         s->misc_bits+= bits - s->last_bits;
2814                         s->last_bits= bits;
2815                     }
2816
2817                     s->ptr_lastgob += current_packet_size;
2818                     s->first_slice_line=1;
2819                     s->resync_mb_x=mb_x;
2820                     s->resync_mb_y=mb_y;
2821                 }
2822             }
2823
2824             if(  (s->resync_mb_x   == s->mb_x)
2825                && s->resync_mb_y+1 == s->mb_y){
2826                 s->first_slice_line=0;
2827             }
2828
2829             s->mb_skipped=0;
2830             s->dquant=0; //only for QP_RD
2831
2832             update_mb_info(s, 0);
2833
2834             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2835                 int next_block=0;
2836                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2837
2838                 copy_context_before_encode(&backup_s, s, -1);
2839                 backup_s.pb= s->pb;
2840                 best_s.data_partitioning= s->data_partitioning;
2841                 best_s.partitioned_frame= s->partitioned_frame;
2842                 if(s->data_partitioning){
2843                     backup_s.pb2= s->pb2;
2844                     backup_s.tex_pb= s->tex_pb;
2845                 }
2846
2847                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2848                     s->mv_dir = MV_DIR_FORWARD;
2849                     s->mv_type = MV_TYPE_16X16;
2850                     s->mb_intra= 0;
2851                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2852                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2853                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2854                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2855                 }
2856                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2857                     s->mv_dir = MV_DIR_FORWARD;
2858                     s->mv_type = MV_TYPE_FIELD;
2859                     s->mb_intra= 0;
2860                     for(i=0; i<2; i++){
2861                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2862                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2863                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2864                     }
2865                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2866                                  &dmin, &next_block, 0, 0);
2867                 }
2868                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2869                     s->mv_dir = MV_DIR_FORWARD;
2870                     s->mv_type = MV_TYPE_16X16;
2871                     s->mb_intra= 0;
2872                     s->mv[0][0][0] = 0;
2873                     s->mv[0][0][1] = 0;
2874                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2875                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2876                 }
2877                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2878                     s->mv_dir = MV_DIR_FORWARD;
2879                     s->mv_type = MV_TYPE_8X8;
2880                     s->mb_intra= 0;
2881                     for(i=0; i<4; i++){
2882                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2883                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2884                     }
2885                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2886                                  &dmin, &next_block, 0, 0);
2887                 }
2888                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2889                     s->mv_dir = MV_DIR_FORWARD;
2890                     s->mv_type = MV_TYPE_16X16;
2891                     s->mb_intra= 0;
2892                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2893                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2894                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2895                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2896                 }
2897                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2898                     s->mv_dir = MV_DIR_BACKWARD;
2899                     s->mv_type = MV_TYPE_16X16;
2900                     s->mb_intra= 0;
2901                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2902                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2903                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2904                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2905                 }
2906                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2907                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2908                     s->mv_type = MV_TYPE_16X16;
2909                     s->mb_intra= 0;
2910                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2911                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2912                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2913                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2914                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2915                                  &dmin, &next_block, 0, 0);
2916                 }
2917                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2918                     s->mv_dir = MV_DIR_FORWARD;
2919                     s->mv_type = MV_TYPE_FIELD;
2920                     s->mb_intra= 0;
2921                     for(i=0; i<2; i++){
2922                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2923                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2924                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2925                     }
2926                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2927                                  &dmin, &next_block, 0, 0);
2928                 }
2929                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2930                     s->mv_dir = MV_DIR_BACKWARD;
2931                     s->mv_type = MV_TYPE_FIELD;
2932                     s->mb_intra= 0;
2933                     for(i=0; i<2; i++){
2934                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2935                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2936                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2937                     }
2938                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2939                                  &dmin, &next_block, 0, 0);
2940                 }
2941                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2942                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2943                     s->mv_type = MV_TYPE_FIELD;
2944                     s->mb_intra= 0;
2945                     for(dir=0; dir<2; dir++){
2946                         for(i=0; i<2; i++){
2947                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2948                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2949                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2950                         }
2951                     }
2952                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2953                                  &dmin, &next_block, 0, 0);
2954                 }
2955                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2956                     s->mv_dir = 0;
2957                     s->mv_type = MV_TYPE_16X16;
2958                     s->mb_intra= 1;
2959                     s->mv[0][0][0] = 0;
2960                     s->mv[0][0][1] = 0;
2961                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2962                                  &dmin, &next_block, 0, 0);
2963                     if(s->h263_pred || s->h263_aic){
2964                         if(best_s.mb_intra)
2965                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2966                         else
2967                             ff_clean_intra_table_entries(s); //old mode?
2968                     }
2969                 }
2970
2971                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2972                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2973                         const int last_qp= backup_s.qscale;
2974                         int qpi, qp, dc[6];
2975                         int16_t ac[6][16];
2976                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2977                         static const int dquant_tab[4]={-1,1,-2,2};
2978                         int storecoefs = s->mb_intra && s->dc_val[0];
2979
2980                         av_assert2(backup_s.dquant == 0);
2981
2982                         //FIXME intra
2983                         s->mv_dir= best_s.mv_dir;
2984                         s->mv_type = MV_TYPE_16X16;
2985                         s->mb_intra= best_s.mb_intra;
2986                         s->mv[0][0][0] = best_s.mv[0][0][0];
2987                         s->mv[0][0][1] = best_s.mv[0][0][1];
2988                         s->mv[1][0][0] = best_s.mv[1][0][0];
2989                         s->mv[1][0][1] = best_s.mv[1][0][1];
2990
2991                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2992                         for(; qpi<4; qpi++){
2993                             int dquant= dquant_tab[qpi];
2994                             qp= last_qp + dquant;
2995                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2996                                 continue;
2997                             backup_s.dquant= dquant;
2998                             if(storecoefs){
2999                                 for(i=0; i<6; i++){
3000                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3001                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3002                                 }
3003                             }
3004
3005                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3006                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3007                             if(best_s.qscale != qp){
3008                                 if(storecoefs){
3009                                     for(i=0; i<6; i++){
3010                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3011                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3012                                     }
3013                                 }
3014                             }
3015                         }
3016                     }
3017                 }
3018                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3019                     int mx= s->b_direct_mv_table[xy][0];
3020                     int my= s->b_direct_mv_table[xy][1];
3021
3022                     backup_s.dquant = 0;
3023                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3024                     s->mb_intra= 0;
3025                     ff_mpeg4_set_direct_mv(s, mx, my);
3026                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3027                                  &dmin, &next_block, mx, my);
3028                 }
3029                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3030                     backup_s.dquant = 0;
3031                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3032                     s->mb_intra= 0;
3033                     ff_mpeg4_set_direct_mv(s, 0, 0);
3034                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3035                                  &dmin, &next_block, 0, 0);
3036                 }
3037                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3038                     int coded=0;
3039                     for(i=0; i<6; i++)
3040                         coded |= s->block_last_index[i];
3041                     if(coded){
3042                         int mx,my;
3043                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3044                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3045                             mx=my=0; //FIXME find the one we actually used
3046                             ff_mpeg4_set_direct_mv(s, mx, my);
3047                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3048                             mx= s->mv[1][0][0];
3049                             my= s->mv[1][0][1];
3050                         }else{
3051                             mx= s->mv[0][0][0];
3052                             my= s->mv[0][0][1];
3053                         }
3054
3055                         s->mv_dir= best_s.mv_dir;
3056                         s->mv_type = best_s.mv_type;
3057                         s->mb_intra= 0;
3058 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3059                         s->mv[0][0][1] = best_s.mv[0][0][1];
3060                         s->mv[1][0][0] = best_s.mv[1][0][0];
3061                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3062                         backup_s.dquant= 0;
3063                         s->skipdct=1;
3064                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3065                                         &dmin, &next_block, mx, my);
3066                         s->skipdct=0;
3067                     }
3068                 }
3069
3070                 s->current_picture.qscale_table[xy] = best_s.qscale;
3071
3072                 copy_context_after_encode(s, &best_s, -1);
3073
3074                 pb_bits_count= put_bits_count(&s->pb);
3075                 flush_put_bits(&s->pb);
3076                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3077                 s->pb= backup_s.pb;
3078
3079                 if(s->data_partitioning){
3080                     pb2_bits_count= put_bits_count(&s->pb2);
3081                     flush_put_bits(&s->pb2);
3082                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3083                     s->pb2= backup_s.pb2;
3084
3085                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3086                     flush_put_bits(&s->tex_pb);
3087                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3088                     s->tex_pb= backup_s.tex_pb;
3089                 }
3090                 s->last_bits= put_bits_count(&s->pb);
3091
3092                 if (CONFIG_H263_ENCODER &&
3093                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3094                     ff_h263_update_motion_val(s);
3095
3096                 if(next_block==0){ //FIXME 16 vs linesize16
3097                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
3098                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3099                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3100                 }
3101
3102                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3103                     ff_MPV_decode_mb(s, s->block);
3104             } else {
3105                 int motion_x = 0, motion_y = 0;
3106                 s->mv_type=MV_TYPE_16X16;
3107                 // only one MB-Type possible
3108
3109                 switch(mb_type){
3110                 case CANDIDATE_MB_TYPE_INTRA:
3111                     s->mv_dir = 0;
3112                     s->mb_intra= 1;
3113                     motion_x= s->mv[0][0][0] = 0;
3114                     motion_y= s->mv[0][0][1] = 0;
3115                     break;
3116                 case CANDIDATE_MB_TYPE_INTER:
3117                     s->mv_dir = MV_DIR_FORWARD;
3118                     s->mb_intra= 0;
3119                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3120                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3121                     break;
3122                 case CANDIDATE_MB_TYPE_INTER_I:
3123                     s->mv_dir = MV_DIR_FORWARD;
3124                     s->mv_type = MV_TYPE_FIELD;
3125                     s->mb_intra= 0;
3126                     for(i=0; i<2; i++){
3127                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3128                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3129                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3130                     }
3131                     break;
3132                 case CANDIDATE_MB_TYPE_INTER4V:
3133                     s->mv_dir = MV_DIR_FORWARD;
3134                     s->mv_type = MV_TYPE_8X8;
3135                     s->mb_intra= 0;
3136                     for(i=0; i<4; i++){
3137                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3138                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3139                     }
3140                     break;
3141                 case CANDIDATE_MB_TYPE_DIRECT:
3142                     if (CONFIG_MPEG4_ENCODER) {
3143                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3144                         s->mb_intra= 0;
3145                         motion_x=s->b_direct_mv_table[xy][0];
3146                         motion_y=s->b_direct_mv_table[xy][1];
3147                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3148                     }
3149                     break;
3150                 case CANDIDATE_MB_TYPE_DIRECT0:
3151                     if (CONFIG_MPEG4_ENCODER) {
3152                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3153                         s->mb_intra= 0;
3154                         ff_mpeg4_set_direct_mv(s, 0, 0);
3155                     }
3156                     break;
3157                 case CANDIDATE_MB_TYPE_BIDIR:
3158                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3159                     s->mb_intra= 0;
3160                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3161                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3162                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3163                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3164                     break;
3165                 case CANDIDATE_MB_TYPE_BACKWARD:
3166                     s->mv_dir = MV_DIR_BACKWARD;
3167                     s->mb_intra= 0;
3168                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3169                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3170                     break;
3171                 case CANDIDATE_MB_TYPE_FORWARD:
3172                     s->mv_dir = MV_DIR_FORWARD;
3173                     s->mb_intra= 0;
3174                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3175                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3176                     break;
3177                 case CANDIDATE_MB_TYPE_FORWARD_I:
3178                     s->mv_dir = MV_DIR_FORWARD;
3179                     s->mv_type = MV_TYPE_FIELD;
3180                     s->mb_intra= 0;
3181                     for(i=0; i<2; i++){
3182                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3183                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3184                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3185                     }
3186                     break;
3187                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3188                     s->mv_dir = MV_DIR_BACKWARD;
3189                     s->mv_type = MV_TYPE_FIELD;
3190                     s->mb_intra= 0;
3191                     for(i=0; i<2; i++){
3192                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3193                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3194                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3195                     }
3196                     break;
3197                 case CANDIDATE_MB_TYPE_BIDIR_I:
3198                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3199                     s->mv_type = MV_TYPE_FIELD;
3200                     s->mb_intra= 0;
3201                     for(dir=0; dir<2; dir++){
3202                         for(i=0; i<2; i++){
3203                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3204                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3205                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3206                         }
3207                     }
3208                     break;
3209                 default:
3210                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3211                 }
3212
3213                 encode_mb(s, motion_x, motion_y);
3214
3215                 // RAL: Update last macroblock type
3216                 s->last_mv_dir = s->mv_dir;
3217
3218                 if (CONFIG_H263_ENCODER &&
3219                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3220                     ff_h263_update_motion_val(s);
3221
3222                 ff_MPV_decode_mb(s, s->block);
3223             }
3224
3225             /* clean the MV table in IPS frames for direct mode in B frames */
3226             if(s->mb_intra /* && I,P,S_TYPE */){
3227                 s->p_mv_table[xy][0]=0;
3228                 s->p_mv_table[xy][1]=0;
3229             }
3230
3231             if(s->flags&CODEC_FLAG_PSNR){
3232                 int w= 16;
3233                 int h= 16;
3234
3235                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3236                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3237
3238                 s->current_picture.f->error[0] += sse(
3239                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3240                     s->dest[0], w, h, s->linesize);
3241                 s->current_picture.f->error[1] += sse(
3242                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3243                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3244                 s->current_picture.f->error[2] += sse(
3245                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3246                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3247             }
3248             if(s->loop_filter){
3249                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3250                     ff_h263_loop_filter(s);
3251             }
3252             av_dlog(s->avctx, "MB %d %d bits\n",
3253                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3254         }
3255     }
3256
3257     //not beautiful here but we must write it before flushing so it has to be here
3258     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3259         ff_msmpeg4_encode_ext_header(s);
3260
3261     write_slice_end(s);
3262
3263     /* Send the last GOB if RTP */
3264     if (s->avctx->rtp_callback) {
3265         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3266         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3267         /* Call the RTP callback to send the last GOB */
3268         emms_c();
3269         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3270     }
3271
3272     return 0;
3273 }
3274
/*
 * Add src->field to dst->field, then reset src->field to zero.
 * Relies on pointers named `dst` and `src` being in scope at the call site.
 * The do { } while (0) wrapper makes the expansion a single statement, so the
 * macro stays correct under an unbraced if/else or loop; call it as MERGE(x);
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
3276 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3277     MERGE(me.scene_change_score);
3278     MERGE(me.mc_mb_var_sum_temp);
3279     MERGE(me.mb_var_sum_temp);
3280 }
3281
3282 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3283     int i;
3284
3285     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3286     MERGE(dct_count[1]);
3287     MERGE(mv_bits);
3288     MERGE(i_tex_bits);
3289     MERGE(p_tex_bits);
3290     MERGE(i_count);
3291     MERGE(f_count);
3292     MERGE(b_count);
3293     MERGE(skip_count);
3294     MERGE(misc_bits);
3295     MERGE(er.error_count);
3296     MERGE(padding_bug_score);
3297     MERGE(current_picture.f->error[0]);
3298     MERGE(current_picture.f->error[1]);
3299     MERGE(current_picture.f->error[2]);
3300
3301     if(dst->avctx->noise_reduction){
3302         for(i=0; i<64; i++){
3303             MERGE(dct_error_sum[0][i]);
3304             MERGE(dct_error_sum[1][i]);
3305         }
3306     }
3307
3308     assert(put_bits_count(&src->pb) % 8 ==0);
3309     assert(put_bits_count(&dst->pb) % 8 ==0);
3310     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3311     flush_put_bits(&dst->pb);
3312 }
3313
3314 static int estimate_qp(MpegEncContext *s, int dry_run){
3315     if (s->next_lambda){
3316         s->current_picture_ptr->f->quality =
3317         s->current_picture.f->quality = s->next_lambda;
3318         if(!dry_run) s->next_lambda= 0;
3319     } else if (!s->fixed_qscale) {
3320         s->current_picture_ptr->f->quality =
3321         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3322         if (s->current_picture.f->quality < 0)
3323             return -1;
3324     }
3325
3326     if(s->adaptive_quant){
3327         switch(s->codec_id){
3328         case AV_CODEC_ID_MPEG4:
3329             if (CONFIG_MPEG4_ENCODER)
3330                 ff_clean_mpeg4_qscales(s);
3331             break;
3332         case AV_CODEC_ID_H263:
3333         case AV_CODEC_ID_H263P:
3334         case AV_CODEC_ID_FLV1:
3335             if (CONFIG_H263_ENCODER)
3336                 ff_clean_h263_qscales(s);
3337             break;
3338         default:
3339             ff_init_qscale_tab(s);
3340         }
3341
3342         s->lambda= s->lambda_table[0];
3343         //FIXME broken
3344     }else
3345         s->lambda = s->current_picture.f->quality;
3346     update_qscale(s);
3347     return 0;
3348 }
3349
3350 /* must be called before writing the header */
3351 static void set_frame_distances(MpegEncContext * s){
3352     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3353     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3354
3355     if(s->pict_type==AV_PICTURE_TYPE_B){
3356         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3357         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3358     }else{
3359         s->pp_time= s->time - s->last_non_b_time;
3360         s->last_non_b_time= s->time;
3361         assert(s->picture_number==0 || s->pp_time > 0);
3362     }
3363 }
3364
3365 static int encode_picture(MpegEncContext *s, int picture_number)
3366 {
3367     int i, ret;
3368     int bits;
3369     int context_count = s->slice_context_count;
3370
3371     s->picture_number = picture_number;
3372
3373     /* Reset the average MB variance */
3374     s->me.mb_var_sum_temp    =
3375     s->me.mc_mb_var_sum_temp = 0;
3376
3377     /* we need to initialize some time vars before we can encode b-frames */
3378     // RAL: Condition added for MPEG1VIDEO
3379     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3380         set_frame_distances(s);
3381     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3382         ff_set_mpeg4_time(s);
3383
3384     s->me.scene_change_score=0;
3385
3386 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3387
3388     if(s->pict_type==AV_PICTURE_TYPE_I){
3389         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3390         else                        s->no_rounding=0;
3391     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3392         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3393             s->no_rounding ^= 1;
3394     }
3395
3396     if(s->flags & CODEC_FLAG_PASS2){
3397         if (estimate_qp(s,1) < 0)
3398             return -1;
3399         ff_get_2pass_fcode(s);
3400     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3401         if(s->pict_type==AV_PICTURE_TYPE_B)
3402             s->lambda= s->last_lambda_for[s->pict_type];
3403         else
3404             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3405         update_qscale(s);
3406     }
3407
3408     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3409         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3410         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3411         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3412         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3413     }
3414
3415     s->mb_intra=0; //for the rate distortion & bit compare functions
3416     for(i=1; i<context_count; i++){
3417         ret = ff_update_duplicate_context(s->thread_context[i], s);
3418         if (ret < 0)
3419             return ret;
3420     }
3421
3422     if(ff_init_me(s)<0)
3423         return -1;
3424
3425     /* Estimate motion for every MB */
3426     if(s->pict_type != AV_PICTURE_TYPE_I){
3427         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3428         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3429         if (s->pict_type != AV_PICTURE_TYPE_B) {
3430             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3431                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3432             }
3433         }
3434
3435         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3436     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3437         /* I-Frame */
3438         for(i=0; i<s->mb_stride*s->mb_height; i++)
3439             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3440
3441         if(!s->fixed_qscale){
3442             /* finding spatial complexity for I-frame rate control */
3443             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3444         }
3445     }
3446     for(i=1; i<context_count; i++){
3447         merge_context_after_me(s, s->thread_context[i]);
3448     }
3449     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3450     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3451     emms_c();
3452
3453     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3454         s->pict_type= AV_PICTURE_TYPE_I;
3455         for(i=0; i<s->mb_stride*s->mb_height; i++)
3456             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3457         if(s->msmpeg4_version >= 3)
3458             s->no_rounding=1;
3459         av_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3460                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3461     }
3462
3463     if(!s->umvplus){
3464         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3465             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3466
3467             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3468                 int a,b;
3469                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3470                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3471                 s->f_code= FFMAX3(s->f_code, a, b);
3472             }
3473
3474             ff_fix_long_p_mvs(s);
3475             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3476             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3477                 int j;
3478                 for(i=0; i<2; i++){
3479                     for(j=0; j<2; j++)
3480                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3481                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3482                 }
3483             }
3484         }
3485
3486         if(s->pict_type==AV_PICTURE_TYPE_B){
3487             int a, b;
3488
3489             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3490             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3491             s->f_code = FFMAX(a, b);
3492
3493             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3494             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3495             s->b_code = FFMAX(a, b);
3496
3497             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3498             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3499             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3500             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3501             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3502                 int dir, j;
3503                 for(dir=0; dir<2; dir++){
3504                     for(i=0; i<2; i++){
3505                         for(j=0; j<2; j++){
3506                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3507                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3508                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3509                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3510                         }
3511                     }
3512                 }
3513             }
3514         }
3515     }
3516
3517     if (estimate_qp(s, 0) < 0)
3518         return -1;
3519
3520     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3521         s->qscale= 3; //reduce clipping problems
3522
3523     if (s->out_format == FMT_MJPEG) {
3524         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3525         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3526
3527         if (s->avctx->intra_matrix) {
3528             chroma_matrix =
3529             luma_matrix = s->avctx->intra_matrix;
3530         }
3531         if (s->avctx->chroma_intra_matrix)
3532             chroma_matrix = s->avctx->chroma_intra_matrix;
3533
3534         /* for mjpeg, we do include qscale in the matrix */
3535         for(i=1;i<64;i++){
3536             int j= s->dsp.idct_permutation[i];
3537
3538             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3539             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3540         }
3541         s->y_dc_scale_table=
3542         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3543         s->chroma_intra_matrix[0] =
3544         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3545         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3546                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3547         ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3548                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3549         s->qscale= 8;
3550     }
3551     if(s->codec_id == AV_CODEC_ID_AMV){
3552         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3553         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3554         for(i=1;i<64;i++){
3555             int j= s->dsp.idct_permutation[ff_zigzag_direct[i]];
3556
3557             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3558             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3559         }
3560         s->y_dc_scale_table= y;
3561         s->c_dc_scale_table= c;
3562         s->intra_matrix[0] = 13;
3563         s->chroma_intra_matrix[0] = 14;
3564         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3565                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3566         ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3567                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3568         s->qscale= 8;
3569     }
3570
3571     //FIXME var duplication
3572     s->current_picture_ptr->f->key_frame =
3573     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3574     s->current_picture_ptr->f->pict_type =
3575     s->current_picture.f->pict_type = s->pict_type;
3576
3577     if (s->current_picture.f->key_frame)
3578         s->picture_in_gop_number=0;
3579
3580     s->mb_x = s->mb_y = 0;
3581     s->last_bits= put_bits_count(&s->pb);
3582     switch(s->out_format) {
3583     case FMT_MJPEG:
3584         if (CONFIG_MJPEG_ENCODER)
3585             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3586                                            s->intra_matrix, s->chroma_intra_matrix);
3587         break;
3588     case FMT_H261:
3589         if (CONFIG_H261_ENCODER)
3590             ff_h261_encode_picture_header(s, picture_number);
3591         break;
3592     case FMT_H263:
3593         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3594             ff_wmv2_encode_picture_header(s, picture_number);
3595         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3596             ff_msmpeg4_encode_picture_header(s, picture_number);
3597         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3598             ff_mpeg4_encode_picture_header(s, picture_number);
3599         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3600             ff_rv10_encode_picture_header(s, picture_number);
3601         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3602             ff_rv20_encode_picture_header(s, picture_number);
3603         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3604             ff_flv_encode_picture_header(s, picture_number);
3605         else if (CONFIG_H263_ENCODER)
3606             ff_h263_encode_picture_header(s, picture_number);
3607         break;
3608     case FMT_MPEG1:
3609         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3610             ff_mpeg1_encode_picture_header(s, picture_number);
3611         break;
3612     default:
3613         av_assert0(0);
3614     }
3615     bits= put_bits_count(&s->pb);
3616     s->header_bits= bits - s->last_bits;
3617
3618     for(i=1; i<context_count; i++){
3619         update_duplicate_context_after_me(s->thread_context[i], s);
3620     }
3621     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3622     for(i=1; i<context_count; i++){
3623         merge_context_after_encode(s, s->thread_context[i]);
3624     }
3625     emms_c();
3626     return 0;
3627 }
3628
3629 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3630     const int intra= s->mb_intra;
3631     int i;
3632
3633     s->dct_count[intra]++;
3634
3635     for(i=0; i<64; i++){
3636         int level= block[i];
3637
3638         if(level){
3639             if(level>0){
3640                 s->dct_error_sum[intra][i] += level;
3641                 level -= s->dct_offset[intra][i];
3642                 if(level<0) level=0;
3643             }else{
3644                 s->dct_error_sum[intra][i] -= level;
3645                 level += s->dct_offset[intra][i];
3646                 if(level>0) level=0;
3647             }
3648             block[i]= level;
3649         }
3650     }
3651 }
3652
3653 static int dct_quantize_trellis_c(MpegEncContext *s,
3654                                   int16_t *block, int n,
3655                                   int qscale, int *overflow){
3656     const int *qmat;
3657     const uint8_t *scantable= s->intra_scantable.scantable;
3658     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3659     int max=0;
3660     unsigned int threshold1, threshold2;
3661     int bias=0;
3662     int run_tab[65];
3663     int level_tab[65];
3664     int score_tab[65];
3665     int survivor[65];
3666     int survivor_count;
3667     int last_run=0;
3668     int last_level=0;
3669     int last_score= 0;
3670     int last_i;
3671     int coeff[2][64];
3672     int coeff_count[64];
3673     int qmul, qadd, start_i, last_non_zero, i, dc;
3674     const int esc_length= s->ac_esc_length;
3675     uint8_t * length;
3676     uint8_t * last_length;
3677     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3678
3679     s->dsp.fdct (block);
3680
3681     if(s->dct_error_sum)
3682         s->denoise_dct(s, block);
3683     qmul= qscale*16;
3684     qadd= ((qscale-1)|1)*8;
3685
3686     if (s->mb_intra) {
3687         int q;
3688         if (!s->h263_aic) {
3689             if (n < 4)
3690                 q = s->y_dc_scale;
3691             else
3692                 q = s->c_dc_scale;
3693             q = q << 3;
3694         } else{
3695             /* For AIC we skip quant/dequant of INTRADC */
3696             q = 1 << 3;
3697             qadd=0;
3698         }
3699
3700         /* note: block[0] is assumed to be positive */
3701         block[0] = (block[0] + (q >> 1)) / q;
3702         start_i = 1;
3703         last_non_zero = 0;
3704         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3705         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3706             bias= 1<<(QMAT_SHIFT-1);
3707         length     = s->intra_ac_vlc_length;
3708         last_length= s->intra_ac_vlc_last_length;
3709     } else {
3710         start_i = 0;
3711         last_non_zero = -1;
3712         qmat = s->q_inter_matrix[qscale];
3713         length     = s->inter_ac_vlc_length;
3714         last_length= s->inter_ac_vlc_last_length;
3715     }
3716     last_i= start_i;
3717
3718     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3719     threshold2= (threshold1<<1);
3720
3721     for(i=63; i>=start_i; i--) {
3722         const int j = scantable[i];
3723         int level = block[j] * qmat[j];
3724
3725         if(((unsigned)(level+threshold1))>threshold2){
3726             last_non_zero = i;
3727             break;
3728         }
3729     }
3730
3731     for(i=start_i; i<=last_non_zero; i++) {
3732         const int j = scantable[i];
3733         int level = block[j] * qmat[j];
3734
3735 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3736 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3737         if(((unsigned)(level+threshold1))>threshold2){
3738             if(level>0){
3739                 level= (bias + level)>>QMAT_SHIFT;
3740                 coeff[0][i]= level;
3741                 coeff[1][i]= level-1;
3742 //                coeff[2][k]= level-2;
3743             }else{
3744                 level= (bias - level)>>QMAT_SHIFT;
3745                 coeff[0][i]= -level;
3746                 coeff[1][i]= -level+1;
3747 //                coeff[2][k]= -level+2;
3748             }
3749             coeff_count[i]= FFMIN(level, 2);
3750             av_assert2(coeff_count[i]);
3751             max |=level;
3752         }else{
3753             coeff[0][i]= (level>>31)|1;
3754             coeff_count[i]= 1;
3755         }
3756     }
3757
3758     *overflow= s->max_qcoeff < max; //overflow might have happened
3759
3760     if(last_non_zero < start_i){
3761         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3762         return last_non_zero;
3763     }
3764
3765     score_tab[start_i]= 0;
3766     survivor[0]= start_i;
3767     survivor_count= 1;
3768
3769     for(i=start_i; i<=last_non_zero; i++){
3770         int level_index, j, zero_distortion;
3771         int dct_coeff= FFABS(block[ scantable[i] ]);
3772         int best_score=256*256*256*120;
3773
3774         if (s->dsp.fdct == ff_fdct_ifast)
3775             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3776         zero_distortion= dct_coeff*dct_coeff;
3777
3778         for(level_index=0; level_index < coeff_count[i]; level_index++){
3779             int distortion;
3780             int level= coeff[level_index][i];
3781             const int alevel= FFABS(level);
3782             int unquant_coeff;
3783
3784             av_assert2(level);
3785
3786             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3787                 unquant_coeff= alevel*qmul + qadd;
3788             }else{ //MPEG1
3789                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3790                 if(s->mb_intra){
3791                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3792                         unquant_coeff =   (unquant_coeff - 1) | 1;
3793                 }else{
3794                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3795                         unquant_coeff =   (unquant_coeff - 1) | 1;
3796                 }
3797                 unquant_coeff<<= 3;
3798             }
3799
3800             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3801             level+=64;
3802             if((level&(~127)) == 0){
3803                 for(j=survivor_count-1; j>=0; j--){
3804                     int run= i - survivor[j];
3805                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3806                     score += score_tab[i-run];
3807
3808                     if(score < best_score){
3809                         best_score= score;
3810                         run_tab[i+1]= run;
3811                         level_tab[i+1]= level-64;
3812                     }
3813                 }
3814
3815                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3816                     for(j=survivor_count-1; j>=0; j--){
3817                         int run= i - survivor[j];
3818                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3819                         score += score_tab[i-run];
3820                         if(score < last_score){
3821                             last_score= score;
3822                             last_run= run;
3823                             last_level= level-64;
3824                             last_i= i+1;
3825                         }
3826                     }
3827                 }
3828             }else{
3829                 distortion += esc_length*lambda;
3830                 for(j=survivor_count-1; j>=0; j--){
3831                     int run= i - survivor[j];
3832                     int score= distortion + score_tab[i-run];
3833
3834                     if(score < best_score){
3835                         best_score= score;
3836                         run_tab[i+1]= run;
3837                         level_tab[i+1]= level-64;
3838                     }
3839                 }
3840
3841                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3842                   for(j=survivor_count-1; j>=0; j--){
3843                         int run= i - survivor[j];
3844                         int score= distortion + score_tab[i-run];
3845                         if(score < last_score){
3846                             last_score= score;
3847                             last_run= run;
3848                             last_level= level-64;
3849                             last_i= i+1;
3850                         }
3851                     }
3852                 }
3853             }
3854         }
3855
3856         score_tab[i+1]= best_score;
3857
3858         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
3859         if(last_non_zero <= 27){
3860             for(; survivor_count; survivor_count--){
3861                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3862                     break;
3863             }
3864         }else{
3865             for(; survivor_count; survivor_count--){
3866                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3867                     break;
3868             }
3869         }
3870
3871         survivor[ survivor_count++ ]= i+1;
3872     }
3873
3874     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
3875         last_score= 256*256*256*120;
3876         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3877             int score= score_tab[i];
3878             if(i) score += lambda*2; //FIXME exacter?
3879
3880             if(score < last_score){
3881                 last_score= score;
3882                 last_i= i;
3883                 last_level= level_tab[i];
3884                 last_run= run_tab[i];
3885             }
3886         }
3887     }
3888
3889     s->coded_score[n] = last_score;
3890
3891     dc= FFABS(block[0]);
3892     last_non_zero= last_i - 1;
3893     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3894
3895     if(last_non_zero < start_i)
3896         return last_non_zero;
3897
3898     if(last_non_zero == 0 && start_i == 0){
3899         int best_level= 0;
3900         int best_score= dc * dc;
3901
3902         for(i=0; i<coeff_count[0]; i++){
3903             int level= coeff[i][0];
3904             int alevel= FFABS(level);
3905             int unquant_coeff, score, distortion;
3906
3907             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3908                     unquant_coeff= (alevel*qmul + qadd)>>3;
3909             }else{ //MPEG1
3910                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3911                     unquant_coeff =   (unquant_coeff - 1) | 1;
3912             }
3913             unquant_coeff = (unquant_coeff + 4) >> 3;
3914             unquant_coeff<<= 3 + 3;
3915
3916             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3917             level+=64;
3918             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3919             else                    score= distortion + esc_length*lambda;
3920
3921             if(score < best_score){
3922                 best_score= score;
3923                 best_level= level - 64;
3924             }
3925         }
3926         block[0]= best_level;
3927         s->coded_score[n] = best_score - dc*dc;
3928         if(best_level == 0) return -1;
3929         else                return last_non_zero;
3930     }
3931
3932     i= last_i;
3933     av_assert2(last_level);
3934
3935     block[ perm_scantable[last_non_zero] ]= last_level;
3936     i -= last_run + 1;
3937
3938     for(; i>start_i; i -= run_tab[i] + 1){
3939         block[ perm_scantable[i-1] ]= level_tab[i];
3940     }
3941
3942     return last_non_zero;
3943 }
3944
3945 //#define REFINE_STATS 1
3946 static int16_t basis[64][64];
3947
3948 static void build_basis(uint8_t *perm){
3949     int i, j, x, y;
3950     emms_c();
3951     for(i=0; i<8; i++){
3952         for(j=0; j<8; j++){
3953             for(y=0; y<8; y++){
3954                 for(x=0; x<8; x++){
3955                     double s= 0.25*(1<<BASIS_SHIFT);
3956                     int index= 8*i + j;
3957                     int perm_index= perm[index];
3958                     if(i==0) s*= sqrt(0.5);
3959                     if(j==0) s*= sqrt(0.5);
3960                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3961                 }
3962             }
3963         }
3964     }
3965 }
3966
3967 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3968                         int16_t *block, int16_t *weight, int16_t *orig,
3969                         int n, int qscale){
3970     int16_t rem[64];
3971     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3972     const uint8_t *scantable= s->intra_scantable.scantable;
3973     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3974 //    unsigned int threshold1, threshold2;
3975 //    int bias=0;
3976     int run_tab[65];
3977     int prev_run=0;
3978     int prev_level=0;
3979     int qmul, qadd, start_i, last_non_zero, i, dc;
3980     uint8_t * length;
3981     uint8_t * last_length;
3982     int lambda;
3983     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3984 #ifdef REFINE_STATS
3985 static int count=0;
3986 static int after_last=0;
3987 static int to_zero=0;
3988 static int from_zero=0;
3989 static int raise=0;
3990 static int lower=0;
3991 static int messed_sign=0;
3992 #endif
3993
3994     if(basis[0][0] == 0)
3995         build_basis(s->dsp.idct_permutation);
3996
3997     qmul= qscale*2;
3998     qadd= (qscale-1)|1;
3999     if (s->mb_intra) {
4000         if (!s->h263_aic) {
4001             if (n < 4)
4002                 q = s->y_dc_scale;
4003             else
4004                 q = s->c_dc_scale;
4005         } else{
4006             /* For AIC we skip quant/dequant of INTRADC */
4007             q = 1;
4008             qadd=0;
4009         }
4010         q <<= RECON_SHIFT-3;
4011         /* note: block[0] is assumed to be positive */
4012         dc= block[0]*q;
4013 //        block[0] = (block[0] + (q >> 1)) / q;
4014         start_i = 1;
4015 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4016 //            bias= 1<<(QMAT_SHIFT-1);
4017         length     = s->intra_ac_vlc_length;
4018         last_length= s->intra_ac_vlc_last_length;
4019     } else {
4020         dc= 0;
4021         start_i = 0;
4022         length     = s->inter_ac_vlc_length;
4023         last_length= s->inter_ac_vlc_last_length;
4024     }
4025     last_non_zero = s->block_last_index[n];
4026
4027 #ifdef REFINE_STATS
4028 {START_TIMER
4029 #endif
4030     dc += (1<<(RECON_SHIFT-1));
4031     for(i=0; i<64; i++){
4032         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
4033     }
4034 #ifdef REFINE_STATS
4035 STOP_TIMER("memset rem[]")}
4036 #endif
4037     sum=0;
4038     for(i=0; i<64; i++){
4039         int one= 36;
4040         int qns=4;
4041         int w;
4042
4043         w= FFABS(weight[i]) + qns*one;
4044         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4045
4046         weight[i] = w;
4047 //        w=weight[i] = (63*qns + (w/2)) / w;
4048
4049         av_assert2(w>0);
4050         av_assert2(w<(1<<6));
4051         sum += w*w;
4052     }
4053     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4054 #ifdef REFINE_STATS
4055 {START_TIMER
4056 #endif
4057     run=0;
4058     rle_index=0;
4059     for(i=start_i; i<=last_non_zero; i++){
4060         int j= perm_scantable[i];
4061         const int level= block[j];
4062         int coeff;
4063
4064         if(level){
4065             if(level<0) coeff= qmul*level - qadd;
4066             else        coeff= qmul*level + qadd;
4067             run_tab[rle_index++]=run;
4068             run=0;
4069
4070             s->dsp.add_8x8basis(rem, basis[j], coeff);
4071         }else{
4072             run++;
4073         }
4074     }
4075 #ifdef REFINE_STATS
4076 if(last_non_zero>0){
4077 STOP_TIMER("init rem[]")
4078 }
4079 }
4080
4081 {START_TIMER
4082 #endif
4083     for(;;){
4084         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
4085         int best_coeff=0;
4086         int best_change=0;
4087         int run2, best_unquant_change=0, analyze_gradient;
4088 #ifdef REFINE_STATS
4089 {START_TIMER
4090 #endif
4091         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4092
4093         if(analyze_gradient){
4094 #ifdef REFINE_STATS
4095 {START_TIMER
4096 #endif
4097             for(i=0; i<64; i++){
4098                 int w= weight[i];
4099
4100                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4101             }
4102 #ifdef REFINE_STATS
4103 STOP_TIMER("rem*w*w")}
4104 {START_TIMER
4105 #endif
4106             s->dsp.fdct(d1);
4107 #ifdef REFINE_STATS
4108 STOP_TIMER("dct")}
4109 #endif
4110         }
4111
4112         if(start_i){
4113             const int level= block[0];
4114             int change, old_coeff;
4115
4116             av_assert2(s->mb_intra);
4117
4118             old_coeff= q*level;
4119
4120             for(change=-1; change<=1; change+=2){
4121                 int new_level= level + change;
4122                 int score, new_coeff;
4123
4124                 new_coeff= q*new_level;
4125                 if(new_coeff >= 2048 || new_coeff < 0)
4126                     continue;
4127
4128                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
4129                 if(score<best_score){
4130                     best_score= score;
4131                     best_coeff= 0;
4132                     best_change= change;
4133                     best_unquant_change= new_coeff - old_coeff;
4134                 }
4135             }
4136         }
4137
4138         run=0;
4139         rle_index=0;
4140         run2= run_tab[rle_index++];
4141         prev_level=0;
4142         prev_run=0;
4143
4144         for(i=start_i; i<64; i++){
4145             int j= perm_scantable[i];
4146             const int level= block[j];
4147             int change, old_coeff;
4148
4149             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4150                 break;
4151
4152             if(level){
4153                 if(level<0) old_coeff= qmul*level - qadd;
4154                 else        old_coeff= qmul*level + qadd;
4155                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4156             }else{
4157                 old_coeff=0;
4158                 run2--;
4159                 av_assert2(run2>=0 || i >= last_non_zero );
4160             }
4161
4162             for(change=-1; change<=1; change+=2){
4163                 int new_level= level + change;
4164                 int score, new_coeff, unquant_change;
4165
4166                 score=0;
4167                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4168                    continue;
4169
4170                 if(new_level){
4171                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4172                     else            new_coeff= qmul*new_level + qadd;
4173                     if(new_coeff >= 2048 || new_coeff <= -2048)
4174                         continue;
4175                     //FIXME check for overflow
4176
4177                     if(level){
4178                         if(level < 63 && level > -63){
4179                             if(i < last_non_zero)
4180                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4181                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4182                             else
4183                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4184                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4185                         }
4186                     }else{
4187                         av_assert2(FFABS(new_level)==1);
4188
4189                         if(analyze_gradient){
4190                             int g= d1[ scantable[i] ];
4191                             if(g && (g^new_level) >= 0)
4192                                 continue;
4193                         }
4194
4195                         if(i < last_non_zero){
4196                             int next_i= i + run2 + 1;
4197                             int next_level= block[ perm_scantable[next_i] ] + 64;
4198
4199                             if(next_level&(~127))
4200                                 next_level= 0;
4201
4202                             if(next_i < last_non_zero)
4203                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4204                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4205                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4206                             else
4207                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4208                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4209                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4210                         }else{
4211                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4212                             if(prev_level){
4213                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4214                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4215                             }
4216                         }
4217                     }
4218                 }else{
4219                     new_coeff=0;
4220                     av_assert2(FFABS(level)==1);
4221
4222                     if(i < last_non_zero){
4223                         int next_i= i + run2 + 1;
4224                         int next_level= block[ perm_scantable[next_i] ] + 64;
4225
4226                         if(next_level&(~127))
4227                             next_level= 0;
4228
4229                         if(next_i < last_non_zero)
4230                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4231                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4232                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4233                         else
4234                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4235                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4236                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4237                     }else{
4238                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4239                         if(prev_level){
4240                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4241                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4242                         }
4243                     }
4244                 }
4245
4246                 score *= lambda;
4247
4248                 unquant_change= new_coeff - old_coeff;
4249                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4250
4251                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
4252                 if(score<best_score){
4253                     best_score= score;
4254                     best_coeff= i;
4255                     best_change= change;
4256                     best_unquant_change= unquant_change;
4257                 }
4258             }
4259             if(level){
4260                 prev_level= level + 64;
4261                 if(prev_level&(~127))
4262                     prev_level= 0;
4263                 prev_run= run;
4264                 run=0;
4265             }else{
4266                 run++;
4267             }
4268         }
4269 #ifdef REFINE_STATS
4270 STOP_TIMER("iterative step")}
4271 #endif
4272
4273         if(best_change){
4274             int j= perm_scantable[ best_coeff ];
4275
4276             block[j] += best_change;
4277
4278             if(best_coeff > last_non_zero){
4279                 last_non_zero= best_coeff;
4280                 av_assert2(block[j]);
4281 #ifdef REFINE_STATS
4282 after_last++;
4283 #endif
4284             }else{
4285 #ifdef REFINE_STATS
4286 if(block[j]){
4287     if(block[j] - best_change){
4288         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4289             raise++;
4290         }else{
4291             lower++;
4292         }
4293     }else{
4294         from_zero++;
4295     }
4296 }else{
4297     to_zero++;
4298 }
4299 #endif
4300                 for(; last_non_zero>=start_i; last_non_zero--){
4301                     if(block[perm_scantable[last_non_zero]])
4302                         break;
4303                 }
4304             }
4305 #ifdef REFINE_STATS
4306 count++;
4307 if(256*256*256*64 % count == 0){
4308     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4309 }
4310 #endif
4311             run=0;
4312             rle_index=0;
4313             for(i=start_i; i<=last_non_zero; i++){
4314                 int j= perm_scantable[i];
4315                 const int level= block[j];
4316
4317                  if(level){
4318                      run_tab[rle_index++]=run;
4319                      run=0;
4320                  }else{
4321                      run++;
4322                  }
4323             }
4324
4325             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4326         }else{
4327             break;
4328         }
4329     }
4330 #ifdef REFINE_STATS
4331 if(last_non_zero>0){
4332 STOP_TIMER("iterative search")
4333 }
4334 }
4335 #endif
4336
4337     return last_non_zero;
4338 }
4339
4340 int ff_dct_quantize_c(MpegEncContext *s,
4341                         int16_t *block, int n,
4342                         int qscale, int *overflow)
4343 {
4344     int i, j, level, last_non_zero, q, start_i;
4345     const int *qmat;
4346     const uint8_t *scantable= s->intra_scantable.scantable;
4347     int bias;
4348     int max=0;
4349     unsigned int threshold1, threshold2;
4350
4351     s->dsp.fdct (block);
4352
4353     if(s->dct_error_sum)
4354         s->denoise_dct(s, block);
4355
4356     if (s->mb_intra) {
4357         if (!s->h263_aic) {
4358             if (n < 4)
4359                 q = s->y_dc_scale;
4360             else
4361                 q = s->c_dc_scale;
4362             q = q << 3;
4363         } else
4364             /* For AIC we skip quant/dequant of INTRADC */
4365             q = 1 << 3;
4366
4367         /* note: block[0] is assumed to be positive */
4368         block[0] = (block[0] + (q >> 1)) / q;
4369         start_i = 1;
4370         last_non_zero = 0;
4371         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4372         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4373     } else {
4374         start_i = 0;
4375         last_non_zero = -1;
4376         qmat = s->q_inter_matrix[qscale];
4377         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4378     }
4379     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4380     threshold2= (threshold1<<1);
4381     for(i=63;i>=start_i;i--) {
4382         j = scantable[i];
4383         level = block[j] * qmat[j];
4384
4385         if(((unsigned)(level+threshold1))>threshold2){
4386             last_non_zero = i;
4387             break;
4388         }else{
4389             block[j]=0;
4390         }
4391     }
4392     for(i=start_i; i<=last_non_zero; i++) {
4393         j = scantable[i];
4394         level = block[j] * qmat[j];
4395
4396 //        if(   bias+level >= (1<<QMAT_SHIFT)
4397 //           || bias-level >= (1<<QMAT_SHIFT)){
4398         if(((unsigned)(level+threshold1))>threshold2){
4399             if(level>0){
4400                 level= (bias + level)>>QMAT_SHIFT;
4401                 block[j]= level;
4402             }else{
4403                 level= (bias - level)>>QMAT_SHIFT;
4404                 block[j]= -level;
4405             }
4406             max |=level;
4407         }else{
4408             block[j]=0;
4409         }
4410     }
4411     *overflow= s->max_qcoeff < max; //overflow might have happened
4412
4413     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4414     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4415         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4416
4417     return last_non_zero;
4418 }
4419
4420 #define OFFSET(x) offsetof(MpegEncContext, x)
4421 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4422 static const AVOption h263_options[] = {
4423     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4424     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4425     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4426     FF_MPV_COMMON_OPTS
4427     { NULL },
4428 };
4429
4430 static const AVClass h263_class = {
4431     .class_name = "H.263 encoder",
4432     .item_name  = av_default_item_name,
4433     .option     = h263_options,
4434     .version    = LIBAVUTIL_VERSION_INT,
4435 };
4436
4437 AVCodec ff_h263_encoder = {
4438     .name           = "h263",
4439     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4440     .type           = AVMEDIA_TYPE_VIDEO,
4441     .id             = AV_CODEC_ID_H263,
4442     .priv_data_size = sizeof(MpegEncContext),
4443     .init           = ff_MPV_encode_init,
4444     .encode2        = ff_MPV_encode_picture,
4445     .close          = ff_MPV_encode_end,
4446     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4447     .priv_class     = &h263_class,
4448 };
4449
4450 static const AVOption h263p_options[] = {
4451     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4452     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4453     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4454     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4455     FF_MPV_COMMON_OPTS
4456     { NULL },
4457 };
4458 static const AVClass h263p_class = {
4459     .class_name = "H.263p encoder",
4460     .item_name  = av_default_item_name,
4461     .option     = h263p_options,
4462     .version    = LIBAVUTIL_VERSION_INT,
4463 };
4464
4465 AVCodec ff_h263p_encoder = {
4466     .name           = "h263p",
4467     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4468     .type           = AVMEDIA_TYPE_VIDEO,
4469     .id             = AV_CODEC_ID_H263P,
4470     .priv_data_size = sizeof(MpegEncContext),
4471     .init           = ff_MPV_encode_init,
4472     .encode2        = ff_MPV_encode_picture,
4473     .close          = ff_MPV_encode_end,
4474     .capabilities   = CODEC_CAP_SLICE_THREADS,
4475     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4476     .priv_class     = &h263p_class,
4477 };
4478
4479 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4480
4481 AVCodec ff_msmpeg4v2_encoder = {
4482     .name           = "msmpeg4v2",
4483     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4484     .type           = AVMEDIA_TYPE_VIDEO,
4485     .id             = AV_CODEC_ID_MSMPEG4V2,
4486     .priv_data_size = sizeof(MpegEncContext),
4487     .init           = ff_MPV_encode_init,
4488     .encode2        = ff_MPV_encode_picture,
4489     .close          = ff_MPV_encode_end,
4490     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4491     .priv_class     = &msmpeg4v2_class,
4492 };
4493
4494 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4495
4496 AVCodec ff_msmpeg4v3_encoder = {
4497     .name           = "msmpeg4",
4498     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4499     .type           = AVMEDIA_TYPE_VIDEO,
4500     .id             = AV_CODEC_ID_MSMPEG4V3,
4501     .priv_data_size = sizeof(MpegEncContext),
4502     .init           = ff_MPV_encode_init,
4503     .encode2        = ff_MPV_encode_picture,
4504     .close          = ff_MPV_encode_end,
4505     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4506     .priv_class     = &msmpeg4v3_class,
4507 };
4508
4509 FF_MPV_GENERIC_CLASS(wmv1)
4510
4511 AVCodec ff_wmv1_encoder = {
4512     .name           = "wmv1",
4513     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4514     .type           = AVMEDIA_TYPE_VIDEO,
4515     .id             = AV_CODEC_ID_WMV1,
4516     .priv_data_size = sizeof(MpegEncContext),
4517     .init           = ff_MPV_encode_init,
4518     .encode2        = ff_MPV_encode_picture,
4519     .close          = ff_MPV_encode_end,
4520     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4521     .priv_class     = &wmv1_class,
4522 };