]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
Merge commit '50ba57e0ce63d9904269ea0728936a0c79f8bfb5'
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/intmath.h"
31 #include "libavutil/mathematics.h"
32 #include "libavutil/opt.h"
33 #include "avcodec.h"
34 #include "dsputil.h"
35 #include "mpegvideo.h"
36 #include "h263.h"
37 #include "mathops.h"
38 #include "mjpegenc.h"
39 #include "msmpeg4.h"
40 #include "faandct.h"
41 #include "thread.h"
42 #include "aandcttab.h"
43 #include "flv.h"
44 #include "mpeg4video.h"
45 #include "internal.h"
46 #include "bytestream.h"
47 #include <limits.h>
48 #include "sp5x.h"
49
50 //#undef NDEBUG
51 //#include <assert.h>
52
53 static int encode_picture(MpegEncContext *s, int picture_number);
54 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
55 static int sse_mb(MpegEncContext *s);
56 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block);
57 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
58
59 /* enable all paranoid tests for rounding, overflows, etc... */
60 //#define PARANOID
61
62 //#define DEBUG
63
64 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
65 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
66
67 const AVOption ff_mpv_generic_options[] = {
68     FF_MPV_COMMON_OPTS
69     { NULL },
70 };
71
72 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
73                        uint16_t (*qmat16)[2][64],
74                        const uint16_t *quant_matrix,
75                        int bias, int qmin, int qmax, int intra)
76 {
77     int qscale;
78     int shift = 0;
79
80     for (qscale = qmin; qscale <= qmax; qscale++) {
81         int i;
82         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
83             dsp->fdct == ff_jpeg_fdct_islow_10 ||
84             dsp->fdct == ff_faandct) {
85             for (i = 0; i < 64; i++) {
86                 const int j = dsp->idct_permutation[i];
87                 /* 16 <= qscale * quant_matrix[i] <= 7905
88                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
89                  *             19952 <=              x  <= 249205026
90                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
91                  *           3444240 >= (1 << 36) / (x) >= 275 */
92
93                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
94                                         (qscale * quant_matrix[j]));
95             }
96         } else if (dsp->fdct == ff_fdct_ifast) {
97             for (i = 0; i < 64; i++) {
98                 const int j = dsp->idct_permutation[i];
99                 /* 16 <= qscale * quant_matrix[i] <= 7905
100                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
101                  *             19952 <=              x  <= 249205026
102                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
103                  *           3444240 >= (1 << 36) / (x) >= 275 */
104
105                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
106                                         (ff_aanscales[i] * qscale * quant_matrix[j]));
107             }
108         } else {
109             for (i = 0; i < 64; i++) {
110                 const int j = dsp->idct_permutation[i];
111                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
112                  * Assume x = qscale * quant_matrix[i]
113                  * So             16 <=              x  <= 7905
114                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
115                  * so          32768 >= (1 << 19) / (x) >= 67 */
116                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
117                                         (qscale * quant_matrix[j]));
118                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
119                 //                    (qscale * quant_matrix[i]);
120                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
121                                        (qscale * quant_matrix[j]);
122
123                 if (qmat16[qscale][0][i] == 0 ||
124                     qmat16[qscale][0][i] == 128 * 256)
125                     qmat16[qscale][0][i] = 128 * 256 - 1;
126                 qmat16[qscale][1][i] =
127                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
128                                 qmat16[qscale][0][i]);
129             }
130         }
131
132         for (i = intra; i < 64; i++) {
133             int64_t max = 8191;
134             if (dsp->fdct == ff_fdct_ifast) {
135                 max = (8191LL * ff_aanscales[i]) >> 14;
136             }
137             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
138                 shift++;
139             }
140         }
141     }
142     if (shift) {
143         av_log(NULL, AV_LOG_INFO,
144                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
145                QMAT_SHIFT - shift);
146     }
147 }
148
149 static inline void update_qscale(MpegEncContext *s)
150 {
151     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
152                 (FF_LAMBDA_SHIFT + 7);
153     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
154
155     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
156                  FF_LAMBDA_SHIFT;
157 }
158
159 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
160 {
161     int i;
162
163     if (matrix) {
164         put_bits(pb, 1, 1);
165         for (i = 0; i < 64; i++) {
166             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
167         }
168     } else
169         put_bits(pb, 1, 0);
170 }
171
172 /**
173  * init s->current_picture.qscale_table from s->lambda_table
174  */
175 void ff_init_qscale_tab(MpegEncContext *s)
176 {
177     int8_t * const qscale_table = s->current_picture.f.qscale_table;
178     int i;
179
180     for (i = 0; i < s->mb_num; i++) {
181         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
182         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
183         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
184                                                   s->avctx->qmax);
185     }
186 }
187
188 static void copy_picture_attributes(MpegEncContext *s,
189                                     AVFrame *dst,
190                                     AVFrame *src)
191 {
192     int i;
193
194     dst->pict_type              = src->pict_type;
195     dst->quality                = src->quality;
196     dst->coded_picture_number   = src->coded_picture_number;
197     dst->display_picture_number = src->display_picture_number;
198     //dst->reference              = src->reference;
199     dst->pts                    = src->pts;
200     dst->interlaced_frame       = src->interlaced_frame;
201     dst->top_field_first        = src->top_field_first;
202
203     if (s->avctx->me_threshold) {
204         if (!src->motion_val[0])
205             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
206         if (!src->mb_type)
207             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
208         if (!src->ref_index[0])
209             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
210         if (src->motion_subsample_log2 != dst->motion_subsample_log2)
211             av_log(s->avctx, AV_LOG_ERROR,
212                    "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
213                    src->motion_subsample_log2, dst->motion_subsample_log2);
214
215         memcpy(dst->mb_type, src->mb_type,
216                s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
217
218         for (i = 0; i < 2; i++) {
219             int stride = ((16 * s->mb_width ) >>
220                           src->motion_subsample_log2) + 1;
221             int height = ((16 * s->mb_height) >> src->motion_subsample_log2);
222
223             if (src->motion_val[i] &&
224                 src->motion_val[i] != dst->motion_val[i]) {
225                 memcpy(dst->motion_val[i], src->motion_val[i],
226                        2 * stride * height * sizeof(int16_t));
227             }
228             if (src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]) {
229                 memcpy(dst->ref_index[i], src->ref_index[i],
230                        s->mb_stride * 4 * s->mb_height * sizeof(int8_t));
231             }
232         }
233     }
234 }
235
236 static void update_duplicate_context_after_me(MpegEncContext *dst,
237                                               MpegEncContext *src)
238 {
239 #define COPY(a) dst->a= src->a
240     COPY(pict_type);
241     COPY(current_picture);
242     COPY(f_code);
243     COPY(b_code);
244     COPY(qscale);
245     COPY(lambda);
246     COPY(lambda2);
247     COPY(picture_in_gop_number);
248     COPY(gop_picture_number);
249     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
250     COPY(progressive_frame);    // FIXME don't set in encode_header
251     COPY(partitioned_frame);    // FIXME don't set in encode_header
252 #undef COPY
253 }
254
255 /**
256  * Set the given MpegEncContext to defaults for encoding.
257  * the changed fields will not depend upon the prior state of the MpegEncContext.
258  */
259 static void MPV_encode_defaults(MpegEncContext *s)
260 {
261     int i;
262     ff_MPV_common_defaults(s);
263
264     for (i = -16; i < 16; i++) {
265         default_fcode_tab[i + MAX_MV] = 1;
266     }
267     s->me.mv_penalty = default_mv_penalty;
268     s->fcode_tab     = default_fcode_tab;
269 }
270
271 av_cold int ff_dct_encode_init(MpegEncContext *s) {
272     if (ARCH_X86)
273         ff_dct_encode_init_x86(s);
274
275     if (!s->dct_quantize)
276         s->dct_quantize = ff_dct_quantize_c;
277     if (!s->denoise_dct)
278         s->denoise_dct  = denoise_dct_c;
279     s->fast_dct_quantize = s->dct_quantize;
280     if (s->avctx->trellis)
281         s->dct_quantize  = dct_quantize_trellis_c;
282
283     return 0;
284 }
285
286 /* init video encoder */
287 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
288 {
289     MpegEncContext *s = avctx->priv_data;
290     int i;
291     int chroma_h_shift, chroma_v_shift;
292
293     MPV_encode_defaults(s);
294
295     switch (avctx->codec_id) {
296     case AV_CODEC_ID_MPEG2VIDEO:
297         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
298             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
299             av_log(avctx, AV_LOG_ERROR,
300                    "only YUV420 and YUV422 are supported\n");
301             return -1;
302         }
303         break;
304     case AV_CODEC_ID_LJPEG:
305         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
306             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
307             avctx->pix_fmt != AV_PIX_FMT_YUVJ444P &&
308             avctx->pix_fmt != AV_PIX_FMT_BGR0     &&
309             avctx->pix_fmt != AV_PIX_FMT_BGRA     &&
310             avctx->pix_fmt != AV_PIX_FMT_BGR24    &&
311             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
312               avctx->pix_fmt != AV_PIX_FMT_YUV422P &&
313               avctx->pix_fmt != AV_PIX_FMT_YUV444P) ||
314              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
315             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
316             return -1;
317         }
318         break;
319     case AV_CODEC_ID_MJPEG:
320     case AV_CODEC_ID_AMV:
321         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
322             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
323             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
324               avctx->pix_fmt != AV_PIX_FMT_YUV422P) ||
325              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
326             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
327             return -1;
328         }
329         break;
330     default:
331         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
332             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
333             return -1;
334         }
335     }
336
337     switch (avctx->pix_fmt) {
338     case AV_PIX_FMT_YUVJ422P:
339     case AV_PIX_FMT_YUV422P:
340         s->chroma_format = CHROMA_422;
341         break;
342     case AV_PIX_FMT_YUVJ420P:
343     case AV_PIX_FMT_YUV420P:
344     default:
345         s->chroma_format = CHROMA_420;
346         break;
347     }
348
349     s->bit_rate = avctx->bit_rate;
350     s->width    = avctx->width;
351     s->height   = avctx->height;
352     if (avctx->gop_size > 600 &&
353         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
354         av_log(avctx, AV_LOG_WARNING,
355                "keyframe interval too large!, reducing it from %d to %d\n",
356                avctx->gop_size, 600);
357         avctx->gop_size = 600;
358     }
359     s->gop_size     = avctx->gop_size;
360     s->avctx        = avctx;
361     s->flags        = avctx->flags;
362     s->flags2       = avctx->flags2;
363     s->max_b_frames = avctx->max_b_frames;
364     s->codec_id     = avctx->codec->id;
365 #if FF_API_MPV_GLOBAL_OPTS
366     if (avctx->luma_elim_threshold)
367         s->luma_elim_threshold   = avctx->luma_elim_threshold;
368     if (avctx->chroma_elim_threshold)
369         s->chroma_elim_threshold = avctx->chroma_elim_threshold;
370 #endif
371     s->strict_std_compliance = avctx->strict_std_compliance;
372     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
373     s->mpeg_quant         = avctx->mpeg_quant;
374     s->rtp_mode           = !!avctx->rtp_payload_size;
375     s->intra_dc_precision = avctx->intra_dc_precision;
376     s->user_specified_pts = AV_NOPTS_VALUE;
377
378     if (s->gop_size <= 1) {
379         s->intra_only = 1;
380         s->gop_size   = 12;
381     } else {
382         s->intra_only = 0;
383     }
384
385     s->me_method = avctx->me_method;
386
387     /* Fixed QSCALE */
388     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
389
390 #if FF_API_MPV_GLOBAL_OPTS
391     if (s->flags & CODEC_FLAG_QP_RD)
392         s->mpv_flags |= FF_MPV_FLAG_QP_RD;
393 #endif
394
395     s->adaptive_quant = (s->avctx->lumi_masking ||
396                          s->avctx->dark_masking ||
397                          s->avctx->temporal_cplx_masking ||
398                          s->avctx->spatial_cplx_masking  ||
399                          s->avctx->p_masking      ||
400                          s->avctx->border_masking ||
401                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
402                         !s->fixed_qscale;
403
404     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
405
406     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
407         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
408         if (avctx->rc_max_rate && !avctx->rc_buffer_size)
409             return -1;
410     }
411
412     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
413         av_log(avctx, AV_LOG_INFO,
414                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
415     }
416
417     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
418         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
419         return -1;
420     }
421
422     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
423         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
424         return -1;
425     }
426
427     if (avctx->rc_max_rate &&
428         avctx->rc_max_rate == avctx->bit_rate &&
429         avctx->rc_max_rate != avctx->rc_min_rate) {
430         av_log(avctx, AV_LOG_INFO,
431                "impossible bitrate constraints, this will fail\n");
432     }
433
434     if (avctx->rc_buffer_size &&
435         avctx->bit_rate * (int64_t)avctx->time_base.num >
436             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
437         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
438         return -1;
439     }
440
441     if (!s->fixed_qscale &&
442         avctx->bit_rate * av_q2d(avctx->time_base) >
443             avctx->bit_rate_tolerance) {
444         av_log(avctx, AV_LOG_ERROR,
445                "bitrate tolerance too small for bitrate\n");
446         return -1;
447     }
448
449     if (s->avctx->rc_max_rate &&
450         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
451         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
452          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
453         90000LL * (avctx->rc_buffer_size - 1) >
454             s->avctx->rc_max_rate * 0xFFFFLL) {
455         av_log(avctx, AV_LOG_INFO,
456                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
457                "specified vbv buffer is too large for the given bitrate!\n");
458     }
459
460     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
461         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
462         s->codec_id != AV_CODEC_ID_FLV1) {
463         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
464         return -1;
465     }
466
467     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
468         av_log(avctx, AV_LOG_ERROR,
469                "OBMC is only supported with simple mb decision\n");
470         return -1;
471     }
472
473     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
474         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
475         return -1;
476     }
477
478     if (s->max_b_frames                    &&
479         s->codec_id != AV_CODEC_ID_MPEG4      &&
480         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
481         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
482         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
483         return -1;
484     }
485
486     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
487          s->codec_id == AV_CODEC_ID_H263  ||
488          s->codec_id == AV_CODEC_ID_H263P) &&
489         (avctx->sample_aspect_ratio.num > 255 ||
490          avctx->sample_aspect_ratio.den > 255)) {
491         av_log(avctx, AV_LOG_WARNING,
492                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
493                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
494         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
495                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
496     }
497
498     if ((s->codec_id == AV_CODEC_ID_H263  ||
499          s->codec_id == AV_CODEC_ID_H263P) &&
500         (avctx->width  > 2048 ||
501          avctx->height > 1152 )) {
502         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
503         return -1;
504     }
505     if ((s->codec_id == AV_CODEC_ID_H263  ||
506          s->codec_id == AV_CODEC_ID_H263P) &&
507         ((avctx->width &3) ||
508          (avctx->height&3) )) {
509         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
510         return -1;
511     }
512
513     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
514         (avctx->width  > 4095 ||
515          avctx->height > 4095 )) {
516         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
517         return -1;
518     }
519
520     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
521         (avctx->width  > 16383 ||
522          avctx->height > 16383 )) {
523         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
524         return -1;
525     }
526
527     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
528          s->codec_id == AV_CODEC_ID_WMV2) &&
529          avctx->width & 1) {
530          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
531          return -1;
532     }
533
534     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
535         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
536         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
537         return -1;
538     }
539
540     // FIXME mpeg2 uses that too
541     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
542         av_log(avctx, AV_LOG_ERROR,
543                "mpeg2 style quantization not supported by codec\n");
544         return -1;
545     }
546
547 #if FF_API_MPV_GLOBAL_OPTS
548     if (s->flags & CODEC_FLAG_CBP_RD)
549         s->mpv_flags |= FF_MPV_FLAG_CBP_RD;
550 #endif
551
552     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
553         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
554         return -1;
555     }
556
557     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
558         s->avctx->mb_decision != FF_MB_DECISION_RD) {
559         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
560         return -1;
561     }
562
563     if (s->avctx->scenechange_threshold < 1000000000 &&
564         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
565         av_log(avctx, AV_LOG_ERROR,
566                "closed gop with scene change detection are not supported yet, "
567                "set threshold to 1000000000\n");
568         return -1;
569     }
570
571     if (s->flags & CODEC_FLAG_LOW_DELAY) {
572         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
573             av_log(avctx, AV_LOG_ERROR,
574                   "low delay forcing is only available for mpeg2\n");
575             return -1;
576         }
577         if (s->max_b_frames != 0) {
578             av_log(avctx, AV_LOG_ERROR,
579                    "b frames cannot be used with low delay\n");
580             return -1;
581         }
582     }
583
584     if (s->q_scale_type == 1) {
585         if (avctx->qmax > 12) {
586             av_log(avctx, AV_LOG_ERROR,
587                    "non linear quant only supports qmax <= 12 currently\n");
588             return -1;
589         }
590     }
591
592     if (s->avctx->thread_count > 1         &&
593         s->codec_id != AV_CODEC_ID_MPEG4      &&
594         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
595         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
596         s->codec_id != AV_CODEC_ID_MJPEG      &&
597         (s->codec_id != AV_CODEC_ID_H263P)) {
598         av_log(avctx, AV_LOG_ERROR,
599                "multi threaded encoding not supported by codec\n");
600         return -1;
601     }
602
603     if (s->avctx->thread_count < 1) {
604         av_log(avctx, AV_LOG_ERROR,
605                "automatic thread number detection not supported by codec, "
606                "patch welcome\n");
607         return -1;
608     }
609
610     if (s->avctx->thread_count > 1)
611         s->rtp_mode = 1;
612
613     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
614         s->h263_slice_structured = 1;
615
616     if (!avctx->time_base.den || !avctx->time_base.num) {
617         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
618         return -1;
619     }
620
621     i = (INT_MAX / 2 + 128) >> 8;
622     if (avctx->me_threshold >= i) {
623         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n",
624                i - 1);
625         return -1;
626     }
627     if (avctx->mb_threshold >= i) {
628         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
629                i - 1);
630         return -1;
631     }
632
633     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
634         av_log(avctx, AV_LOG_INFO,
635                "notice: b_frame_strategy only affects the first pass\n");
636         avctx->b_frame_strategy = 0;
637     }
638
639     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
640     if (i > 1) {
641         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
642         avctx->time_base.den /= i;
643         avctx->time_base.num /= i;
644         //return -1;
645     }
646
647     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
648         // (a + x * 3 / 8) / x
649         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
650         s->inter_quant_bias = 0;
651     } else {
652         s->intra_quant_bias = 0;
653         // (a - x / 4) / x
654         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
655     }
656
657     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
658         s->intra_quant_bias = avctx->intra_quant_bias;
659     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
660         s->inter_quant_bias = avctx->inter_quant_bias;
661
662     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
663
664     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
665                                   &chroma_v_shift);
666
667     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
668         s->avctx->time_base.den > (1 << 16) - 1) {
669         av_log(avctx, AV_LOG_ERROR,
670                "timebase %d/%d not supported by MPEG 4 standard, "
671                "the maximum admitted value for the timebase denominator "
672                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
673                (1 << 16) - 1);
674         return -1;
675     }
676     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
677
678 #if FF_API_MPV_GLOBAL_OPTS
679     if (avctx->flags2 & CODEC_FLAG2_SKIP_RD)
680         s->mpv_flags |= FF_MPV_FLAG_SKIP_RD;
681     if (avctx->flags2 & CODEC_FLAG2_STRICT_GOP)
682         s->mpv_flags |= FF_MPV_FLAG_STRICT_GOP;
683     if (avctx->quantizer_noise_shaping)
684         s->quantizer_noise_shaping = avctx->quantizer_noise_shaping;
685 #endif
686
687     switch (avctx->codec->id) {
688     case AV_CODEC_ID_MPEG1VIDEO:
689         s->out_format = FMT_MPEG1;
690         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
691         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
692         break;
693     case AV_CODEC_ID_MPEG2VIDEO:
694         s->out_format = FMT_MPEG1;
695         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
696         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
697         s->rtp_mode   = 1;
698         break;
699     case AV_CODEC_ID_LJPEG:
700     case AV_CODEC_ID_MJPEG:
701     case AV_CODEC_ID_AMV:
702         s->out_format = FMT_MJPEG;
703         s->intra_only = 1; /* force intra only for jpeg */
704         if (avctx->codec->id == AV_CODEC_ID_LJPEG &&
705             (avctx->pix_fmt == AV_PIX_FMT_BGR0
706              || s->avctx->pix_fmt == AV_PIX_FMT_BGRA
707              || s->avctx->pix_fmt == AV_PIX_FMT_BGR24)) {
708             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
709             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
710             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
711         } else {
712             s->mjpeg_vsample[0] = 2;
713             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
714             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
715             s->mjpeg_hsample[0] = 2;
716             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
717             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
718         }
719         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
720             ff_mjpeg_encode_init(s) < 0)
721             return -1;
722         avctx->delay = 0;
723         s->low_delay = 1;
724         break;
725     case AV_CODEC_ID_H261:
726         if (!CONFIG_H261_ENCODER)
727             return -1;
728         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
729             av_log(avctx, AV_LOG_ERROR,
730                    "The specified picture size of %dx%d is not valid for the "
731                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
732                     s->width, s->height);
733             return -1;
734         }
735         s->out_format = FMT_H261;
736         avctx->delay  = 0;
737         s->low_delay  = 1;
738         break;
739     case AV_CODEC_ID_H263:
740         if (!CONFIG_H263_ENCODER)
741             return -1;
742         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
743                              s->width, s->height) == 8) {
744             av_log(avctx, AV_LOG_ERROR,
745                    "The specified picture size of %dx%d is not valid for "
746                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
747                    "352x288, 704x576, and 1408x1152. "
748                    "Try H.263+.\n", s->width, s->height);
749             return -1;
750         }
751         s->out_format = FMT_H263;
752         avctx->delay  = 0;
753         s->low_delay  = 1;
754         break;
755     case AV_CODEC_ID_H263P:
756         s->out_format = FMT_H263;
757         s->h263_plus  = 1;
758         /* Fx */
759         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
760         s->modified_quant  = s->h263_aic;
761         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
762         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
763
764         /* /Fx */
765         /* These are just to be sure */
766         avctx->delay = 0;
767         s->low_delay = 1;
768         break;
769     case AV_CODEC_ID_FLV1:
770         s->out_format      = FMT_H263;
771         s->h263_flv        = 2; /* format = 1; 11-bit codes */
772         s->unrestricted_mv = 1;
773         s->rtp_mode  = 0; /* don't allow GOB */
774         avctx->delay = 0;
775         s->low_delay = 1;
776         break;
777     case AV_CODEC_ID_RV10:
778         s->out_format = FMT_H263;
779         avctx->delay  = 0;
780         s->low_delay  = 1;
781         break;
782     case AV_CODEC_ID_RV20:
783         s->out_format      = FMT_H263;
784         avctx->delay       = 0;
785         s->low_delay       = 1;
786         s->modified_quant  = 1;
787         s->h263_aic        = 1;
788         s->h263_plus       = 1;
789         s->loop_filter     = 1;
790         s->unrestricted_mv = 0;
791         break;
792     case AV_CODEC_ID_MPEG4:
793         s->out_format      = FMT_H263;
794         s->h263_pred       = 1;
795         s->unrestricted_mv = 1;
796         s->low_delay       = s->max_b_frames ? 0 : 1;
797         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
798         break;
799     case AV_CODEC_ID_MSMPEG4V2:
800         s->out_format      = FMT_H263;
801         s->h263_pred       = 1;
802         s->unrestricted_mv = 1;
803         s->msmpeg4_version = 2;
804         avctx->delay       = 0;
805         s->low_delay       = 1;
806         break;
807     case AV_CODEC_ID_MSMPEG4V3:
808         s->out_format        = FMT_H263;
809         s->h263_pred         = 1;
810         s->unrestricted_mv   = 1;
811         s->msmpeg4_version   = 3;
812         s->flipflop_rounding = 1;
813         avctx->delay         = 0;
814         s->low_delay         = 1;
815         break;
816     case AV_CODEC_ID_WMV1:
817         s->out_format        = FMT_H263;
818         s->h263_pred         = 1;
819         s->unrestricted_mv   = 1;
820         s->msmpeg4_version   = 4;
821         s->flipflop_rounding = 1;
822         avctx->delay         = 0;
823         s->low_delay         = 1;
824         break;
825     case AV_CODEC_ID_WMV2:
826         s->out_format        = FMT_H263;
827         s->h263_pred         = 1;
828         s->unrestricted_mv   = 1;
829         s->msmpeg4_version   = 5;
830         s->flipflop_rounding = 1;
831         avctx->delay         = 0;
832         s->low_delay         = 1;
833         break;
834     default:
835         return -1;
836     }
837
838     avctx->has_b_frames = !s->low_delay;
839
840     s->encoding = 1;
841
842     s->progressive_frame    =
843     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
844                                                 CODEC_FLAG_INTERLACED_ME) ||
845                                 s->alternate_scan);
846
847     /* init */
848     if (ff_MPV_common_init(s) < 0)
849         return -1;
850
851     ff_dct_encode_init(s);
852
853     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
854         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
855
856     s->quant_precision = 5;
857
858     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
859     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
860
861     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
862         ff_h261_encode_init(s);
863     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
864         ff_h263_encode_init(s);
865     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
866         ff_msmpeg4_encode_init(s);
867     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
868         && s->out_format == FMT_MPEG1)
869         ff_mpeg1_encode_init(s);
870
871     /* init q matrix */
872     for (i = 0; i < 64; i++) {
873         int j = s->dsp.idct_permutation[i];
874         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
875             s->mpeg_quant) {
876             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
877             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
878         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
879             s->intra_matrix[j] =
880             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
881         } else {
882             /* mpeg1/2 */
883             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
884             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
885         }
886         if (s->avctx->intra_matrix)
887             s->intra_matrix[j] = s->avctx->intra_matrix[i];
888         if (s->avctx->inter_matrix)
889             s->inter_matrix[j] = s->avctx->inter_matrix[i];
890     }
891
892     /* precompute matrix */
893     /* for mjpeg, we do include qscale in the matrix */
894     if (s->out_format != FMT_MJPEG) {
895         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
896                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
897                           31, 1);
898         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
899                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
900                           31, 0);
901     }
902
903     if (ff_rate_control_init(s) < 0)
904         return -1;
905
906     return 0;
907 }
908
909 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
910 {
911     MpegEncContext *s = avctx->priv_data;
912
913     ff_rate_control_uninit(s);
914
915     ff_MPV_common_end(s);
916     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
917         s->out_format == FMT_MJPEG)
918         ff_mjpeg_encode_close(s);
919
920     av_freep(&avctx->extradata);
921
922     return 0;
923 }
924
/**
 * Sum of absolute errors of a 16x16 pixel block against a constant value.
 *
 * @param src    pointer to the top-left pixel of the block
 * @param ref    value each pixel is compared with
 * @param stride offset, in array elements, between two vertically adjacent
 *               pixels
 * @return sum over all 256 pixels of |pixel - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++) {
            const int diff = src[col + row * stride] - ref;

            total += FFABS(diff);
        }
    }

    return total;
}
938
939 static int get_intra_count(MpegEncContext *s, uint8_t *src,
940                            uint8_t *ref, int stride)
941 {
942     int x, y, w, h;
943     int acc = 0;
944
945     w = s->width  & ~15;
946     h = s->height & ~15;
947
948     for (y = 0; y < h; y += 16) {
949         for (x = 0; x < w; x += 16) {
950             int offset = x + y * stride;
951             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
952                                      16);
953             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
954             int sae  = get_sae(src + offset, mean, stride);
955
956             acc += sae + 500 < sad;
957         }
958     }
959     return acc;
960 }
961
962
963 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg)
964 {
965     AVFrame *pic = NULL;
966     int64_t pts;
967     int i;
968     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
969                                                  (s->low_delay ? 0 : 1);
970     int direct = 1;
971
972     if (pic_arg) {
973         pts = pic_arg->pts;
974         pic_arg->display_picture_number = s->input_picture_number++;
975
976         if (pts != AV_NOPTS_VALUE) {
977             if (s->user_specified_pts != AV_NOPTS_VALUE) {
978                 int64_t time = pts;
979                 int64_t last = s->user_specified_pts;
980
981                 if (time <= last) {
982                     av_log(s->avctx, AV_LOG_ERROR,
983                            "Error, Invalid timestamp=%"PRId64", "
984                            "last=%"PRId64"\n", pts, s->user_specified_pts);
985                     return -1;
986                 }
987
988                 if (!s->low_delay && pic_arg->display_picture_number == 1)
989                     s->dts_delta = time - last;
990             }
991             s->user_specified_pts = pts;
992         } else {
993             if (s->user_specified_pts != AV_NOPTS_VALUE) {
994                 s->user_specified_pts =
995                 pts = s->user_specified_pts + 1;
996                 av_log(s->avctx, AV_LOG_INFO,
997                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
998                        pts);
999             } else {
1000                 pts = pic_arg->display_picture_number;
1001             }
1002         }
1003     }
1004
1005   if (pic_arg) {
1006     if (encoding_delay && !(s->flags & CODEC_FLAG_INPUT_PRESERVED))
1007         direct = 0;
1008     if (pic_arg->linesize[0] != s->linesize)
1009         direct = 0;
1010     if (pic_arg->linesize[1] != s->uvlinesize)
1011         direct = 0;
1012     if (pic_arg->linesize[2] != s->uvlinesize)
1013         direct = 0;
1014
1015     av_dlog(s->avctx, "%d %d %d %d\n", pic_arg->linesize[0],
1016             pic_arg->linesize[1], s->linesize, s->uvlinesize);
1017
1018     if (direct) {
1019         i = ff_find_unused_picture(s, 1);
1020         if (i < 0)
1021             return i;
1022
1023         pic = &s->picture[i].f;
1024         pic->reference = 3;
1025
1026         for (i = 0; i < 4; i++) {
1027             pic->data[i]     = pic_arg->data[i];
1028             pic->linesize[i] = pic_arg->linesize[i];
1029         }
1030         if (ff_alloc_picture(s, (Picture *) pic, 1) < 0) {
1031             return -1;
1032         }
1033     } else {
1034         i = ff_find_unused_picture(s, 0);
1035         if (i < 0)
1036             return i;
1037
1038         pic = &s->picture[i].f;
1039         pic->reference = 3;
1040
1041         if (ff_alloc_picture(s, (Picture *) pic, 0) < 0) {
1042             return -1;
1043         }
1044
1045         if (pic->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1046             pic->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1047             pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1048             // empty
1049         } else {
1050             int h_chroma_shift, v_chroma_shift;
1051             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift,
1052                                           &v_chroma_shift);
1053
1054             for (i = 0; i < 3; i++) {
1055                 int src_stride = pic_arg->linesize[i];
1056                 int dst_stride = i ? s->uvlinesize : s->linesize;
1057                 int h_shift = i ? h_chroma_shift : 0;
1058                 int v_shift = i ? v_chroma_shift : 0;
1059                 int w = s->width  >> h_shift;
1060                 int h = s->height >> v_shift;
1061                 uint8_t *src = pic_arg->data[i];
1062                 uint8_t *dst = pic->data[i];
1063
1064                 if(s->codec_id == AV_CODEC_ID_AMV && !(s->avctx->flags & CODEC_FLAG_EMU_EDGE)){
1065                     h= ((s->height+15)/16*16)>>v_shift;
1066                 }
1067
1068                 if (!s->avctx->rc_buffer_size)
1069                     dst += INPLACE_OFFSET;
1070
1071                 if (src_stride == dst_stride)
1072                     memcpy(dst, src, src_stride * h);
1073                 else {
1074                     while (h--) {
1075                         memcpy(dst, src, w);
1076                         dst += dst_stride;
1077                         src += src_stride;
1078                     }
1079                 }
1080             }
1081         }
1082     }
1083     copy_picture_attributes(s, pic, pic_arg);
1084     pic->pts = pts; // we set this here to avoid modifiying pic_arg
1085   }
1086
1087     /* shift buffer entries */
1088     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1089         s->input_picture[i - 1] = s->input_picture[i];
1090
1091     s->input_picture[encoding_delay] = (Picture*) pic;
1092
1093     return 0;
1094 }
1095
1096 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1097 {
1098     int x, y, plane;
1099     int score = 0;
1100     int64_t score64 = 0;
1101
1102     for (plane = 0; plane < 3; plane++) {
1103         const int stride = p->f.linesize[plane];
1104         const int bw = plane ? 1 : 2;
1105         for (y = 0; y < s->mb_height * bw; y++) {
1106             for (x = 0; x < s->mb_width * bw; x++) {
1107                 int off = p->f.type == FF_BUFFER_TYPE_SHARED ? 0 : 16;
1108                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1109                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1110                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1111
1112                 switch (s->avctx->frame_skip_exp) {
1113                 case 0: score    =  FFMAX(score, v);          break;
1114                 case 1: score   += FFABS(v);                  break;
1115                 case 2: score   += v * v;                     break;
1116                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1117                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1118                 }
1119             }
1120         }
1121     }
1122
1123     if (score)
1124         score64 = score;
1125
1126     if (score64 < s->avctx->frame_skip_threshold)
1127         return 1;
1128     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1129         return 1;
1130     return 0;
1131 }
1132
1133 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1134 {
1135     AVPacket pkt = { 0 };
1136     int ret, got_output;
1137
1138     av_init_packet(&pkt);
1139     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1140     if (ret < 0)
1141         return ret;
1142
1143     ret = pkt.size;
1144     av_free_packet(&pkt);
1145     return ret;
1146 }
1147
1148 static int estimate_best_b_count(MpegEncContext *s)
1149 {
1150     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1151     AVCodecContext *c = avcodec_alloc_context3(NULL);
1152     AVFrame input[FF_MAX_B_FRAMES + 2];
1153     const int scale = s->avctx->brd_scale;
1154     int i, j, out_size, p_lambda, b_lambda, lambda2;
1155     int64_t best_rd  = INT64_MAX;
1156     int best_b_count = -1;
1157
1158     av_assert0(scale >= 0 && scale <= 3);
1159
1160     //emms_c();
1161     //s->next_picture_ptr->quality;
1162     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1163     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1164     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1165     if (!b_lambda) // FIXME we should do this somewhere else
1166         b_lambda = p_lambda;
1167     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1168                FF_LAMBDA_SHIFT;
1169
1170     c->width        = s->width  >> scale;
1171     c->height       = s->height >> scale;
1172     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1173                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1174     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1175     c->mb_decision  = s->avctx->mb_decision;
1176     c->me_cmp       = s->avctx->me_cmp;
1177     c->mb_cmp       = s->avctx->mb_cmp;
1178     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1179     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1180     c->time_base    = s->avctx->time_base;
1181     c->max_b_frames = s->max_b_frames;
1182
1183     if (avcodec_open2(c, codec, NULL) < 0)
1184         return -1;
1185
1186     for (i = 0; i < s->max_b_frames + 2; i++) {
1187         int ysize = c->width * c->height;
1188         int csize = (c->width / 2) * (c->height / 2);
1189         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1190                                                 s->next_picture_ptr;
1191
1192         avcodec_get_frame_defaults(&input[i]);
1193         input[i].data[0]     = av_malloc(ysize + 2 * csize);
1194         input[i].data[1]     = input[i].data[0] + ysize;
1195         input[i].data[2]     = input[i].data[1] + csize;
1196         input[i].linesize[0] = c->width;
1197         input[i].linesize[1] =
1198         input[i].linesize[2] = c->width / 2;
1199
1200         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1201             pre_input = *pre_input_ptr;
1202
1203             if (pre_input.f.type != FF_BUFFER_TYPE_SHARED && i) {
1204                 pre_input.f.data[0] += INPLACE_OFFSET;
1205                 pre_input.f.data[1] += INPLACE_OFFSET;
1206                 pre_input.f.data[2] += INPLACE_OFFSET;
1207             }
1208
1209             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
1210                                  pre_input.f.data[0], pre_input.f.linesize[0],
1211                                  c->width,      c->height);
1212             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
1213                                  pre_input.f.data[1], pre_input.f.linesize[1],
1214                                  c->width >> 1, c->height >> 1);
1215             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
1216                                  pre_input.f.data[2], pre_input.f.linesize[2],
1217                                  c->width >> 1, c->height >> 1);
1218         }
1219     }
1220
1221     for (j = 0; j < s->max_b_frames + 1; j++) {
1222         int64_t rd = 0;
1223
1224         if (!s->input_picture[j])
1225             break;
1226
1227         c->error[0] = c->error[1] = c->error[2] = 0;
1228
1229         input[0].pict_type = AV_PICTURE_TYPE_I;
1230         input[0].quality   = 1 * FF_QP2LAMBDA;
1231
1232         out_size = encode_frame(c, &input[0]);
1233
1234         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1235
1236         for (i = 0; i < s->max_b_frames + 1; i++) {
1237             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1238
1239             input[i + 1].pict_type = is_p ?
1240                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1241             input[i + 1].quality   = is_p ? p_lambda : b_lambda;
1242
1243             out_size = encode_frame(c, &input[i + 1]);
1244
1245             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1246         }
1247
1248         /* get the delayed frames */
1249         while (out_size) {
1250             out_size = encode_frame(c, NULL);
1251             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1252         }
1253
1254         rd += c->error[0] + c->error[1] + c->error[2];
1255
1256         if (rd < best_rd) {
1257             best_rd = rd;
1258             best_b_count = j;
1259         }
1260     }
1261
1262     avcodec_close(c);
1263     av_freep(&c);
1264
1265     for (i = 0; i < s->max_b_frames + 2; i++) {
1266         av_freep(&input[i].data[0]);
1267     }
1268
1269     return best_b_count;
1270 }
1271
1272 static int select_input_picture(MpegEncContext *s)
1273 {
1274     int i;
1275
1276     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1277         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1278     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1279
1280     /* set next picture type & ordering */
1281     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1282         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1283             s->next_picture_ptr == NULL || s->intra_only) {
1284             s->reordered_input_picture[0] = s->input_picture[0];
1285             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1286             s->reordered_input_picture[0]->f.coded_picture_number =
1287                 s->coded_picture_number++;
1288         } else {
1289             int b_frames;
1290
1291             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1292                 if (s->picture_in_gop_number < s->gop_size &&
1293                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1294                     // FIXME check that te gop check above is +-1 correct
1295                     if (s->input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED) {
1296                         for (i = 0; i < 4; i++)
1297                             s->input_picture[0]->f.data[i] = NULL;
1298                         s->input_picture[0]->f.type = 0;
1299                     } else {
1300                         assert(s->input_picture[0]->f.type == FF_BUFFER_TYPE_USER ||
1301                                s->input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL);
1302
1303                         s->avctx->release_buffer(s->avctx,
1304                                                  &s->input_picture[0]->f);
1305                     }
1306
1307                     emms_c();
1308                     ff_vbv_update(s, 0);
1309
1310                     goto no_output_pic;
1311                 }
1312             }
1313
1314             if (s->flags & CODEC_FLAG_PASS2) {
1315                 for (i = 0; i < s->max_b_frames + 1; i++) {
1316                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1317
1318                     if (pict_num >= s->rc_context.num_entries)
1319                         break;
1320                     if (!s->input_picture[i]) {
1321                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1322                         break;
1323                     }
1324
1325                     s->input_picture[i]->f.pict_type =
1326                         s->rc_context.entry[pict_num].new_pict_type;
1327                 }
1328             }
1329
1330             if (s->avctx->b_frame_strategy == 0) {
1331                 b_frames = s->max_b_frames;
1332                 while (b_frames && !s->input_picture[b_frames])
1333                     b_frames--;
1334             } else if (s->avctx->b_frame_strategy == 1) {
1335                 for (i = 1; i < s->max_b_frames + 1; i++) {
1336                     if (s->input_picture[i] &&
1337                         s->input_picture[i]->b_frame_score == 0) {
1338                         s->input_picture[i]->b_frame_score =
1339                             get_intra_count(s,
1340                                             s->input_picture[i    ]->f.data[0],
1341                                             s->input_picture[i - 1]->f.data[0],
1342                                             s->linesize) + 1;
1343                     }
1344                 }
1345                 for (i = 0; i < s->max_b_frames + 1; i++) {
1346                     if (s->input_picture[i] == NULL ||
1347                         s->input_picture[i]->b_frame_score - 1 >
1348                             s->mb_num / s->avctx->b_sensitivity)
1349                         break;
1350                 }
1351
1352                 b_frames = FFMAX(0, i - 1);
1353
1354                 /* reset scores */
1355                 for (i = 0; i < b_frames + 1; i++) {
1356                     s->input_picture[i]->b_frame_score = 0;
1357                 }
1358             } else if (s->avctx->b_frame_strategy == 2) {
1359                 b_frames = estimate_best_b_count(s);
1360             } else {
1361                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1362                 b_frames = 0;
1363             }
1364
1365             emms_c();
1366
1367             for (i = b_frames - 1; i >= 0; i--) {
1368                 int type = s->input_picture[i]->f.pict_type;
1369                 if (type && type != AV_PICTURE_TYPE_B)
1370                     b_frames = i;
1371             }
1372             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1373                 b_frames == s->max_b_frames) {
1374                 av_log(s->avctx, AV_LOG_ERROR,
1375                        "warning, too many b frames in a row\n");
1376             }
1377
1378             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1379                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1380                     s->gop_size > s->picture_in_gop_number) {
1381                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1382                 } else {
1383                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1384                         b_frames = 0;
1385                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1386                 }
1387             }
1388
1389             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1390                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1391                 b_frames--;
1392
1393             s->reordered_input_picture[0] = s->input_picture[b_frames];
1394             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1395                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1396             s->reordered_input_picture[0]->f.coded_picture_number =
1397                 s->coded_picture_number++;
1398             for (i = 0; i < b_frames; i++) {
1399                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1400                 s->reordered_input_picture[i + 1]->f.pict_type =
1401                     AV_PICTURE_TYPE_B;
1402                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1403                     s->coded_picture_number++;
1404             }
1405         }
1406     }
1407 no_output_pic:
1408     if (s->reordered_input_picture[0]) {
1409         s->reordered_input_picture[0]->f.reference =
1410            s->reordered_input_picture[0]->f.pict_type !=
1411                AV_PICTURE_TYPE_B ? 3 : 0;
1412
1413         ff_copy_picture(&s->new_picture, s->reordered_input_picture[0]);
1414
1415         if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED ||
1416             s->avctx->rc_buffer_size) {
1417             // input is a shared pix, so we can't modifiy it -> alloc a new
1418             // one & ensure that the shared one is reuseable
1419
1420             Picture *pic;
1421             int i = ff_find_unused_picture(s, 0);
1422             if (i < 0)
1423                 return i;
1424             pic = &s->picture[i];
1425
1426             pic->f.reference = s->reordered_input_picture[0]->f.reference;
1427             if (ff_alloc_picture(s, pic, 0) < 0) {
1428                 return -1;
1429             }
1430
1431             /* mark us unused / free shared pic */
1432             if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL)
1433                 s->avctx->release_buffer(s->avctx,
1434                                          &s->reordered_input_picture[0]->f);
1435             for (i = 0; i < 4; i++)
1436                 s->reordered_input_picture[0]->f.data[i] = NULL;
1437             s->reordered_input_picture[0]->f.type = 0;
1438
1439             copy_picture_attributes(s, &pic->f,
1440                                     &s->reordered_input_picture[0]->f);
1441
1442             s->current_picture_ptr = pic;
1443         } else {
1444             // input is not a shared pix -> reuse buffer for current_pix
1445
1446             assert(s->reordered_input_picture[0]->f.type ==
1447                        FF_BUFFER_TYPE_USER ||
1448                    s->reordered_input_picture[0]->f.type ==
1449                        FF_BUFFER_TYPE_INTERNAL);
1450
1451             s->current_picture_ptr = s->reordered_input_picture[0];
1452             for (i = 0; i < 4; i++) {
1453                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1454             }
1455         }
1456         ff_copy_picture(&s->current_picture, s->current_picture_ptr);
1457
1458         s->picture_number = s->new_picture.f.display_picture_number;
1459     } else {
1460         memset(&s->new_picture, 0, sizeof(Picture));
1461     }
1462     return 0;
1463 }
1464
1465 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1466                           AVFrame *pic_arg, int *got_packet)
1467 {
1468     MpegEncContext *s = avctx->priv_data;
1469     int i, stuffing_count, ret;
1470     int context_count = s->slice_context_count;
1471
1472     s->picture_in_gop_number++;
1473
1474     if (load_input_picture(s, pic_arg) < 0)
1475         return -1;
1476
1477     if (select_input_picture(s) < 0) {
1478         return -1;
1479     }
1480
1481     /* output? */
1482     if (s->new_picture.f.data[0]) {
1483         if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1484             return ret;
1485         if (s->mb_info) {
1486             s->mb_info_ptr = av_packet_new_side_data(pkt,
1487                                  AV_PKT_DATA_H263_MB_INFO,
1488                                  s->mb_width*s->mb_height*12);
1489             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1490         }
1491
1492         for (i = 0; i < context_count; i++) {
1493             int start_y = s->thread_context[i]->start_mb_y;
1494             int   end_y = s->thread_context[i]->  end_mb_y;
1495             int h       = s->mb_height;
1496             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1497             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1498
1499             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1500         }
1501
1502         s->pict_type = s->new_picture.f.pict_type;
1503         //emms_c();
1504         ff_MPV_frame_start(s, avctx);
1505 vbv_retry:
1506         if (encode_picture(s, s->picture_number) < 0)
1507             return -1;
1508
1509         avctx->header_bits = s->header_bits;
1510         avctx->mv_bits     = s->mv_bits;
1511         avctx->misc_bits   = s->misc_bits;
1512         avctx->i_tex_bits  = s->i_tex_bits;
1513         avctx->p_tex_bits  = s->p_tex_bits;
1514         avctx->i_count     = s->i_count;
1515         // FIXME f/b_count in avctx
1516         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1517         avctx->skip_count  = s->skip_count;
1518
1519         ff_MPV_frame_end(s);
1520
1521         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1522             ff_mjpeg_encode_picture_trailer(s);
1523
1524         if (avctx->rc_buffer_size) {
1525             RateControlContext *rcc = &s->rc_context;
1526             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1527
1528             if (put_bits_count(&s->pb) > max_size &&
1529                 s->lambda < s->avctx->lmax) {
1530                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1531                                        (s->qscale + 1) / s->qscale);
1532                 if (s->adaptive_quant) {
1533                     int i;
1534                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1535                         s->lambda_table[i] =
1536                             FFMAX(s->lambda_table[i] + 1,
1537                                   s->lambda_table[i] * (s->qscale + 1) /
1538                                   s->qscale);
1539                 }
1540                 s->mb_skipped = 0;        // done in MPV_frame_start()
1541                 // done in encode_picture() so we must undo it
1542                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1543                     if (s->flipflop_rounding          ||
1544                         s->codec_id == AV_CODEC_ID_H263P ||
1545                         s->codec_id == AV_CODEC_ID_MPEG4)
1546                         s->no_rounding ^= 1;
1547                 }
1548                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1549                     s->time_base       = s->last_time_base;
1550                     s->last_non_b_time = s->time - s->pp_time;
1551                 }
1552                 for (i = 0; i < context_count; i++) {
1553                     PutBitContext *pb = &s->thread_context[i]->pb;
1554                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1555                 }
1556                 goto vbv_retry;
1557             }
1558
1559             assert(s->avctx->rc_max_rate);
1560         }
1561
1562         if (s->flags & CODEC_FLAG_PASS1)
1563             ff_write_pass1_stats(s);
1564
1565         for (i = 0; i < 4; i++) {
1566             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1567             avctx->error[i] += s->current_picture_ptr->f.error[i];
1568         }
1569
1570         if (s->flags & CODEC_FLAG_PASS1)
1571             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1572                    avctx->i_tex_bits + avctx->p_tex_bits ==
1573                        put_bits_count(&s->pb));
1574         flush_put_bits(&s->pb);
1575         s->frame_bits  = put_bits_count(&s->pb);
1576
1577         stuffing_count = ff_vbv_update(s, s->frame_bits);
1578         s->stuffing_bits = 8*stuffing_count;
1579         if (stuffing_count) {
1580             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1581                     stuffing_count + 50) {
1582                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1583                 return -1;
1584             }
1585
1586             switch (s->codec_id) {
1587             case AV_CODEC_ID_MPEG1VIDEO:
1588             case AV_CODEC_ID_MPEG2VIDEO:
1589                 while (stuffing_count--) {
1590                     put_bits(&s->pb, 8, 0);
1591                 }
1592             break;
1593             case AV_CODEC_ID_MPEG4:
1594                 put_bits(&s->pb, 16, 0);
1595                 put_bits(&s->pb, 16, 0x1C3);
1596                 stuffing_count -= 4;
1597                 while (stuffing_count--) {
1598                     put_bits(&s->pb, 8, 0xFF);
1599                 }
1600             break;
1601             default:
1602                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1603             }
1604             flush_put_bits(&s->pb);
1605             s->frame_bits  = put_bits_count(&s->pb);
1606         }
1607
1608         /* update mpeg1/2 vbv_delay for CBR */
1609         if (s->avctx->rc_max_rate                          &&
1610             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1611             s->out_format == FMT_MPEG1                     &&
1612             90000LL * (avctx->rc_buffer_size - 1) <=
1613                 s->avctx->rc_max_rate * 0xFFFFLL) {
1614             int vbv_delay, min_delay;
1615             double inbits  = s->avctx->rc_max_rate *
1616                              av_q2d(s->avctx->time_base);
1617             int    minbits = s->frame_bits - 8 *
1618                              (s->vbv_delay_ptr - s->pb.buf - 1);
1619             double bits    = s->rc_context.buffer_index + minbits - inbits;
1620
1621             if (bits < 0)
1622                 av_log(s->avctx, AV_LOG_ERROR,
1623                        "Internal error, negative bits\n");
1624
1625             assert(s->repeat_first_field == 0);
1626
1627             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1628             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1629                         s->avctx->rc_max_rate;
1630
1631             vbv_delay = FFMAX(vbv_delay, min_delay);
1632
1633             av_assert0(vbv_delay < 0xFFFF);
1634
1635             s->vbv_delay_ptr[0] &= 0xF8;
1636             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1637             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1638             s->vbv_delay_ptr[2] &= 0x07;
1639             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1640             avctx->vbv_delay     = vbv_delay * 300;
1641         }
1642         s->total_bits     += s->frame_bits;
1643         avctx->frame_bits  = s->frame_bits;
1644
1645         pkt->pts = s->current_picture.f.pts;
1646         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1647             if (!s->current_picture.f.coded_picture_number)
1648                 pkt->dts = pkt->pts - s->dts_delta;
1649             else
1650                 pkt->dts = s->reordered_pts;
1651             s->reordered_pts = pkt->pts;
1652         } else
1653             pkt->dts = pkt->pts;
1654         if (s->current_picture.f.key_frame)
1655             pkt->flags |= AV_PKT_FLAG_KEY;
1656         if (s->mb_info)
1657             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1658     } else {
1659         s->frame_bits = 0;
1660     }
1661     assert((s->frame_bits & 7) == 0);
1662
1663     pkt->size = s->frame_bits / 8;
1664     *got_packet = !!pkt->size;
1665     return 0;
1666 }
1667
1668 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1669                                                 int n, int threshold)
1670 {
1671     static const char tab[64] = {
1672         3, 2, 2, 1, 1, 1, 1, 1,
1673         1, 1, 1, 1, 1, 1, 1, 1,
1674         1, 1, 1, 1, 1, 1, 1, 1,
1675         0, 0, 0, 0, 0, 0, 0, 0,
1676         0, 0, 0, 0, 0, 0, 0, 0,
1677         0, 0, 0, 0, 0, 0, 0, 0,
1678         0, 0, 0, 0, 0, 0, 0, 0,
1679         0, 0, 0, 0, 0, 0, 0, 0
1680     };
1681     int score = 0;
1682     int run = 0;
1683     int i;
1684     DCTELEM *block = s->block[n];
1685     const int last_index = s->block_last_index[n];
1686     int skip_dc;
1687
1688     if (threshold < 0) {
1689         skip_dc = 0;
1690         threshold = -threshold;
1691     } else
1692         skip_dc = 1;
1693
1694     /* Are all we could set to zero already zero? */
1695     if (last_index <= skip_dc - 1)
1696         return;
1697
1698     for (i = 0; i <= last_index; i++) {
1699         const int j = s->intra_scantable.permutated[i];
1700         const int level = FFABS(block[j]);
1701         if (level == 1) {
1702             if (skip_dc && i == 0)
1703                 continue;
1704             score += tab[run];
1705             run = 0;
1706         } else if (level > 1) {
1707             return;
1708         } else {
1709             run++;
1710         }
1711     }
1712     if (score >= threshold)
1713         return;
1714     for (i = skip_dc; i <= last_index; i++) {
1715         const int j = s->intra_scantable.permutated[i];
1716         block[j] = 0;
1717     }
1718     if (block[0])
1719         s->block_last_index[n] = 0;
1720     else
1721         s->block_last_index[n] = -1;
1722 }
1723
1724 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block,
1725                                int last_index)
1726 {
1727     int i;
1728     const int maxlevel = s->max_qcoeff;
1729     const int minlevel = s->min_qcoeff;
1730     int overflow = 0;
1731
1732     if (s->mb_intra) {
1733         i = 1; // skip clipping of intra dc
1734     } else
1735         i = 0;
1736
1737     for (; i <= last_index; i++) {
1738         const int j = s->intra_scantable.permutated[i];
1739         int level = block[j];
1740
1741         if (level > maxlevel) {
1742             level = maxlevel;
1743             overflow++;
1744         } else if (level < minlevel) {
1745             level = minlevel;
1746             overflow++;
1747         }
1748
1749         block[j] = level;
1750     }
1751
1752     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1753         av_log(s->avctx, AV_LOG_INFO,
1754                "warning, clipping %d dct coefficients to %d..%d\n",
1755                overflow, minlevel, maxlevel);
1756 }
1757
/**
 * Fill an 8x8 weight table from the local activity of an 8x8 pixel block.
 *
 * For every pixel the neighbourhood of up to 3x3 samples (cropped at the
 * borders of the 8x8 block) is examined, and the weight is set to
 * 36 * sqrt(count * sum(v^2) - sum(v)^2) / count.
 *
 * @param weight output table of 64 entries, stored row by row
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance in bytes between two pixel rows
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;
    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left    = FFMAX(col - 1, 0);
            const int right   = FFMIN(8, col + 2);
            /* number of samples inside the cropped window */
            const int samples = (bottom - top) * (right - left);
            int total  = 0;
            int energy = 0;
            int nx, ny;

            for (ny = top; ny < bottom; ny++) {
                const uint8_t *line = ptr + ny * stride;

                for (nx = left; nx < right; nx++) {
                    const int v = line[nx];

                    total  += v;
                    energy += v * v;
                }
            }
            weight[col + 8 * row] =
                (36 * ff_sqrt(samples * energy - total * total)) / samples;
        }
    }
}
1781
1782 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1783                                                 int motion_x, int motion_y,
1784                                                 int mb_block_height,
1785                                                 int mb_block_count)
1786 {
1787     int16_t weight[8][64];
1788     DCTELEM orig[8][64];
1789     const int mb_x = s->mb_x;
1790     const int mb_y = s->mb_y;
1791     int i;
1792     int skip_dct[8];
1793     int dct_offset = s->linesize * 8; // default for progressive frames
1794     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1795     int wrap_y, wrap_c;
1796
1797     for (i = 0; i < mb_block_count; i++)
1798         skip_dct[i] = s->skipdct;
1799
1800     if (s->adaptive_quant) {
1801         const int last_qp = s->qscale;
1802         const int mb_xy = mb_x + mb_y * s->mb_stride;
1803
1804         s->lambda = s->lambda_table[mb_xy];
1805         update_qscale(s);
1806
1807         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1808             s->qscale = s->current_picture_ptr->f.qscale_table[mb_xy];
1809             s->dquant = s->qscale - last_qp;
1810
1811             if (s->out_format == FMT_H263) {
1812                 s->dquant = av_clip(s->dquant, -2, 2);
1813
1814                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1815                     if (!s->mb_intra) {
1816                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1817                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1818                                 s->dquant = 0;
1819                         }
1820                         if (s->mv_type == MV_TYPE_8X8)
1821                             s->dquant = 0;
1822                     }
1823                 }
1824             }
1825         }
1826         ff_set_qscale(s, last_qp + s->dquant);
1827     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1828         ff_set_qscale(s, s->qscale + s->dquant);
1829
1830     wrap_y = s->linesize;
1831     wrap_c = s->uvlinesize;
1832     ptr_y  = s->new_picture.f.data[0] +
1833              (mb_y * 16 * wrap_y)              + mb_x * 16;
1834     ptr_cb = s->new_picture.f.data[1] +
1835              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1836     ptr_cr = s->new_picture.f.data[2] +
1837              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1838
1839     if((mb_x*16+16 > s->width || mb_y*16+16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
1840         uint8_t *ebuf = s->edge_emu_buffer + 32;
1841         s->dsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
1842                                 mb_y * 16, s->width, s->height);
1843         ptr_y = ebuf;
1844         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, 8,
1845                                 mb_block_height, mb_x * 8, mb_y * 8,
1846                                 (s->width+1) >> 1, (s->height+1) >> 1);
1847         ptr_cb = ebuf + 18 * wrap_y;
1848         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr, wrap_c, 8,
1849                                 mb_block_height, mb_x * 8, mb_y * 8,
1850                                 (s->width+1) >> 1, (s->height+1) >> 1);
1851         ptr_cr = ebuf + 18 * wrap_y + 8;
1852     }
1853
1854     if (s->mb_intra) {
1855         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1856             int progressive_score, interlaced_score;
1857
1858             s->interlaced_dct = 0;
1859             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1860                                                     NULL, wrap_y, 8) +
1861                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1862                                                     NULL, wrap_y, 8) - 400;
1863
1864             if (progressive_score > 0) {
1865                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1866                                                        NULL, wrap_y * 2, 8) +
1867                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1868                                                        NULL, wrap_y * 2, 8);
1869                 if (progressive_score > interlaced_score) {
1870                     s->interlaced_dct = 1;
1871
1872                     dct_offset = wrap_y;
1873                     wrap_y <<= 1;
1874                     if (s->chroma_format == CHROMA_422)
1875                         wrap_c <<= 1;
1876                 }
1877             }
1878         }
1879
1880         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1881         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1882         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1883         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1884
1885         if (s->flags & CODEC_FLAG_GRAY) {
1886             skip_dct[4] = 1;
1887             skip_dct[5] = 1;
1888         } else {
1889             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1890             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1891             if (!s->chroma_y_shift) { /* 422 */
1892                 s->dsp.get_pixels(s->block[6],
1893                                   ptr_cb + (dct_offset >> 1), wrap_c);
1894                 s->dsp.get_pixels(s->block[7],
1895                                   ptr_cr + (dct_offset >> 1), wrap_c);
1896             }
1897         }
1898     } else {
1899         op_pixels_func (*op_pix)[4];
1900         qpel_mc_func (*op_qpix)[16];
1901         uint8_t *dest_y, *dest_cb, *dest_cr;
1902
1903         dest_y  = s->dest[0];
1904         dest_cb = s->dest[1];
1905         dest_cr = s->dest[2];
1906
1907         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1908             op_pix  = s->dsp.put_pixels_tab;
1909             op_qpix = s->dsp.put_qpel_pixels_tab;
1910         } else {
1911             op_pix  = s->dsp.put_no_rnd_pixels_tab;
1912             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1913         }
1914
1915         if (s->mv_dir & MV_DIR_FORWARD) {
1916             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
1917                           s->last_picture.f.data,
1918                           op_pix, op_qpix);
1919             op_pix  = s->dsp.avg_pixels_tab;
1920             op_qpix = s->dsp.avg_qpel_pixels_tab;
1921         }
1922         if (s->mv_dir & MV_DIR_BACKWARD) {
1923             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
1924                           s->next_picture.f.data,
1925                           op_pix, op_qpix);
1926         }
1927
1928         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1929             int progressive_score, interlaced_score;
1930
1931             s->interlaced_dct = 0;
1932             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1933                                                     ptr_y,              wrap_y,
1934                                                     8) +
1935                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1936                                                     ptr_y + wrap_y * 8, wrap_y,
1937                                                     8) - 400;
1938
1939             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1940                 progressive_score -= 400;
1941
1942             if (progressive_score > 0) {
1943                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1944                                                        ptr_y,
1945                                                        wrap_y * 2, 8) +
1946                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1947                                                        ptr_y + wrap_y,
1948                                                        wrap_y * 2, 8);
1949
1950                 if (progressive_score > interlaced_score) {
1951                     s->interlaced_dct = 1;
1952
1953                     dct_offset = wrap_y;
1954                     wrap_y <<= 1;
1955                     if (s->chroma_format == CHROMA_422)
1956                         wrap_c <<= 1;
1957                 }
1958             }
1959         }
1960
1961         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1962         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1963         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1964                            dest_y + dct_offset, wrap_y);
1965         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1966                            dest_y + dct_offset + 8, wrap_y);
1967
1968         if (s->flags & CODEC_FLAG_GRAY) {
1969             skip_dct[4] = 1;
1970             skip_dct[5] = 1;
1971         } else {
1972             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1973             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1974             if (!s->chroma_y_shift) { /* 422 */
1975                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1976                                    dest_cb + (dct_offset >> 1), wrap_c);
1977                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
1978                                    dest_cr + (dct_offset >> 1), wrap_c);
1979             }
1980         }
1981         /* pre quantization */
1982         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1983                 2 * s->qscale * s->qscale) {
1984             // FIXME optimize
1985             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1986                               wrap_y, 8) < 20 * s->qscale)
1987                 skip_dct[0] = 1;
1988             if (s->dsp.sad[1](NULL, ptr_y + 8,
1989                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1990                 skip_dct[1] = 1;
1991             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1992                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1993                 skip_dct[2] = 1;
1994             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1995                               dest_y + dct_offset + 8,
1996                               wrap_y, 8) < 20 * s->qscale)
1997                 skip_dct[3] = 1;
1998             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
1999                               wrap_c, 8) < 20 * s->qscale)
2000                 skip_dct[4] = 1;
2001             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
2002                               wrap_c, 8) < 20 * s->qscale)
2003                 skip_dct[5] = 1;
2004             if (!s->chroma_y_shift) { /* 422 */
2005                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2006                                   dest_cb + (dct_offset >> 1),
2007                                   wrap_c, 8) < 20 * s->qscale)
2008                     skip_dct[6] = 1;
2009                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2010                                   dest_cr + (dct_offset >> 1),
2011                                   wrap_c, 8) < 20 * s->qscale)
2012                     skip_dct[7] = 1;
2013             }
2014         }
2015     }
2016
2017     if (s->quantizer_noise_shaping) {
2018         if (!skip_dct[0])
2019             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2020         if (!skip_dct[1])
2021             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2022         if (!skip_dct[2])
2023             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2024         if (!skip_dct[3])
2025             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2026         if (!skip_dct[4])
2027             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2028         if (!skip_dct[5])
2029             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2030         if (!s->chroma_y_shift) { /* 422 */
2031             if (!skip_dct[6])
2032                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2033                                   wrap_c);
2034             if (!skip_dct[7])
2035                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2036                                   wrap_c);
2037         }
2038         memcpy(orig[0], s->block[0], sizeof(DCTELEM) * 64 * mb_block_count);
2039     }
2040
2041     /* DCT & quantize */
2042     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2043     {
2044         for (i = 0; i < mb_block_count; i++) {
2045             if (!skip_dct[i]) {
2046                 int overflow;
2047                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2048                 // FIXME we could decide to change to quantizer instead of
2049                 // clipping
2050                 // JS: I don't think that would be a good idea it could lower
2051                 //     quality instead of improve it. Just INTRADC clipping
2052                 //     deserves changes in quantizer
2053                 if (overflow)
2054                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2055             } else
2056                 s->block_last_index[i] = -1;
2057         }
2058         if (s->quantizer_noise_shaping) {
2059             for (i = 0; i < mb_block_count; i++) {
2060                 if (!skip_dct[i]) {
2061                     s->block_last_index[i] =
2062                         dct_quantize_refine(s, s->block[i], weight[i],
2063                                             orig[i], i, s->qscale);
2064                 }
2065             }
2066         }
2067
2068         if (s->luma_elim_threshold && !s->mb_intra)
2069             for (i = 0; i < 4; i++)
2070                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2071         if (s->chroma_elim_threshold && !s->mb_intra)
2072             for (i = 4; i < mb_block_count; i++)
2073                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2074
2075         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2076             for (i = 0; i < mb_block_count; i++) {
2077                 if (s->block_last_index[i] == -1)
2078                     s->coded_score[i] = INT_MAX / 256;
2079             }
2080         }
2081     }
2082
2083     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2084         s->block_last_index[4] =
2085         s->block_last_index[5] = 0;
2086         s->block[4][0] =
2087         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2088     }
2089
2090     // non c quantize code returns incorrect block_last_index FIXME
2091     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2092         for (i = 0; i < mb_block_count; i++) {
2093             int j;
2094             if (s->block_last_index[i] > 0) {
2095                 for (j = 63; j > 0; j--) {
2096                     if (s->block[i][s->intra_scantable.permutated[j]])
2097                         break;
2098                 }
2099                 s->block_last_index[i] = j;
2100             }
2101         }
2102     }
2103
2104     /* huffman encode */
2105     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2106     case AV_CODEC_ID_MPEG1VIDEO:
2107     case AV_CODEC_ID_MPEG2VIDEO:
2108         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2109             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2110         break;
2111     case AV_CODEC_ID_MPEG4:
2112         if (CONFIG_MPEG4_ENCODER)
2113             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2114         break;
2115     case AV_CODEC_ID_MSMPEG4V2:
2116     case AV_CODEC_ID_MSMPEG4V3:
2117     case AV_CODEC_ID_WMV1:
2118         if (CONFIG_MSMPEG4_ENCODER)
2119             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2120         break;
2121     case AV_CODEC_ID_WMV2:
2122         if (CONFIG_WMV2_ENCODER)
2123             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2124         break;
2125     case AV_CODEC_ID_H261:
2126         if (CONFIG_H261_ENCODER)
2127             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2128         break;
2129     case AV_CODEC_ID_H263:
2130     case AV_CODEC_ID_H263P:
2131     case AV_CODEC_ID_FLV1:
2132     case AV_CODEC_ID_RV10:
2133     case AV_CODEC_ID_RV20:
2134         if (CONFIG_H263_ENCODER)
2135             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2136         break;
2137     case AV_CODEC_ID_MJPEG:
2138     case AV_CODEC_ID_AMV:
2139         if (CONFIG_MJPEG_ENCODER)
2140             ff_mjpeg_encode_mb(s, s->block);
2141         break;
2142     default:
2143         av_assert1(0);
2144     }
2145 }
2146
2147 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2148 {
2149     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2150     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2151 }
2152
2153 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2154     int i;
2155
2156     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2157
2158     /* mpeg1 */
2159     d->mb_skip_run= s->mb_skip_run;
2160     for(i=0; i<3; i++)
2161         d->last_dc[i] = s->last_dc[i];
2162
2163     /* statistics */
2164     d->mv_bits= s->mv_bits;
2165     d->i_tex_bits= s->i_tex_bits;
2166     d->p_tex_bits= s->p_tex_bits;
2167     d->i_count= s->i_count;
2168     d->f_count= s->f_count;
2169     d->b_count= s->b_count;
2170     d->skip_count= s->skip_count;
2171     d->misc_bits= s->misc_bits;
2172     d->last_bits= 0;
2173
2174     d->mb_skipped= 0;
2175     d->qscale= s->qscale;
2176     d->dquant= s->dquant;
2177
2178     d->esc3_level_length= s->esc3_level_length;
2179 }
2180
2181 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2182     int i;
2183
2184     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2185     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2186
2187     /* mpeg1 */
2188     d->mb_skip_run= s->mb_skip_run;
2189     for(i=0; i<3; i++)
2190         d->last_dc[i] = s->last_dc[i];
2191
2192     /* statistics */
2193     d->mv_bits= s->mv_bits;
2194     d->i_tex_bits= s->i_tex_bits;
2195     d->p_tex_bits= s->p_tex_bits;
2196     d->i_count= s->i_count;
2197     d->f_count= s->f_count;
2198     d->b_count= s->b_count;
2199     d->skip_count= s->skip_count;
2200     d->misc_bits= s->misc_bits;
2201
2202     d->mb_intra= s->mb_intra;
2203     d->mb_skipped= s->mb_skipped;
2204     d->mv_type= s->mv_type;
2205     d->mv_dir= s->mv_dir;
2206     d->pb= s->pb;
2207     if(s->data_partitioning){
2208         d->pb2= s->pb2;
2209         d->tex_pb= s->tex_pb;
2210     }
2211     d->block= s->block;
2212     for(i=0; i<8; i++)
2213         d->block_last_index[i]= s->block_last_index[i];
2214     d->interlaced_dct= s->interlaced_dct;
2215     d->qscale= s->qscale;
2216
2217     d->esc3_level_length= s->esc3_level_length;
2218 }
2219
2220 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2221                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2222                            int *dmin, int *next_block, int motion_x, int motion_y)
2223 {
2224     int score;
2225     uint8_t *dest_backup[3];
2226
2227     copy_context_before_encode(s, backup, type);
2228
2229     s->block= s->blocks[*next_block];
2230     s->pb= pb[*next_block];
2231     if(s->data_partitioning){
2232         s->pb2   = pb2   [*next_block];
2233         s->tex_pb= tex_pb[*next_block];
2234     }
2235
2236     if(*next_block){
2237         memcpy(dest_backup, s->dest, sizeof(s->dest));
2238         s->dest[0] = s->rd_scratchpad;
2239         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2240         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2241         assert(s->linesize >= 32); //FIXME
2242     }
2243
2244     encode_mb(s, motion_x, motion_y);
2245
2246     score= put_bits_count(&s->pb);
2247     if(s->data_partitioning){
2248         score+= put_bits_count(&s->pb2);
2249         score+= put_bits_count(&s->tex_pb);
2250     }
2251
2252     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2253         ff_MPV_decode_mb(s, s->block);
2254
2255         score *= s->lambda2;
2256         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2257     }
2258
2259     if(*next_block){
2260         memcpy(s->dest, dest_backup, sizeof(s->dest));
2261     }
2262
2263     if(score<*dmin){
2264         *dmin= score;
2265         *next_block^=1;
2266
2267         copy_context_after_encode(best, s, type);
2268     }
2269 }
2270
2271 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2272     uint32_t *sq = ff_squareTbl + 256;
2273     int acc=0;
2274     int x,y;
2275
2276     if(w==16 && h==16)
2277         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2278     else if(w==8 && h==8)
2279         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2280
2281     for(y=0; y<h; y++){
2282         for(x=0; x<w; x++){
2283             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2284         }
2285     }
2286
2287     av_assert2(acc>=0);
2288
2289     return acc;
2290 }
2291
2292 static int sse_mb(MpegEncContext *s){
2293     int w= 16;
2294     int h= 16;
2295
2296     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2297     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2298
2299     if(w==16 && h==16)
2300       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2301         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2302                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2303                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2304       }else{
2305         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2306                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2307                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2308       }
2309     else
2310         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2311                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2312                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2313 }
2314
2315 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2316     MpegEncContext *s= *(void**)arg;
2317
2318
2319     s->me.pre_pass=1;
2320     s->me.dia_size= s->avctx->pre_dia_size;
2321     s->first_slice_line=1;
2322     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2323         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2324             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2325         }
2326         s->first_slice_line=0;
2327     }
2328
2329     s->me.pre_pass=0;
2330
2331     return 0;
2332 }
2333
2334 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2335     MpegEncContext *s= *(void**)arg;
2336
2337     ff_check_alignment();
2338
2339     s->me.dia_size= s->avctx->dia_size;
2340     s->first_slice_line=1;
2341     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2342         s->mb_x=0; //for block init below
2343         ff_init_block_index(s);
2344         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2345             s->block_index[0]+=2;
2346             s->block_index[1]+=2;
2347             s->block_index[2]+=2;
2348             s->block_index[3]+=2;
2349
2350             /* compute motion vector & mb_type and store in context */
2351             if(s->pict_type==AV_PICTURE_TYPE_B)
2352                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2353             else
2354                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2355         }
2356         s->first_slice_line=0;
2357     }
2358     return 0;
2359 }
2360
2361 static int mb_var_thread(AVCodecContext *c, void *arg){
2362     MpegEncContext *s= *(void**)arg;
2363     int mb_x, mb_y;
2364
2365     ff_check_alignment();
2366
2367     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2368         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2369             int xx = mb_x * 16;
2370             int yy = mb_y * 16;
2371             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2372             int varc;
2373             int sum = s->dsp.pix_sum(pix, s->linesize);
2374
2375             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2376
2377             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2378             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2379             s->me.mb_var_sum_temp    += varc;
2380         }
2381     }
2382     return 0;
2383 }
2384
2385 static void write_slice_end(MpegEncContext *s){
2386     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2387         if(s->partitioned_frame){
2388             ff_mpeg4_merge_partitions(s);
2389         }
2390
2391         ff_mpeg4_stuffing(&s->pb);
2392     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2393         ff_mjpeg_encode_stuffing(s);
2394     }
2395
2396     avpriv_align_put_bits(&s->pb);
2397     flush_put_bits(&s->pb);
2398
2399     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2400         s->misc_bits+= get_bits_diff(s);
2401 }
2402
2403 static void write_mb_info(MpegEncContext *s)
2404 {
2405     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2406     int offset = put_bits_count(&s->pb);
2407     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2408     int gobn = s->mb_y / s->gob_index;
2409     int pred_x, pred_y;
2410     if (CONFIG_H263_ENCODER)
2411         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2412     bytestream_put_le32(&ptr, offset);
2413     bytestream_put_byte(&ptr, s->qscale);
2414     bytestream_put_byte(&ptr, gobn);
2415     bytestream_put_le16(&ptr, mba);
2416     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2417     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2418     /* 4MV not implemented */
2419     bytestream_put_byte(&ptr, 0); /* hmv2 */
2420     bytestream_put_byte(&ptr, 0); /* vmv2 */
2421 }
2422
2423 static void update_mb_info(MpegEncContext *s, int startcode)
2424 {
2425     if (!s->mb_info)
2426         return;
2427     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2428         s->mb_info_size += 12;
2429         s->prev_mb_info = s->last_mb_info;
2430     }
2431     if (startcode) {
2432         s->prev_mb_info = put_bits_count(&s->pb)/8;
2433         /* This might have incremented mb_info_size above, and we return without
2434          * actually writing any info into that slot yet. But in that case,
2435          * this will be called again at the start of the after writing the
2436          * start code, actually writing the mb info. */
2437         return;
2438     }
2439
2440     s->last_mb_info = put_bits_count(&s->pb)/8;
2441     if (!s->mb_info_size)
2442         s->mb_info_size += 12;
2443     write_mb_info(s);
2444 }
2445
2446 static int encode_thread(AVCodecContext *c, void *arg){
2447     MpegEncContext *s= *(void**)arg;
2448     int mb_x, mb_y, pdif = 0;
2449     int chr_h= 16>>s->chroma_y_shift;
2450     int i, j;
2451     MpegEncContext best_s, backup_s;
2452     uint8_t bit_buf[2][MAX_MB_BYTES];
2453     uint8_t bit_buf2[2][MAX_MB_BYTES];
2454     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2455     PutBitContext pb[2], pb2[2], tex_pb[2];
2456
2457     ff_check_alignment();
2458
2459     for(i=0; i<2; i++){
2460         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2461         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2462         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2463     }
2464
2465     s->last_bits= put_bits_count(&s->pb);
2466     s->mv_bits=0;
2467     s->misc_bits=0;
2468     s->i_tex_bits=0;
2469     s->p_tex_bits=0;
2470     s->i_count=0;
2471     s->f_count=0;
2472     s->b_count=0;
2473     s->skip_count=0;
2474
2475     for(i=0; i<3; i++){
2476         /* init last dc values */
2477         /* note: quant matrix value (8) is implied here */
2478         s->last_dc[i] = 128 << s->intra_dc_precision;
2479
2480         s->current_picture.f.error[i] = 0;
2481     }
2482     if(s->codec_id==AV_CODEC_ID_AMV){
2483         s->last_dc[0] = 128*8/13;
2484         s->last_dc[1] = 128*8/14;
2485         s->last_dc[2] = 128*8/14;
2486     }
2487     s->mb_skip_run = 0;
2488     memset(s->last_mv, 0, sizeof(s->last_mv));
2489
2490     s->last_mv_dir = 0;
2491
2492     switch(s->codec_id){
2493     case AV_CODEC_ID_H263:
2494     case AV_CODEC_ID_H263P:
2495     case AV_CODEC_ID_FLV1:
2496         if (CONFIG_H263_ENCODER)
2497             s->gob_index = ff_h263_get_gob_height(s);
2498         break;
2499     case AV_CODEC_ID_MPEG4:
2500         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2501             ff_mpeg4_init_partitions(s);
2502         break;
2503     }
2504
2505     s->resync_mb_x=0;
2506     s->resync_mb_y=0;
2507     s->first_slice_line = 1;
2508     s->ptr_lastgob = s->pb.buf;
2509     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2510         s->mb_x=0;
2511         s->mb_y= mb_y;
2512
2513         ff_set_qscale(s, s->qscale);
2514         ff_init_block_index(s);
2515
2516         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2517             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2518             int mb_type= s->mb_type[xy];
2519 //            int d;
2520             int dmin= INT_MAX;
2521             int dir;
2522
2523             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2524                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2525                 return -1;
2526             }
2527             if(s->data_partitioning){
2528                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2529                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2530                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2531                     return -1;
2532                 }
2533             }
2534
2535             s->mb_x = mb_x;
2536             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2537             ff_update_block_index(s);
2538
2539             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2540                 ff_h261_reorder_mb_index(s);
2541                 xy= s->mb_y*s->mb_stride + s->mb_x;
2542                 mb_type= s->mb_type[xy];
2543             }
2544
2545             /* write gob / video packet header  */
2546             if(s->rtp_mode){
2547                 int current_packet_size, is_gob_start;
2548
2549                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2550
2551                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2552
2553                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2554
2555                 switch(s->codec_id){
2556                 case AV_CODEC_ID_H263:
2557                 case AV_CODEC_ID_H263P:
2558                     if(!s->h263_slice_structured)
2559                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2560                     break;
2561                 case AV_CODEC_ID_MPEG2VIDEO:
2562                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2563                 case AV_CODEC_ID_MPEG1VIDEO:
2564                     if(s->mb_skip_run) is_gob_start=0;
2565                     break;
2566                 case AV_CODEC_ID_MJPEG:
2567                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2568                     break;
2569                 }
2570
2571                 if(is_gob_start){
2572                     if(s->start_mb_y != mb_y || mb_x!=0){
2573                         write_slice_end(s);
2574                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2575                             ff_mpeg4_init_partitions(s);
2576                         }
2577                     }
2578
2579                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2580                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2581
2582                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2583                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2584                         int d= 100 / s->avctx->error_rate;
2585                         if(r % d == 0){
2586                             current_packet_size=0;
2587                             s->pb.buf_ptr= s->ptr_lastgob;
2588                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2589                         }
2590                     }
2591
2592                     if (s->avctx->rtp_callback){
2593                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2594                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2595                     }
2596                     update_mb_info(s, 1);
2597
2598                     switch(s->codec_id){
2599                     case AV_CODEC_ID_MPEG4:
2600                         if (CONFIG_MPEG4_ENCODER) {
2601                             ff_mpeg4_encode_video_packet_header(s);
2602                             ff_mpeg4_clean_buffers(s);
2603                         }
2604                     break;
2605                     case AV_CODEC_ID_MPEG1VIDEO:
2606                     case AV_CODEC_ID_MPEG2VIDEO:
2607                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2608                             ff_mpeg1_encode_slice_header(s);
2609                             ff_mpeg1_clean_buffers(s);
2610                         }
2611                     break;
2612                     case AV_CODEC_ID_H263:
2613                     case AV_CODEC_ID_H263P:
2614                         if (CONFIG_H263_ENCODER)
2615                             ff_h263_encode_gob_header(s, mb_y);
2616                     break;
2617                     }
2618
2619                     if(s->flags&CODEC_FLAG_PASS1){
2620                         int bits= put_bits_count(&s->pb);
2621                         s->misc_bits+= bits - s->last_bits;
2622                         s->last_bits= bits;
2623                     }
2624
2625                     s->ptr_lastgob += current_packet_size;
2626                     s->first_slice_line=1;
2627                     s->resync_mb_x=mb_x;
2628                     s->resync_mb_y=mb_y;
2629                 }
2630             }
2631
2632             if(  (s->resync_mb_x   == s->mb_x)
2633                && s->resync_mb_y+1 == s->mb_y){
2634                 s->first_slice_line=0;
2635             }
2636
2637             s->mb_skipped=0;
2638             s->dquant=0; //only for QP_RD
2639
2640             update_mb_info(s, 0);
2641
2642             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2643                 int next_block=0;
2644                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2645
2646                 copy_context_before_encode(&backup_s, s, -1);
2647                 backup_s.pb= s->pb;
2648                 best_s.data_partitioning= s->data_partitioning;
2649                 best_s.partitioned_frame= s->partitioned_frame;
2650                 if(s->data_partitioning){
2651                     backup_s.pb2= s->pb2;
2652                     backup_s.tex_pb= s->tex_pb;
2653                 }
2654
2655                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2656                     s->mv_dir = MV_DIR_FORWARD;
2657                     s->mv_type = MV_TYPE_16X16;
2658                     s->mb_intra= 0;
2659                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2660                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2661                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2662                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2663                 }
2664                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2665                     s->mv_dir = MV_DIR_FORWARD;
2666                     s->mv_type = MV_TYPE_FIELD;
2667                     s->mb_intra= 0;
2668                     for(i=0; i<2; i++){
2669                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2670                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2671                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2672                     }
2673                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2674                                  &dmin, &next_block, 0, 0);
2675                 }
2676                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2677                     s->mv_dir = MV_DIR_FORWARD;
2678                     s->mv_type = MV_TYPE_16X16;
2679                     s->mb_intra= 0;
2680                     s->mv[0][0][0] = 0;
2681                     s->mv[0][0][1] = 0;
2682                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2683                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2684                 }
2685                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2686                     s->mv_dir = MV_DIR_FORWARD;
2687                     s->mv_type = MV_TYPE_8X8;
2688                     s->mb_intra= 0;
2689                     for(i=0; i<4; i++){
2690                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2691                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2692                     }
2693                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2694                                  &dmin, &next_block, 0, 0);
2695                 }
2696                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2697                     s->mv_dir = MV_DIR_FORWARD;
2698                     s->mv_type = MV_TYPE_16X16;
2699                     s->mb_intra= 0;
2700                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2701                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2702                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2703                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2704                 }
2705                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2706                     s->mv_dir = MV_DIR_BACKWARD;
2707                     s->mv_type = MV_TYPE_16X16;
2708                     s->mb_intra= 0;
2709                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2710                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2711                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2712                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2713                 }
2714                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2715                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2716                     s->mv_type = MV_TYPE_16X16;
2717                     s->mb_intra= 0;
2718                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2719                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2720                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2721                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2722                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2723                                  &dmin, &next_block, 0, 0);
2724                 }
2725                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2726                     s->mv_dir = MV_DIR_FORWARD;
2727                     s->mv_type = MV_TYPE_FIELD;
2728                     s->mb_intra= 0;
2729                     for(i=0; i<2; i++){
2730                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2731                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2732                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2733                     }
2734                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2735                                  &dmin, &next_block, 0, 0);
2736                 }
2737                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2738                     s->mv_dir = MV_DIR_BACKWARD;
2739                     s->mv_type = MV_TYPE_FIELD;
2740                     s->mb_intra= 0;
2741                     for(i=0; i<2; i++){
2742                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2743                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2744                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2745                     }
2746                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2747                                  &dmin, &next_block, 0, 0);
2748                 }
2749                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2750                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2751                     s->mv_type = MV_TYPE_FIELD;
2752                     s->mb_intra= 0;
2753                     for(dir=0; dir<2; dir++){
2754                         for(i=0; i<2; i++){
2755                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2756                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2757                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2758                         }
2759                     }
2760                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2761                                  &dmin, &next_block, 0, 0);
2762                 }
2763                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2764                     s->mv_dir = 0;
2765                     s->mv_type = MV_TYPE_16X16;
2766                     s->mb_intra= 1;
2767                     s->mv[0][0][0] = 0;
2768                     s->mv[0][0][1] = 0;
2769                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2770                                  &dmin, &next_block, 0, 0);
2771                     if(s->h263_pred || s->h263_aic){
2772                         if(best_s.mb_intra)
2773                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2774                         else
2775                             ff_clean_intra_table_entries(s); //old mode?
2776                     }
2777                 }
2778
2779                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2780                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2781                         const int last_qp= backup_s.qscale;
2782                         int qpi, qp, dc[6];
2783                         DCTELEM ac[6][16];
2784                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2785                         static const int dquant_tab[4]={-1,1,-2,2};
2786
2787                         av_assert2(backup_s.dquant == 0);
2788
2789                         //FIXME intra
2790                         s->mv_dir= best_s.mv_dir;
2791                         s->mv_type = MV_TYPE_16X16;
2792                         s->mb_intra= best_s.mb_intra;
2793                         s->mv[0][0][0] = best_s.mv[0][0][0];
2794                         s->mv[0][0][1] = best_s.mv[0][0][1];
2795                         s->mv[1][0][0] = best_s.mv[1][0][0];
2796                         s->mv[1][0][1] = best_s.mv[1][0][1];
2797
2798                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2799                         for(; qpi<4; qpi++){
2800                             int dquant= dquant_tab[qpi];
2801                             qp= last_qp + dquant;
2802                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2803                                 continue;
2804                             backup_s.dquant= dquant;
2805                             if(s->mb_intra && s->dc_val[0]){
2806                                 for(i=0; i<6; i++){
2807                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2808                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
2809                                 }
2810                             }
2811
2812                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2813                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2814                             if(best_s.qscale != qp){
2815                                 if(s->mb_intra && s->dc_val[0]){
2816                                     for(i=0; i<6; i++){
2817                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2818                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
2819                                     }
2820                                 }
2821                             }
2822                         }
2823                     }
2824                 }
2825                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2826                     int mx= s->b_direct_mv_table[xy][0];
2827                     int my= s->b_direct_mv_table[xy][1];
2828
2829                     backup_s.dquant = 0;
2830                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2831                     s->mb_intra= 0;
2832                     ff_mpeg4_set_direct_mv(s, mx, my);
2833                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2834                                  &dmin, &next_block, mx, my);
2835                 }
2836                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2837                     backup_s.dquant = 0;
2838                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2839                     s->mb_intra= 0;
2840                     ff_mpeg4_set_direct_mv(s, 0, 0);
2841                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2842                                  &dmin, &next_block, 0, 0);
2843                 }
2844                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2845                     int coded=0;
2846                     for(i=0; i<6; i++)
2847                         coded |= s->block_last_index[i];
2848                     if(coded){
2849                         int mx,my;
2850                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2851                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2852                             mx=my=0; //FIXME find the one we actually used
2853                             ff_mpeg4_set_direct_mv(s, mx, my);
2854                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2855                             mx= s->mv[1][0][0];
2856                             my= s->mv[1][0][1];
2857                         }else{
2858                             mx= s->mv[0][0][0];
2859                             my= s->mv[0][0][1];
2860                         }
2861
2862                         s->mv_dir= best_s.mv_dir;
2863                         s->mv_type = best_s.mv_type;
2864                         s->mb_intra= 0;
2865 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2866                         s->mv[0][0][1] = best_s.mv[0][0][1];
2867                         s->mv[1][0][0] = best_s.mv[1][0][0];
2868                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2869                         backup_s.dquant= 0;
2870                         s->skipdct=1;
2871                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2872                                         &dmin, &next_block, mx, my);
2873                         s->skipdct=0;
2874                     }
2875                 }
2876
2877                 s->current_picture.f.qscale_table[xy] = best_s.qscale;
2878
2879                 copy_context_after_encode(s, &best_s, -1);
2880
2881                 pb_bits_count= put_bits_count(&s->pb);
2882                 flush_put_bits(&s->pb);
2883                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2884                 s->pb= backup_s.pb;
2885
2886                 if(s->data_partitioning){
2887                     pb2_bits_count= put_bits_count(&s->pb2);
2888                     flush_put_bits(&s->pb2);
2889                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2890                     s->pb2= backup_s.pb2;
2891
2892                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2893                     flush_put_bits(&s->tex_pb);
2894                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2895                     s->tex_pb= backup_s.tex_pb;
2896                 }
2897                 s->last_bits= put_bits_count(&s->pb);
2898
2899                 if (CONFIG_H263_ENCODER &&
2900                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2901                     ff_h263_update_motion_val(s);
2902
2903                 if(next_block==0){ //FIXME 16 vs linesize16
2904                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2905                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2906                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2907                 }
2908
2909                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2910                     ff_MPV_decode_mb(s, s->block);
2911             } else {
2912                 int motion_x = 0, motion_y = 0;
2913                 s->mv_type=MV_TYPE_16X16;
2914                 // only one MB-Type possible
2915
2916                 switch(mb_type){
2917                 case CANDIDATE_MB_TYPE_INTRA:
2918                     s->mv_dir = 0;
2919                     s->mb_intra= 1;
2920                     motion_x= s->mv[0][0][0] = 0;
2921                     motion_y= s->mv[0][0][1] = 0;
2922                     break;
2923                 case CANDIDATE_MB_TYPE_INTER:
2924                     s->mv_dir = MV_DIR_FORWARD;
2925                     s->mb_intra= 0;
2926                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2927                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2928                     break;
2929                 case CANDIDATE_MB_TYPE_INTER_I:
2930                     s->mv_dir = MV_DIR_FORWARD;
2931                     s->mv_type = MV_TYPE_FIELD;
2932                     s->mb_intra= 0;
2933                     for(i=0; i<2; i++){
2934                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2935                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2936                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2937                     }
2938                     break;
2939                 case CANDIDATE_MB_TYPE_INTER4V:
2940                     s->mv_dir = MV_DIR_FORWARD;
2941                     s->mv_type = MV_TYPE_8X8;
2942                     s->mb_intra= 0;
2943                     for(i=0; i<4; i++){
2944                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2945                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2946                     }
2947                     break;
2948                 case CANDIDATE_MB_TYPE_DIRECT:
2949                     if (CONFIG_MPEG4_ENCODER) {
2950                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2951                         s->mb_intra= 0;
2952                         motion_x=s->b_direct_mv_table[xy][0];
2953                         motion_y=s->b_direct_mv_table[xy][1];
2954                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2955                     }
2956                     break;
2957                 case CANDIDATE_MB_TYPE_DIRECT0:
2958                     if (CONFIG_MPEG4_ENCODER) {
2959                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2960                         s->mb_intra= 0;
2961                         ff_mpeg4_set_direct_mv(s, 0, 0);
2962                     }
2963                     break;
2964                 case CANDIDATE_MB_TYPE_BIDIR:
2965                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2966                     s->mb_intra= 0;
2967                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2968                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2969                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2970                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2971                     break;
2972                 case CANDIDATE_MB_TYPE_BACKWARD:
2973                     s->mv_dir = MV_DIR_BACKWARD;
2974                     s->mb_intra= 0;
2975                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2976                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2977                     break;
2978                 case CANDIDATE_MB_TYPE_FORWARD:
2979                     s->mv_dir = MV_DIR_FORWARD;
2980                     s->mb_intra= 0;
2981                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2982                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2983                     break;
2984                 case CANDIDATE_MB_TYPE_FORWARD_I:
2985                     s->mv_dir = MV_DIR_FORWARD;
2986                     s->mv_type = MV_TYPE_FIELD;
2987                     s->mb_intra= 0;
2988                     for(i=0; i<2; i++){
2989                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2990                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2991                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2992                     }
2993                     break;
2994                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2995                     s->mv_dir = MV_DIR_BACKWARD;
2996                     s->mv_type = MV_TYPE_FIELD;
2997                     s->mb_intra= 0;
2998                     for(i=0; i<2; i++){
2999                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3000                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3001                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3002                     }
3003                     break;
3004                 case CANDIDATE_MB_TYPE_BIDIR_I:
3005                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3006                     s->mv_type = MV_TYPE_FIELD;
3007                     s->mb_intra= 0;
3008                     for(dir=0; dir<2; dir++){
3009                         for(i=0; i<2; i++){
3010                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3011                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3012                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3013                         }
3014                     }
3015                     break;
3016                 default:
3017                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3018                 }
3019
3020                 encode_mb(s, motion_x, motion_y);
3021
3022                 // RAL: Update last macroblock type
3023                 s->last_mv_dir = s->mv_dir;
3024
3025                 if (CONFIG_H263_ENCODER &&
3026                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3027                     ff_h263_update_motion_val(s);
3028
3029                 ff_MPV_decode_mb(s, s->block);
3030             }
3031
3032             /* clean the MV table in IPS frames for direct mode in B frames */
3033             if(s->mb_intra /* && I,P,S_TYPE */){
3034                 s->p_mv_table[xy][0]=0;
3035                 s->p_mv_table[xy][1]=0;
3036             }
3037
3038             if(s->flags&CODEC_FLAG_PSNR){
3039                 int w= 16;
3040                 int h= 16;
3041
3042                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3043                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3044
3045                 s->current_picture.f.error[0] += sse(
3046                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3047                     s->dest[0], w, h, s->linesize);
3048                 s->current_picture.f.error[1] += sse(
3049                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3050                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3051                 s->current_picture.f.error[2] += sse(
3052                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3053                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3054             }
3055             if(s->loop_filter){
3056                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3057                     ff_h263_loop_filter(s);
3058             }
3059             av_dlog(s->avctx, "MB %d %d bits\n",
3060                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3061         }
3062     }
3063
3064     //not beautiful here but we must write it before flushing so it has to be here
3065     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3066         ff_msmpeg4_encode_ext_header(s);
3067
3068     write_slice_end(s);
3069
3070     /* Send the last GOB if RTP */
3071     if (s->avctx->rtp_callback) {
3072         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3073         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3074         /* Call the RTP callback to send the last GOB */
3075         emms_c();
3076         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3077     }
3078
3079     return 0;
3080 }
3081
3082 #define MERGE(field) dst->field += src->field; src->field=0
     /* MERGE(field): add src->field into dst->field, then reset src->field to 0.
      * The expansion is two separate statements and refers to variables named
      * dst and src, which must be in scope wherever the macro is used. */
3083 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
         /* Accumulate the motion-estimation sums of src (scene change score and the
          * two temporary variance sums) into dst and zero them in src.
          * encode_picture() calls this for each thread context with index >= 1. */
3084     MERGE(me.scene_change_score);
3085     MERGE(me.mc_mb_var_sum_temp);
3086     MERGE(me.mb_var_sum_temp);
3087 }
3088
3089 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
         /* Fold the per-context encoding results of src into dst:
          *  - add the statistics counters and per-plane error sums (src copies are zeroed),
          *  - add the DCT error sums when dst->avctx->noise_reduction is set,
          *  - copy the bits written to src->pb into dst->pb and flush dst->pb.
          * encode_picture() calls this for each thread context with index >= 1. */
3090     int i;
3091
3092     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3093     MERGE(dct_count[1]);
3094     MERGE(mv_bits);
3095     MERGE(i_tex_bits);
3096     MERGE(p_tex_bits);
3097     MERGE(i_count);
3098     MERGE(f_count);
3099     MERGE(b_count);
3100     MERGE(skip_count);
3101     MERGE(misc_bits);
3102     MERGE(error_count);
3103     MERGE(padding_bug_score);
3104     MERGE(current_picture.f.error[0]);
3105     MERGE(current_picture.f.error[1]);
3106     MERGE(current_picture.f.error[2]);
3107
3108     if(dst->avctx->noise_reduction){
3109         for(i=0; i<64; i++){
3110             MERGE(dct_error_sum[0][i]);
3111             MERGE(dct_error_sum[1][i]);
3112         }
3113     }
3114
         /* Both bit writers are expected to hold a whole number of bytes before
          * the source bits are copied into the destination writer. */
3115     assert(put_bits_count(&src->pb) % 8 ==0);
3116     assert(put_bits_count(&dst->pb) % 8 ==0);
3117     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3118     flush_put_bits(&dst->pb);
3119 }
3120
3121 static int estimate_qp(MpegEncContext *s, int dry_run){
         /* Choose the quality (lambda) for the current picture and derive qscale from it.
          *
          * s:       encoder context; writes f.quality of current_picture and
          *          current_picture_ptr, s->lambda, and calls update_qscale().
          * dry_run: forwarded to ff_rate_estimate_qscale(); when non-zero a pending
          *          s->next_lambda is used but not cleared.
          * Returns 0 on success, -1 when ff_rate_estimate_qscale() yields a negative quality. */
3122     if (s->next_lambda){
             /* An explicitly requested lambda takes priority over rate control. */
3123         s->current_picture_ptr->f.quality =
3124         s->current_picture.f.quality = s->next_lambda;
3125         if(!dry_run) s->next_lambda= 0;
3126     } else if (!s->fixed_qscale) {
3127         s->current_picture_ptr->f.quality =
3128         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3129         if (s->current_picture.f.quality < 0)
3130             return -1;
3131     }
3132
3133     if(s->adaptive_quant){
             /* Codec-specific preparation of the per-macroblock qscale values;
              * codecs without a dedicated routine use ff_init_qscale_tab(). */
3134         switch(s->codec_id){
3135         case AV_CODEC_ID_MPEG4:
3136             if (CONFIG_MPEG4_ENCODER)
3137                 ff_clean_mpeg4_qscales(s);
3138             break;
3139         case AV_CODEC_ID_H263:
3140         case AV_CODEC_ID_H263P:
3141         case AV_CODEC_ID_FLV1:
3142             if (CONFIG_H263_ENCODER)
3143                 ff_clean_h263_qscales(s);
3144             break;
3145         default:
3146             ff_init_qscale_tab(s);
3147         }
3148
             /* The picture-level lambda is taken from the first table entry. */
3149         s->lambda= s->lambda_table[0];
3150         //FIXME broken
3151     }else
3152         s->lambda = s->current_picture.f.quality;
3153     update_qscale(s);
3154     return 0;
3155 }
3156
3157 /* must be called before writing the header */
     /* Updates the temporal bookkeeping in s from the pts of the current picture:
      *   s->time            = pts * time_base.num
      *   B picture:           s->pb_time = s->pp_time - (s->last_non_b_time - s->time)
      *   any other picture:   s->pp_time = s->time - s->last_non_b_time, then
      *                        s->last_non_b_time = s->time
      * The asserts require a valid pts, 0 < pb_time < pp_time for B pictures and
      * pp_time > 0 for non-B pictures other than picture number 0. */
3158 static void set_frame_distances(MpegEncContext * s){
3159     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3160     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3161
3162     if(s->pict_type==AV_PICTURE_TYPE_B){
3163         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3164         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3165     }else{
3166         s->pp_time= s->time - s->last_non_b_time;
3167         s->last_non_b_time= s->time;
3168         assert(s->picture_number==0 || s->pp_time > 0);
3169     }
3170 }
3171
3172 static int encode_picture(MpegEncContext *s, int picture_number)
3173 {
         /* Encode one picture with the state held in s.
          *
          * Steps, in order: initialise timing and rounding state, pick a preliminary
          * lambda, run motion estimation (or the variance pass for I pictures) on all
          * slice contexts, optionally turn a P picture into an I picture on a scene
          * change, choose f_code/b_code, run estimate_qp(), set up MJPEG/AMV matrices,
          * write the picture header for s->out_format, and finally run encode_thread
          * on all slice contexts and merge their results into s.
          *
          * picture_number: stored in s->picture_number and passed to the header writers.
          * Returns 0 on success, -1 when estimate_qp() or ff_init_me() fails.
          * NOTE(review): the return values of the s->avctx->execute() calls are not checked. */
3174     int i;
3175     int bits;
3176     int context_count = s->slice_context_count;
3177
3178     s->picture_number = picture_number;
3179
3180     /* Reset the average MB variance */
3181     s->me.mb_var_sum_temp    =
3182     s->me.mc_mb_var_sum_temp = 0;
3183
3184     /* we need to initialize some time vars before we can encode b-frames */
3185     // RAL: Condition added for MPEG1VIDEO
3186     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3187         set_frame_distances(s);
3188     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3189         ff_set_mpeg4_time(s);
3190
3191     s->me.scene_change_score=0;
3192
3193 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3194
         /* Rounding control: I pictures set no_rounding from the msmpeg4 version;
          * non-B, non-I pictures toggle it for flipflop_rounding, H263P and MPEG4;
          * B pictures leave it untouched. */
3195     if(s->pict_type==AV_PICTURE_TYPE_I){
3196         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3197         else                        s->no_rounding=0;
3198     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3199         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3200             s->no_rounding ^= 1;
3201     }
3202
         /* Preliminary lambda used before the final estimate_qp(s, 0) call below:
          * a dry-run estimate in pass 2, otherwise (unless CODEC_FLAG_QSCALE is set)
          * the lambda remembered in s->last_lambda_for[]. */
3203     if(s->flags & CODEC_FLAG_PASS2){
3204         if (estimate_qp(s,1) < 0)
3205             return -1;
3206         ff_get_2pass_fcode(s);
3207     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3208         if(s->pict_type==AV_PICTURE_TYPE_B)
3209             s->lambda= s->last_lambda_for[s->pict_type];
3210         else
3211             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3212         update_qscale(s);
3213     }
3214
         /* For every codec except AMV the chroma intra matrices alias the luma ones;
          * separately held chroma matrices are freed before the pointers are aliased. */
3215     if(s->codec_id != AV_CODEC_ID_AMV){
3216         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3217         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3218         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3219         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3220     }
3221
3222     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* Bring the additional slice contexts in sync with s before they are used. */
3223     for(i=1; i<context_count; i++){
3224         ff_update_duplicate_context(s->thread_context[i], s);
3225     }
3226
3227     if(ff_init_me(s)<0)
3228         return -1;
3229
3230     /* Estimate motion for every MB */
3231     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* lambda and lambda2 are scaled by me_penalty_compensation/256 (rounded). */
3232         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3233         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3234         if(s->pict_type != AV_PICTURE_TYPE_B && s->avctx->me_threshold==0){
3235             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3236                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3237             }
3238         }
3239
3240         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3241     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3242         /* I-Frame */
3243         for(i=0; i<s->mb_stride*s->mb_height; i++)
3244             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3245
3246         if(!s->fixed_qscale){
3247             /* finding spatial complexity for I-frame rate control */
3248             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3249         }
3250     }
         /* Collect the sums gathered by the other slice contexts into s. */
3251     for(i=1; i<context_count; i++){
3252         merge_context_after_me(s, s->thread_context[i]);
3253     }
3254     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3255     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3256     emms_c();
3257
         /* Scene change: a P picture whose score exceeds the threshold is re-typed
          * as an I picture and every macroblock is marked as an intra candidate. */
3258     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3259         s->pict_type= AV_PICTURE_TYPE_I;
3260         for(i=0; i<s->mb_stride*s->mb_height; i++)
3261             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3262         if(s->msmpeg4_version >= 3)
3263             s->no_rounding=1;
3264         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3265                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3266     }
3267
         /* Unless umvplus is set, choose f_code (and b_code for B pictures) from the
          * motion vector tables and let ff_fix_long_mvs()/ff_fix_long_p_mvs() deal
          * with vectors that do not fit the chosen code. */
3268     if(!s->umvplus){
3269         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3270             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3271
3272             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3273                 int a,b;
3274                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3275                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3276                 s->f_code= FFMAX3(s->f_code, a, b);
3277             }
3278
3279             ff_fix_long_p_mvs(s);
3280             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3281             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3282                 int j;
3283                 for(i=0; i<2; i++){
3284                     for(j=0; j<2; j++)
3285                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3286                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3287                 }
3288             }
3289         }
3290
3291         if(s->pict_type==AV_PICTURE_TYPE_B){
3292             int a, b;
3293
                 /* f_code covers the forward and bidir-forward tables,
                  * b_code the backward and bidir-backward tables. */
3294             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3295             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3296             s->f_code = FFMAX(a, b);
3297
3298             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3299             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3300             s->b_code = FFMAX(a, b);
3301
3302             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3303             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3304             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3305             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3306             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3307                 int dir, j;
3308                 for(dir=0; dir<2; dir++){
3309                     for(i=0; i<2; i++){
3310                         for(j=0; j<2; j++){
3311                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3312                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3313                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3314                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3315                         }
3316                     }
3317                 }
3318             }
3319         }
3320     }
3321
         /* Final quality decision for this picture (not a dry run). */
3322     if (estimate_qp(s, 0) < 0)
3323         return -1;
3324
3325     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3326         s->qscale= 3; //reduce clipping problems
3327
3328     if (s->out_format == FMT_MJPEG) {
3329         /* for mjpeg, we do include qscale in the matrix */
3330         for(i=1;i<64;i++){
3331             int j= s->dsp.idct_permutation[i];
3332
3333             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3334         }
3335         s->y_dc_scale_table=
3336         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3337         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3338         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3339                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
             /* qscale is now part of the matrix, so it is pinned to 8 for the encode pass. */
3340         s->qscale= 8;
3341     }
3342     if(s->codec_id == AV_CODEC_ID_AMV){
             /* AMV: constant DC scale tables (13 for luma, 14 for chroma) and AC
              * matrices taken from rows 5*2+0 and 5*2+1 of sp5x_quant_table. */
3343         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3344         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3345         for(i=1;i<64;i++){
3346             int j= s->dsp.idct_permutation[ff_zigzag_direct[i]];
3347
3348             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3349             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3350         }
3351         s->y_dc_scale_table= y;
3352         s->c_dc_scale_table= c;
3353         s->intra_matrix[0] = 13;
3354         s->chroma_intra_matrix[0] = 14;
3355         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3356                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3357         ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3358                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3359         s->qscale= 8;
3360     }
3361
3362     //FIXME var duplication
3363     s->current_picture_ptr->f.key_frame =
3364     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3365     s->current_picture_ptr->f.pict_type =
3366     s->current_picture.f.pict_type = s->pict_type;
3367
3368     if (s->current_picture.f.key_frame)
3369         s->picture_in_gop_number=0;
3370
         /* Write the picture header for the output format; the bit count before and
          * after the header is used to compute s->header_bits. */
3371     s->mb_x = s->mb_y = 0;
3372     s->last_bits= put_bits_count(&s->pb);
3373     switch(s->out_format) {
3374     case FMT_MJPEG:
3375         if (CONFIG_MJPEG_ENCODER)
3376             ff_mjpeg_encode_picture_header(s);
3377         break;
3378     case FMT_H261:
3379         if (CONFIG_H261_ENCODER)
3380             ff_h261_encode_picture_header(s, picture_number);
3381         break;
3382     case FMT_H263:
             /* The first matching condition in this chain selects the header writer. */
3383         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3384             ff_wmv2_encode_picture_header(s, picture_number);
3385         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3386             ff_msmpeg4_encode_picture_header(s, picture_number);
3387         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3388             ff_mpeg4_encode_picture_header(s, picture_number);
3389         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3390             ff_rv10_encode_picture_header(s, picture_number);
3391         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3392             ff_rv20_encode_picture_header(s, picture_number);
3393         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3394             ff_flv_encode_picture_header(s, picture_number);
3395         else if (CONFIG_H263_ENCODER)
3396             ff_h263_encode_picture_header(s, picture_number);
3397         break;
3398     case FMT_MPEG1:
3399         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3400             ff_mpeg1_encode_picture_header(s, picture_number);
3401         break;
3402     case FMT_H264:
3403         break;
3404     default:
3405         av_assert0(0);
3406     }
3407     bits= put_bits_count(&s->pb);
3408     s->header_bits= bits - s->last_bits;
3409
         /* Propagate the post-ME state to the other slice contexts, encode all
          * slices, then merge the other contexts' results back into s. */
3410     for(i=1; i<context_count; i++){
3411         update_duplicate_context_after_me(s->thread_context[i], s);
3412     }
3413     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3414     for(i=1; i<context_count; i++){
3415         merge_context_after_encode(s, s->thread_context[i]);
3416     }
3417     emms_c();
3418     return 0;
3419 }
3420
3421 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block){
         /* Shrink the 64 coefficients of block towards zero and gather statistics.
          *
          * The tables are selected by s->mb_intra. For every non-zero coefficient the
          * absolute value is added to s->dct_error_sum[intra][i], then the magnitude is
          * reduced by s->dct_offset[intra][i]; a result that would change sign is set
          * to 0. s->dct_count[intra] is incremented once per call. */
3422     const int intra= s->mb_intra;
3423     int i;
3424
3425     s->dct_count[intra]++;
3426
3427     for(i=0; i<64; i++){
3428         int level= block[i];
3429
3430         if(level){
3431             if(level>0){
3432                 s->dct_error_sum[intra][i] += level;
3433                 level -= s->dct_offset[intra][i];
3434                 if(level<0) level=0;
3435             }else{
                     /* level is negative here, so subtracting it adds its magnitude */
3436                 s->dct_error_sum[intra][i] -= level;
3437                 level += s->dct_offset[intra][i];
3438                 if(level>0) level=0;
3439             }
3440             block[i]= level;
3441         }
3442     }
3443 }
3444
3445 static int dct_quantize_trellis_c(MpegEncContext *s,
3446                                   DCTELEM *block, int n,
3447                                   int qscale, int *overflow){
         /* Transform and quantize one block, choosing the levels by a search that
          * minimises distortion + lambda * (VLC length).
          *
          * s:        encoder context (fdct, quant matrices, VLC length tables, lambda2).
          * block:    64 coefficients; transformed in place by s->dsp.fdct() and
          *           overwritten with the chosen quantized levels.
          * n:        block index; n < 4 selects the luma DC scale / intra matrix,
          *           otherwise the chroma ones. Also indexes s->coded_score[].
          * qscale:   quantizer used for matrix selection and reconstruction.
          * overflow: set to non-zero when a candidate level may exceed s->max_qcoeff.
          * Returns the scan index of the last non-zero coefficient; a value below the
          * first coded index (1 for intra, 0 for inter) means no coefficient was kept. */
3448     const int *qmat;
3449     const uint8_t *scantable= s->intra_scantable.scantable;
3450     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3451     int max=0;
3452     unsigned int threshold1, threshold2;
3453     int bias=0;
3454     int run_tab[65];
3455     int level_tab[65];
3456     int score_tab[65];
3457     int survivor[65];
3458     int survivor_count;
3459     int last_run=0;
3460     int last_level=0;
3461     int last_score= 0;
3462     int last_i;
3463     int coeff[2][64];
3464     int coeff_count[64];
3465     int qmul, qadd, start_i, last_non_zero, i, dc;
3466     const int esc_length= s->ac_esc_length;
3467     uint8_t * length;
3468     uint8_t * last_length;
3469     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3470
3471     s->dsp.fdct (block);
3472
3473     if(s->dct_error_sum)
3474         s->denoise_dct(s, block);
3475     qmul= qscale*16;
3476     qadd= ((qscale-1)|1)*8;
3477
3478     if (s->mb_intra) {
3479         int q;
3480         if (!s->h263_aic) {
3481             if (n < 4)
3482                 q = s->y_dc_scale;
3483             else
3484                 q = s->c_dc_scale;
3485             q = q << 3;
3486         } else{
3487             /* For AIC we skip quant/dequant of INTRADC */
3488             q = 1 << 3;
3489             qadd=0;
3490         }
3491
3492         /* note: block[0] is assumed to be positive */
3493         block[0] = (block[0] + (q >> 1)) / q;
             /* The DC coefficient is handled above, so the search starts at index 1. */
3494         start_i = 1;
3495         last_non_zero = 0;
3496         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3497         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3498             bias= 1<<(QMAT_SHIFT-1);
3499         length     = s->intra_ac_vlc_length;
3500         last_length= s->intra_ac_vlc_last_length;
3501     } else {
3502         start_i = 0;
3503         last_non_zero = -1;
3504         qmat = s->q_inter_matrix[qscale];
3505         length     = s->inter_ac_vlc_length;
3506         last_length= s->inter_ac_vlc_last_length;
3507     }
3508     last_i= start_i;
3509
         /* The unsigned comparison (level+threshold1) > threshold2 used below is true
          * exactly when level > threshold1 or level < -threshold1. */
3510     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3511     threshold2= (threshold1<<1);
3512
         /* Scan from the end to find the last coefficient that passes the threshold. */
3513     for(i=63; i>=start_i; i--) {
3514         const int j = scantable[i];
3515         int level = block[j] * qmat[j];
3516
3517         if(((unsigned)(level+threshold1))>threshold2){
3518             last_non_zero = i;
3519             break;
3520         }
3521     }
3522
         /* Build the candidate levels for each position up to last_non_zero:
          * coeff[0][i] is the quantized level, coeff[1][i] the level one step closer
          * to zero; coeff_count[i] is 1 when only the first candidate is usable. */
3523     for(i=start_i; i<=last_non_zero; i++) {
3524         const int j = scantable[i];
3525         int level = block[j] * qmat[j];
3526
3527 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3528 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3529         if(((unsigned)(level+threshold1))>threshold2){
3530             if(level>0){
3531                 level= (bias + level)>>QMAT_SHIFT;
3532                 coeff[0][i]= level;
3533                 coeff[1][i]= level-1;
3534 //                coeff[2][k]= level-2;
3535             }else{
3536                 level= (bias - level)>>QMAT_SHIFT;
3537                 coeff[0][i]= -level;
3538                 coeff[1][i]= -level+1;
3539 //                coeff[2][k]= -level+2;
3540             }
                 /* level now holds the magnitude; magnitude 1 leaves a single candidate */
3541             coeff_count[i]= FFMIN(level, 2);
3542             av_assert2(coeff_count[i]);
3543             max |=level;
3544         }else{
                 /* Below the threshold: the only candidate is +1 or -1 following the
                  * sign of level (assumes 32-bit int with arithmetic right shift). */
3545             coeff[0][i]= (level>>31)|1;
3546             coeff_count[i]= 1;
3547         }
3548     }
3549
3550     *overflow= s->max_qcoeff < max; //overflow might have happened
3551
         /* Nothing passed the threshold: clear the coefficients from start_i on. */
3552     if(last_non_zero < start_i){
3553         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3554         return last_non_zero;
3555     }
3556
         /* Search state: score_tab[k] is the best score found for a coding whose most
          * recent coded coefficient ends right before scan position k; survivor[]
          * lists the positions still considered as the start of the next run. */
3557     score_tab[start_i]= 0;
3558     survivor[0]= start_i;
3559     survivor_count= 1;
3560
3561     for(i=start_i; i<=last_non_zero; i++){
3562         int level_index, j, zero_distortion;
3563         int dct_coeff= FFABS(block[ scantable[i] ]);
3564         int best_score=256*256*256*120;
3565
3566         if (s->dsp.fdct == ff_fdct_ifast)
3567             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3568         zero_distortion= dct_coeff*dct_coeff;
3569
3570         for(level_index=0; level_index < coeff_count[i]; level_index++){
3571             int distortion;
3572             int level= coeff[level_index][i];
3573             const int alevel= FFABS(level);
3574             int unquant_coeff;
3575
3576             av_assert2(level);
3577
                 /* Reconstruct the value this candidate level would dequantize to. */
3578             if(s->out_format == FMT_H263){
3579                 unquant_coeff= alevel*qmul + qadd;
3580             }else{ //MPEG1
3581                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3582                 if(s->mb_intra){
3583                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3584                         unquant_coeff =   (unquant_coeff - 1) | 1;
3585                 }else{
3586                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3587                         unquant_coeff =   (unquant_coeff - 1) | 1;
3588                 }
3589                 unquant_coeff<<= 3;
3590             }
3591
                 /* Distortion is measured relative to coding this coefficient as zero. */
3592             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* After the +64 offset, levels in -64..63 fall in 0..127 and use the VLC
                  * length tables; anything else is charged the escape length. */
3593             level+=64;
3594             if((level&(~127)) == 0){
3595                 for(j=survivor_count-1; j>=0; j--){
3596                     int run= i - survivor[j];
3597                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3598                     score += score_tab[i-run];
3599
3600                     if(score < best_score){
3601                         best_score= score;
3602                         run_tab[i+1]= run;
3603                         level_tab[i+1]= level-64;
3604                     }
3605                 }
3606
                     /* For FMT_H263 the final coefficient uses a separate length table,
                      * so the best "this is the last coefficient" choice is tracked here. */
3607                 if(s->out_format == FMT_H263){
3608                     for(j=survivor_count-1; j>=0; j--){
3609                         int run= i - survivor[j];
3610                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3611                         score += score_tab[i-run];
3612                         if(score < last_score){
3613                             last_score= score;
3614                             last_run= run;
3615                             last_level= level-64;
3616                             last_i= i+1;
3617                         }
3618                     }
3619                 }
3620             }else{
3621                 distortion += esc_length*lambda;
3622                 for(j=survivor_count-1; j>=0; j--){
3623                     int run= i - survivor[j];
3624                     int score= distortion + score_tab[i-run];
3625
3626                     if(score < best_score){
3627                         best_score= score;
3628                         run_tab[i+1]= run;
3629                         level_tab[i+1]= level-64;
3630                     }
3631                 }
3632
3633                 if(s->out_format == FMT_H263){
3634                   for(j=survivor_count-1; j>=0; j--){
3635                         int run= i - survivor[j];
3636                         int score= distortion + score_tab[i-run];
3637                         if(score < last_score){
3638                             last_score= score;
3639                             last_run= run;
3640                             last_level= level-64;
3641                             last_i= i+1;
3642                         }
3643                     }
3644                 }
3645             }
3646         }
3647
3648         score_tab[i+1]= best_score;
3649
3650         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
             /* Drop trailing survivors whose score is worse than the new best score;
              * for longer blocks (last_non_zero > 27) a margin of lambda is allowed. */
3651         if(last_non_zero <= 27){
3652             for(; survivor_count; survivor_count--){
3653                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3654                     break;
3655             }
3656         }else{
3657             for(; survivor_count; survivor_count--){
3658                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3659                     break;
3660             }
3661         }
3662
3663         survivor[ survivor_count++ ]= i+1;
3664     }
3665
         /* Formats other than FMT_H263 pick the end position here: the position with
          * the lowest score wins, with 2*lambda added for every position except 0. */
3666     if(s->out_format != FMT_H263){
3667         last_score= 256*256*256*120;
3668         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3669             int score= score_tab[i];
3670             if(i) score += lambda*2; //FIXME exacter?
3671
3672             if(score < last_score){
3673                 last_score= score;
3674                 last_i= i;
3675                 last_level= level_tab[i];
3676                 last_run= run_tab[i];
3677             }
3678         }
3679     }
3680
3681     s->coded_score[n] = last_score;
3682
         /* Save |block[0]| before the coefficients are cleared; it is only used by the
          * single-coefficient special case below. */
3683     dc= FFABS(block[0]);
3684     last_non_zero= last_i - 1;
3685     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3686
3687     if(last_non_zero < start_i)
3688         return last_non_zero;
3689
         /* Special case: a non-intra block whose only kept coefficient is at scan
          * position 0. The candidates are re-scored against dropping the coefficient
          * (score dc*dc), and -1 is returned when dropping it wins. */
3690     if(last_non_zero == 0 && start_i == 0){
3691         int best_level= 0;
3692         int best_score= dc * dc;
3693
3694         for(i=0; i<coeff_count[0]; i++){
3695             int level= coeff[i][0];
3696             int alevel= FFABS(level);
3697             int unquant_coeff, score, distortion;
3698
3699             if(s->out_format == FMT_H263){
3700                     unquant_coeff= (alevel*qmul + qadd)>>3;
3701             }else{ //MPEG1
3702                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3703                     unquant_coeff =   (unquant_coeff - 1) | 1;
3704             }
3705             unquant_coeff = (unquant_coeff + 4) >> 3;
3706             unquant_coeff<<= 3 + 3;
3707
3708             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3709             level+=64;
3710             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3711             else                    score= distortion + esc_length*lambda;
3712
3713             if(score < best_score){
3714                 best_score= score;
3715                 best_level= level - 64;
3716             }
3717         }
3718         block[0]= best_level;
3719         s->coded_score[n] = best_score - dc*dc;
3720         if(best_level == 0) return -1;
3721         else                return last_non_zero;
3722     }
3723
         /* Backtrack: store the last level, then follow run_tab[]/level_tab[] towards
          * the start of the block, writing each chosen level at its permuted position. */
3724     i= last_i;
3725     av_assert2(last_level);
3726
3727     block[ perm_scantable[last_non_zero] ]= last_level;
3728     i -= last_run + 1;
3729
3730     for(; i>start_i; i -= run_tab[i] + 1){
3731         block[ perm_scantable[i-1] ]= level_tab[i];
3732     }
3733
3734     return last_non_zero;
3735 }
3736
3737 //#define REFINE_STATS 1
     /* Table of 64 basis patterns with 64 samples each, filled by build_basis().
      * dct_quantize_refine() builds it on first use, when basis[0][0] is still 0. */
3738 static int16_t basis[64][64];
3739
3740 static void build_basis(uint8_t *perm){
         /* Fill basis[][] with scaled, rounded products of two cosines.
          *
          * For frequency pair (i, j) and sample pair (x, y) the stored value is
          *   s * cos(pi/8 * i * (x+0.5)) * cos(pi/8 * j * (y+0.5))
          * with s = 0.25 * (1 << BASIS_SHIFT), multiplied by sqrt(0.5) once for i == 0
          * and once for j == 0. The row is perm[8*i + j], the column is 8*x + y.
          *
          * perm: 64-entry index permutation; the caller passes s->dsp.idct_permutation. */
3741     int i, j, x, y;
3742     emms_c();
3743     for(i=0; i<8; i++){
3744         for(j=0; j<8; j++){
3745             for(y=0; y<8; y++){
3746                 for(x=0; x<8; x++){
3747                     double s= 0.25*(1<<BASIS_SHIFT);
3748                     int index= 8*i + j;
3749                     int perm_index= perm[index];
3750                     if(i==0) s*= sqrt(0.5);
3751                     if(j==0) s*= sqrt(0.5);
3752                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3753                 }
3754             }
3755         }
3756     }
3757 }
3758
3759 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3760                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
3761                         int n, int qscale){
3762     int16_t rem[64];
3763     LOCAL_ALIGNED_16(DCTELEM, d1, [64]);
3764     const uint8_t *scantable= s->intra_scantable.scantable;
3765     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3766 //    unsigned int threshold1, threshold2;
3767 //    int bias=0;
3768     int run_tab[65];
3769     int prev_run=0;
3770     int prev_level=0;
3771     int qmul, qadd, start_i, last_non_zero, i, dc;
3772     uint8_t * length;
3773     uint8_t * last_length;
3774     int lambda;
3775     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3776 #ifdef REFINE_STATS
3777 static int count=0;
3778 static int after_last=0;
3779 static int to_zero=0;
3780 static int from_zero=0;
3781 static int raise=0;
3782 static int lower=0;
3783 static int messed_sign=0;
3784 #endif
3785
3786     if(basis[0][0] == 0)
3787         build_basis(s->dsp.idct_permutation);
3788
3789     qmul= qscale*2;
3790     qadd= (qscale-1)|1;
3791     if (s->mb_intra) {
3792         if (!s->h263_aic) {
3793             if (n < 4)
3794                 q = s->y_dc_scale;
3795             else
3796                 q = s->c_dc_scale;
3797         } else{
3798             /* For AIC we skip quant/dequant of INTRADC */
3799             q = 1;
3800             qadd=0;
3801         }
3802         q <<= RECON_SHIFT-3;
3803         /* note: block[0] is assumed to be positive */
3804         dc= block[0]*q;
3805 //        block[0] = (block[0] + (q >> 1)) / q;
3806         start_i = 1;
3807 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3808 //            bias= 1<<(QMAT_SHIFT-1);
3809         length     = s->intra_ac_vlc_length;
3810         last_length= s->intra_ac_vlc_last_length;
3811     } else {
3812         dc= 0;
3813         start_i = 0;
3814         length     = s->inter_ac_vlc_length;
3815         last_length= s->inter_ac_vlc_last_length;
3816     }
3817     last_non_zero = s->block_last_index[n];
3818
3819 #ifdef REFINE_STATS
3820 {START_TIMER
3821 #endif
3822     dc += (1<<(RECON_SHIFT-1));
3823     for(i=0; i<64; i++){
3824         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3825     }
3826 #ifdef REFINE_STATS
3827 STOP_TIMER("memset rem[]")}
3828 #endif
3829     sum=0;
3830     for(i=0; i<64; i++){
3831         int one= 36;
3832         int qns=4;
3833         int w;
3834
3835         w= FFABS(weight[i]) + qns*one;
3836         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3837
3838         weight[i] = w;
3839 //        w=weight[i] = (63*qns + (w/2)) / w;
3840
3841         av_assert2(w>0);
3842         av_assert2(w<(1<<6));
3843         sum += w*w;
3844     }
3845     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3846 #ifdef REFINE_STATS
3847 {START_TIMER
3848 #endif
3849     run=0;
3850     rle_index=0;
3851     for(i=start_i; i<=last_non_zero; i++){
3852         int j= perm_scantable[i];
3853         const int level= block[j];
3854         int coeff;
3855
3856         if(level){
3857             if(level<0) coeff= qmul*level - qadd;
3858             else        coeff= qmul*level + qadd;
3859             run_tab[rle_index++]=run;
3860             run=0;
3861
3862             s->dsp.add_8x8basis(rem, basis[j], coeff);
3863         }else{
3864             run++;
3865         }
3866     }
3867 #ifdef REFINE_STATS
3868 if(last_non_zero>0){
3869 STOP_TIMER("init rem[]")
3870 }
3871 }
3872
3873 {START_TIMER
3874 #endif
3875     for(;;){
3876         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3877         int best_coeff=0;
3878         int best_change=0;
3879         int run2, best_unquant_change=0, analyze_gradient;
3880 #ifdef REFINE_STATS
3881 {START_TIMER
3882 #endif
3883         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3884
3885         if(analyze_gradient){
3886 #ifdef REFINE_STATS
3887 {START_TIMER
3888 #endif
3889             for(i=0; i<64; i++){
3890                 int w= weight[i];
3891
3892                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3893             }
3894 #ifdef REFINE_STATS
3895 STOP_TIMER("rem*w*w")}
3896 {START_TIMER
3897 #endif
3898             s->dsp.fdct(d1);
3899 #ifdef REFINE_STATS
3900 STOP_TIMER("dct")}
3901 #endif
3902         }
3903
3904         if(start_i){
3905             const int level= block[0];
3906             int change, old_coeff;
3907
3908             av_assert2(s->mb_intra);
3909
3910             old_coeff= q*level;
3911
3912             for(change=-1; change<=1; change+=2){
3913                 int new_level= level + change;
3914                 int score, new_coeff;
3915
3916                 new_coeff= q*new_level;
3917                 if(new_coeff >= 2048 || new_coeff < 0)
3918                     continue;
3919
3920                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3921                 if(score<best_score){
3922                     best_score= score;
3923                     best_coeff= 0;
3924                     best_change= change;
3925                     best_unquant_change= new_coeff - old_coeff;
3926                 }
3927             }
3928         }
3929
3930         run=0;
3931         rle_index=0;
3932         run2= run_tab[rle_index++];
3933         prev_level=0;
3934         prev_run=0;
3935
3936         for(i=start_i; i<64; i++){
3937             int j= perm_scantable[i];
3938             const int level= block[j];
3939             int change, old_coeff;
3940
3941             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3942                 break;
3943
3944             if(level){
3945                 if(level<0) old_coeff= qmul*level - qadd;
3946                 else        old_coeff= qmul*level + qadd;
3947                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3948             }else{
3949                 old_coeff=0;
3950                 run2--;
3951                 av_assert2(run2>=0 || i >= last_non_zero );
3952             }
3953
3954             for(change=-1; change<=1; change+=2){
3955                 int new_level= level + change;
3956                 int score, new_coeff, unquant_change;
3957
3958                 score=0;
3959                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3960                    continue;
3961
3962                 if(new_level){
3963                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3964                     else            new_coeff= qmul*new_level + qadd;
3965                     if(new_coeff >= 2048 || new_coeff <= -2048)
3966                         continue;
3967                     //FIXME check for overflow
3968
3969                     if(level){
3970                         if(level < 63 && level > -63){
3971                             if(i < last_non_zero)
3972                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3973                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3974                             else
3975                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3976                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3977                         }
3978                     }else{
3979                         av_assert2(FFABS(new_level)==1);
3980
3981                         if(analyze_gradient){
3982                             int g= d1[ scantable[i] ];
3983                             if(g && (g^new_level) >= 0)
3984                                 continue;
3985                         }
3986
3987                         if(i < last_non_zero){
3988                             int next_i= i + run2 + 1;
3989                             int next_level= block[ perm_scantable[next_i] ] + 64;
3990
3991                             if(next_level&(~127))
3992                                 next_level= 0;
3993
3994                             if(next_i < last_non_zero)
3995                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3996                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3997                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3998                             else
3999                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4000                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4001                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4002                         }else{
4003                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4004                             if(prev_level){
4005                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4006                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4007                             }
4008                         }
4009                     }
4010                 }else{
4011                     new_coeff=0;
4012                     av_assert2(FFABS(level)==1);
4013
4014                     if(i < last_non_zero){
4015                         int next_i= i + run2 + 1;
4016                         int next_level= block[ perm_scantable[next_i] ] + 64;
4017
4018                         if(next_level&(~127))
4019                             next_level= 0;
4020
4021                         if(next_i < last_non_zero)
4022                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4023                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4024                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4025                         else
4026                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4027                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4028                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4029                     }else{
4030                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4031                         if(prev_level){
4032                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4033                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4034                         }
4035                     }
4036                 }
4037
4038                 score *= lambda;
4039
4040                 unquant_change= new_coeff - old_coeff;
4041                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4042
4043                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
4044                 if(score<best_score){
4045                     best_score= score;
4046                     best_coeff= i;
4047                     best_change= change;
4048                     best_unquant_change= unquant_change;
4049                 }
4050             }
4051             if(level){
4052                 prev_level= level + 64;
4053                 if(prev_level&(~127))
4054                     prev_level= 0;
4055                 prev_run= run;
4056                 run=0;
4057             }else{
4058                 run++;
4059             }
4060         }
4061 #ifdef REFINE_STATS
4062 STOP_TIMER("iterative step")}
4063 #endif
4064
4065         if(best_change){
4066             int j= perm_scantable[ best_coeff ];
4067
4068             block[j] += best_change;
4069
4070             if(best_coeff > last_non_zero){
4071                 last_non_zero= best_coeff;
4072                 av_assert2(block[j]);
4073 #ifdef REFINE_STATS
4074 after_last++;
4075 #endif
4076             }else{
4077 #ifdef REFINE_STATS
4078 if(block[j]){
4079     if(block[j] - best_change){
4080         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4081             raise++;
4082         }else{
4083             lower++;
4084         }
4085     }else{
4086         from_zero++;
4087     }
4088 }else{
4089     to_zero++;
4090 }
4091 #endif
4092                 for(; last_non_zero>=start_i; last_non_zero--){
4093                     if(block[perm_scantable[last_non_zero]])
4094                         break;
4095                 }
4096             }
4097 #ifdef REFINE_STATS
4098 count++;
4099 if(256*256*256*64 % count == 0){
4100     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4101 }
4102 #endif
4103             run=0;
4104             rle_index=0;
4105             for(i=start_i; i<=last_non_zero; i++){
4106                 int j= perm_scantable[i];
4107                 const int level= block[j];
4108
4109                  if(level){
4110                      run_tab[rle_index++]=run;
4111                      run=0;
4112                  }else{
4113                      run++;
4114                  }
4115             }
4116
4117             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4118         }else{
4119             break;
4120         }
4121     }
4122 #ifdef REFINE_STATS
4123 if(last_non_zero>0){
4124 STOP_TIMER("iterative search")
4125 }
4126 }
4127 #endif
4128
4129     return last_non_zero;
4130 }
4131
4132 int ff_dct_quantize_c(MpegEncContext *s,
4133                         DCTELEM *block, int n,
4134                         int qscale, int *overflow)
4135 {
4136     int i, j, level, last_non_zero, q, start_i;
4137     const int *qmat;
4138     const uint8_t *scantable= s->intra_scantable.scantable;
4139     int bias;
4140     int max=0;
4141     unsigned int threshold1, threshold2;
4142
4143     s->dsp.fdct (block);
4144
4145     if(s->dct_error_sum)
4146         s->denoise_dct(s, block);
4147
4148     if (s->mb_intra) {
4149         if (!s->h263_aic) {
4150             if (n < 4)
4151                 q = s->y_dc_scale;
4152             else
4153                 q = s->c_dc_scale;
4154             q = q << 3;
4155         } else
4156             /* For AIC we skip quant/dequant of INTRADC */
4157             q = 1 << 3;
4158
4159         /* note: block[0] is assumed to be positive */
4160         block[0] = (block[0] + (q >> 1)) / q;
4161         start_i = 1;
4162         last_non_zero = 0;
4163         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4164         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4165     } else {
4166         start_i = 0;
4167         last_non_zero = -1;
4168         qmat = s->q_inter_matrix[qscale];
4169         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4170     }
4171     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4172     threshold2= (threshold1<<1);
4173     for(i=63;i>=start_i;i--) {
4174         j = scantable[i];
4175         level = block[j] * qmat[j];
4176
4177         if(((unsigned)(level+threshold1))>threshold2){
4178             last_non_zero = i;
4179             break;
4180         }else{
4181             block[j]=0;
4182         }
4183     }
4184     for(i=start_i; i<=last_non_zero; i++) {
4185         j = scantable[i];
4186         level = block[j] * qmat[j];
4187
4188 //        if(   bias+level >= (1<<QMAT_SHIFT)
4189 //           || bias-level >= (1<<QMAT_SHIFT)){
4190         if(((unsigned)(level+threshold1))>threshold2){
4191             if(level>0){
4192                 level= (bias + level)>>QMAT_SHIFT;
4193                 block[j]= level;
4194             }else{
4195                 level= (bias - level)>>QMAT_SHIFT;
4196                 block[j]= -level;
4197             }
4198             max |=level;
4199         }else{
4200             block[j]=0;
4201         }
4202     }
4203     *overflow= s->max_qcoeff < max; //overflow might have happened
4204
4205     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4206     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4207         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4208
4209     return last_non_zero;
4210 }
4211
/* Byte offset of field x inside MpegEncContext, for the AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Flag set shared by the option tables below.  The replacement list is
 * parenthesized so the macro expands as one operand in any expression. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4214 static const AVOption h263_options[] = {
4215     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4216     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4217     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4218     FF_MPV_COMMON_OPTS
4219     { NULL },
4220 };
4221
4222 static const AVClass h263_class = {
4223     .class_name = "H.263 encoder",
4224     .item_name  = av_default_item_name,
4225     .option     = h263_options,
4226     .version    = LIBAVUTIL_VERSION_INT,
4227 };
4228
4229 AVCodec ff_h263_encoder = {
4230     .name           = "h263",
4231     .type           = AVMEDIA_TYPE_VIDEO,
4232     .id             = AV_CODEC_ID_H263,
4233     .priv_data_size = sizeof(MpegEncContext),
4234     .init           = ff_MPV_encode_init,
4235     .encode2        = ff_MPV_encode_picture,
4236     .close          = ff_MPV_encode_end,
4237     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4238     .long_name= NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4239     .priv_class     = &h263_class,
4240 };
4241
4242 static const AVOption h263p_options[] = {
4243     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4244     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4245     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4246     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4247     FF_MPV_COMMON_OPTS
4248     { NULL },
4249 };
4250 static const AVClass h263p_class = {
4251     .class_name = "H.263p encoder",
4252     .item_name  = av_default_item_name,
4253     .option     = h263p_options,
4254     .version    = LIBAVUTIL_VERSION_INT,
4255 };
4256
4257 AVCodec ff_h263p_encoder = {
4258     .name           = "h263p",
4259     .type           = AVMEDIA_TYPE_VIDEO,
4260     .id             = AV_CODEC_ID_H263P,
4261     .priv_data_size = sizeof(MpegEncContext),
4262     .init           = ff_MPV_encode_init,
4263     .encode2        = ff_MPV_encode_picture,
4264     .close          = ff_MPV_encode_end,
4265     .capabilities   = CODEC_CAP_SLICE_THREADS,
4266     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4267     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4268     .priv_class     = &h263p_class,
4269 };
4270
4271 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4272
4273 AVCodec ff_msmpeg4v2_encoder = {
4274     .name           = "msmpeg4v2",
4275     .type           = AVMEDIA_TYPE_VIDEO,
4276     .id             = AV_CODEC_ID_MSMPEG4V2,
4277     .priv_data_size = sizeof(MpegEncContext),
4278     .init           = ff_MPV_encode_init,
4279     .encode2        = ff_MPV_encode_picture,
4280     .close          = ff_MPV_encode_end,
4281     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4282     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4283     .priv_class     = &msmpeg4v2_class,
4284 };
4285
4286 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4287
4288 AVCodec ff_msmpeg4v3_encoder = {
4289     .name           = "msmpeg4",
4290     .type           = AVMEDIA_TYPE_VIDEO,
4291     .id             = AV_CODEC_ID_MSMPEG4V3,
4292     .priv_data_size = sizeof(MpegEncContext),
4293     .init           = ff_MPV_encode_init,
4294     .encode2        = ff_MPV_encode_picture,
4295     .close          = ff_MPV_encode_end,
4296     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4297     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4298     .priv_class     = &msmpeg4v3_class,
4299 };
4300
4301 FF_MPV_GENERIC_CLASS(wmv1)
4302
4303 AVCodec ff_wmv1_encoder = {
4304     .name           = "wmv1",
4305     .type           = AVMEDIA_TYPE_VIDEO,
4306     .id             = AV_CODEC_ID_WMV1,
4307     .priv_data_size = sizeof(MpegEncContext),
4308     .init           = ff_MPV_encode_init,
4309     .encode2        = ff_MPV_encode_picture,
4310     .close          = ff_MPV_encode_end,
4311     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4312     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4313     .priv_class     = &wmv1_class,
4314 };