]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
d527ace6dda6921b3947cd294ecbf2979e4ac3a9
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/intmath.h"
31 #include "libavutil/mathematics.h"
32 #include "libavutil/opt.h"
33 #include "avcodec.h"
34 #include "dsputil.h"
35 #include "mpegvideo.h"
36 #include "h263.h"
37 #include "mjpegenc.h"
38 #include "msmpeg4.h"
39 #include "faandct.h"
40 #include "thread.h"
41 #include "aandcttab.h"
42 #include "flv.h"
43 #include "mpeg4video.h"
44 #include "internal.h"
45 #include "bytestream.h"
46 #include <limits.h>
47
48 //#undef NDEBUG
49 //#include <assert.h>
50
51 static int encode_picture(MpegEncContext *s, int picture_number);
52 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
53 static int sse_mb(MpegEncContext *s);
54 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block);
55 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
56
57 /* enable all paranoid tests for rounding, overflows, etc... */
58 //#define PARANOID
59
60 //#define DEBUG
61
62 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
63 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
64
65 const AVOption ff_mpv_generic_options[] = {
66     FF_MPV_COMMON_OPTS
67     { NULL },
68 };
69
70 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
71                        uint16_t (*qmat16)[2][64],
72                        const uint16_t *quant_matrix,
73                        int bias, int qmin, int qmax, int intra)
74 {
75     int qscale;
76     int shift = 0;
77
78     for (qscale = qmin; qscale <= qmax; qscale++) {
79         int i;
80         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
81             dsp->fdct == ff_jpeg_fdct_islow_10 ||
82             dsp->fdct == ff_faandct) {
83             for (i = 0; i < 64; i++) {
84                 const int j = dsp->idct_permutation[i];
85                 /* 16 <= qscale * quant_matrix[i] <= 7905
86                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
87                  *             19952 <=              x  <= 249205026
88                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
89                  *           3444240 >= (1 << 36) / (x) >= 275 */
90
91                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
92                                         (qscale * quant_matrix[j]));
93             }
94         } else if (dsp->fdct == ff_fdct_ifast) {
95             for (i = 0; i < 64; i++) {
96                 const int j = dsp->idct_permutation[i];
97                 /* 16 <= qscale * quant_matrix[i] <= 7905
98                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
99                  *             19952 <=              x  <= 249205026
100                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
101                  *           3444240 >= (1 << 36) / (x) >= 275 */
102
103                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
104                                         (ff_aanscales[i] * qscale *
105                                          quant_matrix[j]));
106             }
107         } else {
108             for (i = 0; i < 64; i++) {
109                 const int j = dsp->idct_permutation[i];
110                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
111                  * Assume x = qscale * quant_matrix[i]
112                  * So             16 <=              x  <= 7905
113                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
114                  * so          32768 >= (1 << 19) / (x) >= 67 */
115                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
116                                         (qscale * quant_matrix[j]));
117                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
118                 //                    (qscale * quant_matrix[i]);
119                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
120                                        (qscale * quant_matrix[j]);
121
122                 if (qmat16[qscale][0][i] == 0 ||
123                     qmat16[qscale][0][i] == 128 * 256)
124                     qmat16[qscale][0][i] = 128 * 256 - 1;
125                 qmat16[qscale][1][i] =
126                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
127                                 qmat16[qscale][0][i]);
128             }
129         }
130
131         for (i = intra; i < 64; i++) {
132             int64_t max = 8191;
133             if (dsp->fdct == ff_fdct_ifast) {
134                 max = (8191LL * ff_aanscales[i]) >> 14;
135             }
136             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
137                 shift++;
138             }
139         }
140     }
141     if (shift) {
142         av_log(NULL, AV_LOG_INFO,
143                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
144                QMAT_SHIFT - shift);
145     }
146 }
147
148 static inline void update_qscale(MpegEncContext *s)
149 {
150     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
151                 (FF_LAMBDA_SHIFT + 7);
152     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
153
154     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
155                  FF_LAMBDA_SHIFT;
156 }
157
158 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
159 {
160     int i;
161
162     if (matrix) {
163         put_bits(pb, 1, 1);
164         for (i = 0; i < 64; i++) {
165             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
166         }
167     } else
168         put_bits(pb, 1, 0);
169 }
170
171 /**
172  * init s->current_picture.qscale_table from s->lambda_table
173  */
174 void ff_init_qscale_tab(MpegEncContext *s)
175 {
176     int8_t * const qscale_table = s->current_picture.f.qscale_table;
177     int i;
178
179     for (i = 0; i < s->mb_num; i++) {
180         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
181         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
182         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
183                                                   s->avctx->qmax);
184     }
185 }
186
187 static void copy_picture_attributes(MpegEncContext *s,
188                                     AVFrame *dst,
189                                     AVFrame *src)
190 {
191     int i;
192
193     dst->pict_type              = src->pict_type;
194     dst->quality                = src->quality;
195     dst->coded_picture_number   = src->coded_picture_number;
196     dst->display_picture_number = src->display_picture_number;
197     //dst->reference              = src->reference;
198     dst->pts                    = src->pts;
199     dst->interlaced_frame       = src->interlaced_frame;
200     dst->top_field_first        = src->top_field_first;
201
202     if (s->avctx->me_threshold) {
203         if (!src->motion_val[0])
204             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
205         if (!src->mb_type)
206             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
207         if (!src->ref_index[0])
208             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
209         if (src->motion_subsample_log2 != dst->motion_subsample_log2)
210             av_log(s->avctx, AV_LOG_ERROR,
211                    "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
212                    src->motion_subsample_log2, dst->motion_subsample_log2);
213
214         memcpy(dst->mb_type, src->mb_type,
215                s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
216
217         for (i = 0; i < 2; i++) {
218             int stride = ((16 * s->mb_width ) >>
219                           src->motion_subsample_log2) + 1;
220             int height = ((16 * s->mb_height) >> src->motion_subsample_log2);
221
222             if (src->motion_val[i] &&
223                 src->motion_val[i] != dst->motion_val[i]) {
224                 memcpy(dst->motion_val[i], src->motion_val[i],
225                        2 * stride * height * sizeof(int16_t));
226             }
227             if (src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]) {
228                 memcpy(dst->ref_index[i], src->ref_index[i],
229                        s->mb_stride * 4 * s->mb_height * sizeof(int8_t));
230             }
231         }
232     }
233 }
234
235 static void update_duplicate_context_after_me(MpegEncContext *dst,
236                                               MpegEncContext *src)
237 {
238 #define COPY(a) dst->a= src->a
239     COPY(pict_type);
240     COPY(current_picture);
241     COPY(f_code);
242     COPY(b_code);
243     COPY(qscale);
244     COPY(lambda);
245     COPY(lambda2);
246     COPY(picture_in_gop_number);
247     COPY(gop_picture_number);
248     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
249     COPY(progressive_frame);    // FIXME don't set in encode_header
250     COPY(partitioned_frame);    // FIXME don't set in encode_header
251 #undef COPY
252 }
253
254 /**
255  * Set the given MpegEncContext to defaults for encoding.
256  * the changed fields will not depend upon the prior state of the MpegEncContext.
257  */
258 static void MPV_encode_defaults(MpegEncContext *s)
259 {
260     int i;
261     ff_MPV_common_defaults(s);
262
263     for (i = -16; i < 16; i++) {
264         default_fcode_tab[i + MAX_MV] = 1;
265     }
266     s->me.mv_penalty = default_mv_penalty;
267     s->fcode_tab     = default_fcode_tab;
268 }
269
270 /* init video encoder */
271 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
272 {
273     MpegEncContext *s = avctx->priv_data;
274     int i;
275     int chroma_h_shift, chroma_v_shift;
276
277     MPV_encode_defaults(s);
278
279     switch (avctx->codec_id) {
280     case AV_CODEC_ID_MPEG2VIDEO:
281         if (avctx->pix_fmt != PIX_FMT_YUV420P &&
282             avctx->pix_fmt != PIX_FMT_YUV422P) {
283             av_log(avctx, AV_LOG_ERROR,
284                    "only YUV420 and YUV422 are supported\n");
285             return -1;
286         }
287         break;
288     case AV_CODEC_ID_LJPEG:
289         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
290             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
291             avctx->pix_fmt != PIX_FMT_YUVJ444P &&
292             avctx->pix_fmt != PIX_FMT_BGRA     &&
293             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
294               avctx->pix_fmt != PIX_FMT_YUV422P &&
295               avctx->pix_fmt != PIX_FMT_YUV444P) ||
296              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
297             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
298             return -1;
299         }
300         break;
301     case AV_CODEC_ID_MJPEG:
302         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
303             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
304             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
305               avctx->pix_fmt != PIX_FMT_YUV422P) ||
306              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
307             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
308             return -1;
309         }
310         break;
311     default:
312         if (avctx->pix_fmt != PIX_FMT_YUV420P) {
313             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
314             return -1;
315         }
316     }
317
318     switch (avctx->pix_fmt) {
319     case PIX_FMT_YUVJ422P:
320     case PIX_FMT_YUV422P:
321         s->chroma_format = CHROMA_422;
322         break;
323     case PIX_FMT_YUVJ420P:
324     case PIX_FMT_YUV420P:
325     default:
326         s->chroma_format = CHROMA_420;
327         break;
328     }
329
330     s->bit_rate = avctx->bit_rate;
331     s->width    = avctx->width;
332     s->height   = avctx->height;
333     if (avctx->gop_size > 600 &&
334         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
335         av_log(avctx, AV_LOG_ERROR,
336                "Warning keyframe interval too large! reducing it ...\n");
337         avctx->gop_size = 600;
338     }
339     s->gop_size     = avctx->gop_size;
340     s->avctx        = avctx;
341     s->flags        = avctx->flags;
342     s->flags2       = avctx->flags2;
343     s->max_b_frames = avctx->max_b_frames;
344     s->codec_id     = avctx->codec->id;
345 #if FF_API_MPV_GLOBAL_OPTS
346     if (avctx->luma_elim_threshold)
347         s->luma_elim_threshold   = avctx->luma_elim_threshold;
348     if (avctx->chroma_elim_threshold)
349         s->chroma_elim_threshold = avctx->chroma_elim_threshold;
350 #endif
351     s->strict_std_compliance = avctx->strict_std_compliance;
352     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
353     s->mpeg_quant         = avctx->mpeg_quant;
354     s->rtp_mode           = !!avctx->rtp_payload_size;
355     s->intra_dc_precision = avctx->intra_dc_precision;
356     s->user_specified_pts = AV_NOPTS_VALUE;
357
358     if (s->gop_size <= 1) {
359         s->intra_only = 1;
360         s->gop_size   = 12;
361     } else {
362         s->intra_only = 0;
363     }
364
365     s->me_method = avctx->me_method;
366
367     /* Fixed QSCALE */
368     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
369
370 #if FF_API_MPV_GLOBAL_OPTS
371     if (s->flags & CODEC_FLAG_QP_RD)
372         s->mpv_flags |= FF_MPV_FLAG_QP_RD;
373 #endif
374
375     s->adaptive_quant = (s->avctx->lumi_masking ||
376                          s->avctx->dark_masking ||
377                          s->avctx->temporal_cplx_masking ||
378                          s->avctx->spatial_cplx_masking  ||
379                          s->avctx->p_masking      ||
380                          s->avctx->border_masking ||
381                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
382                         !s->fixed_qscale;
383
384     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
385
386     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
387         av_log(avctx, AV_LOG_ERROR,
388                "a vbv buffer size is needed, "
389                "for encoding with a maximum bitrate\n");
390         return -1;
391     }
392
393     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
394         av_log(avctx, AV_LOG_INFO,
395                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
396     }
397
398     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
399         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
400         return -1;
401     }
402
403     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
404         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
405         return -1;
406     }
407
408     if (avctx->rc_max_rate &&
409         avctx->rc_max_rate == avctx->bit_rate &&
410         avctx->rc_max_rate != avctx->rc_min_rate) {
411         av_log(avctx, AV_LOG_INFO,
412                "impossible bitrate constraints, this will fail\n");
413     }
414
415     if (avctx->rc_buffer_size &&
416         avctx->bit_rate * (int64_t)avctx->time_base.num >
417             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
418         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
419         return -1;
420     }
421
422     if (!s->fixed_qscale &&
423         avctx->bit_rate * av_q2d(avctx->time_base) >
424             avctx->bit_rate_tolerance) {
425         av_log(avctx, AV_LOG_ERROR,
426                "bitrate tolerance too small for bitrate\n");
427         return -1;
428     }
429
430     if (s->avctx->rc_max_rate &&
431         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
432         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
433          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
434         90000LL * (avctx->rc_buffer_size - 1) >
435             s->avctx->rc_max_rate * 0xFFFFLL) {
436         av_log(avctx, AV_LOG_INFO,
437                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
438                "specified vbv buffer is too large for the given bitrate!\n");
439     }
440
441     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
442         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
443         s->codec_id != AV_CODEC_ID_FLV1) {
444         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
445         return -1;
446     }
447
448     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
449         av_log(avctx, AV_LOG_ERROR,
450                "OBMC is only supported with simple mb decision\n");
451         return -1;
452     }
453
454     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
455         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
456         return -1;
457     }
458
459     if (s->max_b_frames                    &&
460         s->codec_id != AV_CODEC_ID_MPEG4      &&
461         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
462         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
463         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
464         return -1;
465     }
466
467     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
468          s->codec_id == AV_CODEC_ID_H263  ||
469          s->codec_id == AV_CODEC_ID_H263P) &&
470         (avctx->sample_aspect_ratio.num > 255 ||
471          avctx->sample_aspect_ratio.den > 255)) {
472         av_log(avctx, AV_LOG_ERROR,
473                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
474                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
475         return -1;
476     }
477
478     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
479         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
480         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
481         return -1;
482     }
483
484     // FIXME mpeg2 uses that too
485     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
486         av_log(avctx, AV_LOG_ERROR,
487                "mpeg2 style quantization not supported by codec\n");
488         return -1;
489     }
490
491 #if FF_API_MPV_GLOBAL_OPTS
492     if (s->flags & CODEC_FLAG_CBP_RD)
493         s->mpv_flags |= FF_MPV_FLAG_CBP_RD;
494 #endif
495
496     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
497         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
498         return -1;
499     }
500
501     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
502         s->avctx->mb_decision != FF_MB_DECISION_RD) {
503         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
504         return -1;
505     }
506
507     if (s->avctx->scenechange_threshold < 1000000000 &&
508         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
509         av_log(avctx, AV_LOG_ERROR,
510                "closed gop with scene change detection are not supported yet, "
511                "set threshold to 1000000000\n");
512         return -1;
513     }
514
515     if (s->flags & CODEC_FLAG_LOW_DELAY) {
516         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
517             av_log(avctx, AV_LOG_ERROR,
518                   "low delay forcing is only available for mpeg2\n");
519             return -1;
520         }
521         if (s->max_b_frames != 0) {
522             av_log(avctx, AV_LOG_ERROR,
523                    "b frames cannot be used with low delay\n");
524             return -1;
525         }
526     }
527
528     if (s->q_scale_type == 1) {
529         if (avctx->qmax > 12) {
530             av_log(avctx, AV_LOG_ERROR,
531                    "non linear quant only supports qmax <= 12 currently\n");
532             return -1;
533         }
534     }
535
536     if (s->avctx->thread_count > 1         &&
537         s->codec_id != AV_CODEC_ID_MPEG4      &&
538         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
539         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
540         (s->codec_id != AV_CODEC_ID_H263P)) {
541         av_log(avctx, AV_LOG_ERROR,
542                "multi threaded encoding not supported by codec\n");
543         return -1;
544     }
545
546     if (s->avctx->thread_count < 1) {
547         av_log(avctx, AV_LOG_ERROR,
548                "automatic thread number detection not supported by codec,"
549                "patch welcome\n");
550         return -1;
551     }
552
553     if (s->avctx->thread_count > 1)
554         s->rtp_mode = 1;
555
556     if (!avctx->time_base.den || !avctx->time_base.num) {
557         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
558         return -1;
559     }
560
561     i = (INT_MAX / 2 + 128) >> 8;
562     if (avctx->me_threshold >= i) {
563         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n",
564                i - 1);
565         return -1;
566     }
567     if (avctx->mb_threshold >= i) {
568         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
569                i - 1);
570         return -1;
571     }
572
573     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
574         av_log(avctx, AV_LOG_INFO,
575                "notice: b_frame_strategy only affects the first pass\n");
576         avctx->b_frame_strategy = 0;
577     }
578
579     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
580     if (i > 1) {
581         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
582         avctx->time_base.den /= i;
583         avctx->time_base.num /= i;
584         //return -1;
585     }
586
587     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
588         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
589         // (a + x * 3 / 8) / x
590         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
591         s->inter_quant_bias = 0;
592     } else {
593         s->intra_quant_bias = 0;
594         // (a - x / 4) / x
595         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
596     }
597
598     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
599         s->intra_quant_bias = avctx->intra_quant_bias;
600     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
601         s->inter_quant_bias = avctx->inter_quant_bias;
602
603     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
604                                   &chroma_v_shift);
605
606     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
607         s->avctx->time_base.den > (1 << 16) - 1) {
608         av_log(avctx, AV_LOG_ERROR,
609                "timebase %d/%d not supported by MPEG 4 standard, "
610                "the maximum admitted value for the timebase denominator "
611                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
612                (1 << 16) - 1);
613         return -1;
614     }
615     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
616
617 #if FF_API_MPV_GLOBAL_OPTS
618     if (avctx->flags2 & CODEC_FLAG2_SKIP_RD)
619         s->mpv_flags |= FF_MPV_FLAG_SKIP_RD;
620     if (avctx->flags2 & CODEC_FLAG2_STRICT_GOP)
621         s->mpv_flags |= FF_MPV_FLAG_STRICT_GOP;
622     if (avctx->quantizer_noise_shaping)
623         s->quantizer_noise_shaping = avctx->quantizer_noise_shaping;
624 #endif
625
626     switch (avctx->codec->id) {
627     case AV_CODEC_ID_MPEG1VIDEO:
628         s->out_format = FMT_MPEG1;
629         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
630         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
631         break;
632     case AV_CODEC_ID_MPEG2VIDEO:
633         s->out_format = FMT_MPEG1;
634         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
635         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
636         s->rtp_mode   = 1;
637         break;
638     case AV_CODEC_ID_LJPEG:
639     case AV_CODEC_ID_MJPEG:
640         s->out_format = FMT_MJPEG;
641         s->intra_only = 1; /* force intra only for jpeg */
642         if (avctx->codec->id == AV_CODEC_ID_LJPEG &&
643             avctx->pix_fmt   == PIX_FMT_BGRA) {
644             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
645             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
646             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
647         } else {
648             s->mjpeg_vsample[0] = 2;
649             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
650             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
651             s->mjpeg_hsample[0] = 2;
652             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
653             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
654         }
655         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
656             ff_mjpeg_encode_init(s) < 0)
657             return -1;
658         avctx->delay = 0;
659         s->low_delay = 1;
660         break;
661     case AV_CODEC_ID_H261:
662         if (!CONFIG_H261_ENCODER)
663             return -1;
664         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
665             av_log(avctx, AV_LOG_ERROR,
666                    "The specified picture size of %dx%d is not valid for the "
667                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
668                     s->width, s->height);
669             return -1;
670         }
671         s->out_format = FMT_H261;
672         avctx->delay  = 0;
673         s->low_delay  = 1;
674         break;
675     case AV_CODEC_ID_H263:
676         if (!CONFIG_H263_ENCODER)
677         return -1;
678         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
679                              s->width, s->height) == 8) {
680             av_log(avctx, AV_LOG_INFO,
681                    "The specified picture size of %dx%d is not valid for "
682                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
683                    "352x288, 704x576, and 1408x1152."
684                    "Try H.263+.\n", s->width, s->height);
685             return -1;
686         }
687         s->out_format = FMT_H263;
688         avctx->delay  = 0;
689         s->low_delay  = 1;
690         break;
691     case AV_CODEC_ID_H263P:
692         s->out_format = FMT_H263;
693         s->h263_plus  = 1;
694         /* Fx */
695         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
696         s->modified_quant  = s->h263_aic;
697         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
698         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
699
700         /* /Fx */
701         /* These are just to be sure */
702         avctx->delay = 0;
703         s->low_delay = 1;
704         break;
705     case AV_CODEC_ID_FLV1:
706         s->out_format      = FMT_H263;
707         s->h263_flv        = 2; /* format = 1; 11-bit codes */
708         s->unrestricted_mv = 1;
709         s->rtp_mode  = 0; /* don't allow GOB */
710         avctx->delay = 0;
711         s->low_delay = 1;
712         break;
713     case AV_CODEC_ID_RV10:
714         s->out_format = FMT_H263;
715         avctx->delay  = 0;
716         s->low_delay  = 1;
717         break;
718     case AV_CODEC_ID_RV20:
719         s->out_format      = FMT_H263;
720         avctx->delay       = 0;
721         s->low_delay       = 1;
722         s->modified_quant  = 1;
723         s->h263_aic        = 1;
724         s->h263_plus       = 1;
725         s->loop_filter     = 1;
726         s->unrestricted_mv = 0;
727         break;
728     case AV_CODEC_ID_MPEG4:
729         s->out_format      = FMT_H263;
730         s->h263_pred       = 1;
731         s->unrestricted_mv = 1;
732         s->low_delay       = s->max_b_frames ? 0 : 1;
733         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
734         break;
735     case AV_CODEC_ID_MSMPEG4V2:
736         s->out_format      = FMT_H263;
737         s->h263_pred       = 1;
738         s->unrestricted_mv = 1;
739         s->msmpeg4_version = 2;
740         avctx->delay       = 0;
741         s->low_delay       = 1;
742         break;
743     case AV_CODEC_ID_MSMPEG4V3:
744         s->out_format        = FMT_H263;
745         s->h263_pred         = 1;
746         s->unrestricted_mv   = 1;
747         s->msmpeg4_version   = 3;
748         s->flipflop_rounding = 1;
749         avctx->delay         = 0;
750         s->low_delay         = 1;
751         break;
752     case AV_CODEC_ID_WMV1:
753         s->out_format        = FMT_H263;
754         s->h263_pred         = 1;
755         s->unrestricted_mv   = 1;
756         s->msmpeg4_version   = 4;
757         s->flipflop_rounding = 1;
758         avctx->delay         = 0;
759         s->low_delay         = 1;
760         break;
761     case AV_CODEC_ID_WMV2:
762         s->out_format        = FMT_H263;
763         s->h263_pred         = 1;
764         s->unrestricted_mv   = 1;
765         s->msmpeg4_version   = 5;
766         s->flipflop_rounding = 1;
767         avctx->delay         = 0;
768         s->low_delay         = 1;
769         break;
770     default:
771         return -1;
772     }
773
774     avctx->has_b_frames = !s->low_delay;
775
776     s->encoding = 1;
777
778     s->progressive_frame    =
779     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
780                                                 CODEC_FLAG_INTERLACED_ME) ||
781                                 s->alternate_scan);
782
783     /* init */
784     if (ff_MPV_common_init(s) < 0)
785         return -1;
786
787     if (!s->dct_quantize)
788         s->dct_quantize = ff_dct_quantize_c;
789     if (!s->denoise_dct)
790         s->denoise_dct  = denoise_dct_c;
791     s->fast_dct_quantize = s->dct_quantize;
792     if (avctx->trellis)
793         s->dct_quantize  = dct_quantize_trellis_c;
794
795     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
796         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
797
798     s->quant_precision = 5;
799
800     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
801     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
802
803     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
804         ff_h261_encode_init(s);
805     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
806         ff_h263_encode_init(s);
807     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
808         ff_msmpeg4_encode_init(s);
809     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
810         && s->out_format == FMT_MPEG1)
811         ff_mpeg1_encode_init(s);
812
813     /* init q matrix */
814     for (i = 0; i < 64; i++) {
815         int j = s->dsp.idct_permutation[i];
816         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
817             s->mpeg_quant) {
818             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
819             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
820         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
821             s->intra_matrix[j] =
822             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
823         } else {
824             /* mpeg1/2 */
825             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
826             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
827         }
828         if (s->avctx->intra_matrix)
829             s->intra_matrix[j] = s->avctx->intra_matrix[i];
830         if (s->avctx->inter_matrix)
831             s->inter_matrix[j] = s->avctx->inter_matrix[i];
832     }
833
834     /* precompute matrix */
835     /* for mjpeg, we do include qscale in the matrix */
836     if (s->out_format != FMT_MJPEG) {
837         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
838                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
839                           31, 1);
840         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
841                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
842                           31, 0);
843     }
844
845     if (ff_rate_control_init(s) < 0)
846         return -1;
847
848     return 0;
849 }
850
851 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
852 {
853     MpegEncContext *s = avctx->priv_data;
854
855     ff_rate_control_uninit(s);
856
857     ff_MPV_common_end(s);
858     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
859         s->out_format == FMT_MJPEG)
860         ff_mjpeg_encode_close(s);
861
862     av_freep(&avctx->extradata);
863
864     return 0;
865 }
866
/**
 * Sum of absolute differences between a 16x16 pixel block and a constant.
 *
 * @param src    top-left pixel of the block
 * @param ref    value every pixel is compared against
 * @param stride distance in bytes between the starts of two rows
 * @return sum over the 256 pixels of |pixel - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++)
            total += FFABS(line[col] - ref);
    }

    return total;
}
880
881 static int get_intra_count(MpegEncContext *s, uint8_t *src,
882                            uint8_t *ref, int stride)
883 {
884     int x, y, w, h;
885     int acc = 0;
886
887     w = s->width  & ~15;
888     h = s->height & ~15;
889
890     for (y = 0; y < h; y += 16) {
891         for (x = 0; x < w; x += 16) {
892             int offset = x + y * stride;
893             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
894                                      16);
895             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
896             int sae  = get_sae(src + offset, mean, stride);
897
898             acc += sae + 500 < sad;
899         }
900     }
901     return acc;
902 }
903
904
905 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg)
906 {
907     AVFrame *pic = NULL;
908     int64_t pts;
909     int i;
910     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
911                                                  (s->low_delay ? 0 : 1);
912     int direct = 1;
913
914     if (pic_arg) {
915         pts = pic_arg->pts;
916         pic_arg->display_picture_number = s->input_picture_number++;
917
918         if (pts != AV_NOPTS_VALUE) {
919             if (s->user_specified_pts != AV_NOPTS_VALUE) {
920                 int64_t time = pts;
921                 int64_t last = s->user_specified_pts;
922
923                 if (time <= last) {
924                     av_log(s->avctx, AV_LOG_ERROR,
925                            "Error, Invalid timestamp=%"PRId64", "
926                            "last=%"PRId64"\n", pts, s->user_specified_pts);
927                     return -1;
928                 }
929
930                 if (!s->low_delay && pic_arg->display_picture_number == 1)
931                     s->dts_delta = time - last;
932             }
933             s->user_specified_pts = pts;
934         } else {
935             if (s->user_specified_pts != AV_NOPTS_VALUE) {
936                 s->user_specified_pts =
937                 pts = s->user_specified_pts + 1;
938                 av_log(s->avctx, AV_LOG_INFO,
939                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
940                        pts);
941             } else {
942                 pts = pic_arg->display_picture_number;
943             }
944         }
945     }
946
947   if (pic_arg) {
948     if (encoding_delay && !(s->flags & CODEC_FLAG_INPUT_PRESERVED))
949         direct = 0;
950     if (pic_arg->linesize[0] != s->linesize)
951         direct = 0;
952     if (pic_arg->linesize[1] != s->uvlinesize)
953         direct = 0;
954     if (pic_arg->linesize[2] != s->uvlinesize)
955         direct = 0;
956
957     //av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0],
958     //       pic_arg->linesize[1], s->linesize, s->uvlinesize);
959
960     if (direct) {
961         i = ff_find_unused_picture(s, 1);
962         if (i < 0)
963             return i;
964
965         pic = &s->picture[i].f;
966         pic->reference = 3;
967
968         for (i = 0; i < 4; i++) {
969             pic->data[i]     = pic_arg->data[i];
970             pic->linesize[i] = pic_arg->linesize[i];
971         }
972         if (ff_alloc_picture(s, (Picture *) pic, 1) < 0) {
973             return -1;
974         }
975     } else {
976         i = ff_find_unused_picture(s, 0);
977         if (i < 0)
978             return i;
979
980         pic = &s->picture[i].f;
981         pic->reference = 3;
982
983         if (ff_alloc_picture(s, (Picture *) pic, 0) < 0) {
984             return -1;
985         }
986
987         if (pic->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
988             pic->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
989             pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
990             // empty
991         } else {
992             int h_chroma_shift, v_chroma_shift;
993             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift,
994                                           &v_chroma_shift);
995
996             for (i = 0; i < 3; i++) {
997                 int src_stride = pic_arg->linesize[i];
998                 int dst_stride = i ? s->uvlinesize : s->linesize;
999                 int h_shift = i ? h_chroma_shift : 0;
1000                 int v_shift = i ? v_chroma_shift : 0;
1001                 int w = s->width  >> h_shift;
1002                 int h = s->height >> v_shift;
1003                 uint8_t *src = pic_arg->data[i];
1004                 uint8_t *dst = pic->data[i];
1005
1006                 if (!s->avctx->rc_buffer_size)
1007                     dst += INPLACE_OFFSET;
1008
1009                 if (src_stride == dst_stride)
1010                     memcpy(dst, src, src_stride * h);
1011                 else {
1012                     while (h--) {
1013                         memcpy(dst, src, w);
1014                         dst += dst_stride;
1015                         src += src_stride;
1016                     }
1017                 }
1018             }
1019         }
1020     }
1021     copy_picture_attributes(s, pic, pic_arg);
1022     pic->pts = pts; // we set this here to avoid modifiying pic_arg
1023   }
1024
1025     /* shift buffer entries */
1026     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1027         s->input_picture[i - 1] = s->input_picture[i];
1028
1029     s->input_picture[encoding_delay] = (Picture*) pic;
1030
1031     return 0;
1032 }
1033
1034 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1035 {
1036     int x, y, plane;
1037     int score = 0;
1038     int64_t score64 = 0;
1039
1040     for (plane = 0; plane < 3; plane++) {
1041         const int stride = p->f.linesize[plane];
1042         const int bw = plane ? 1 : 2;
1043         for (y = 0; y < s->mb_height * bw; y++) {
1044             for (x = 0; x < s->mb_width * bw; x++) {
1045                 int off = p->f.type == FF_BUFFER_TYPE_SHARED ? 0 : 16;
1046                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1047                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1048                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1049
1050                 switch (s->avctx->frame_skip_exp) {
1051                 case 0: score    =  FFMAX(score, v);          break;
1052                 case 1: score   += FFABS(v);                  break;
1053                 case 2: score   += v * v;                     break;
1054                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1055                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1056                 }
1057             }
1058         }
1059     }
1060
1061     if (score)
1062         score64 = score;
1063
1064     if (score64 < s->avctx->frame_skip_threshold)
1065         return 1;
1066     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1067         return 1;
1068     return 0;
1069 }
1070
1071 static int estimate_best_b_count(MpegEncContext *s)
1072 {
1073     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1074     AVCodecContext *c = avcodec_alloc_context3(NULL);
1075     AVFrame input[FF_MAX_B_FRAMES + 2];
1076     const int scale = s->avctx->brd_scale;
1077     int i, j, out_size, p_lambda, b_lambda, lambda2;
1078     int outbuf_size  = s->width * s->height; // FIXME
1079     uint8_t *outbuf  = av_malloc(outbuf_size);
1080     int64_t best_rd  = INT64_MAX;
1081     int best_b_count = -1;
1082
1083     assert(scale >= 0 && scale <= 3);
1084
1085     //emms_c();
1086     //s->next_picture_ptr->quality;
1087     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1088     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1089     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1090     if (!b_lambda) // FIXME we should do this somewhere else
1091         b_lambda = p_lambda;
1092     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1093                FF_LAMBDA_SHIFT;
1094
1095     c->width        = s->width  >> scale;
1096     c->height       = s->height >> scale;
1097     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1098                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1099     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1100     c->mb_decision  = s->avctx->mb_decision;
1101     c->me_cmp       = s->avctx->me_cmp;
1102     c->mb_cmp       = s->avctx->mb_cmp;
1103     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1104     c->pix_fmt      = PIX_FMT_YUV420P;
1105     c->time_base    = s->avctx->time_base;
1106     c->max_b_frames = s->max_b_frames;
1107
1108     if (avcodec_open2(c, codec, NULL) < 0)
1109         return -1;
1110
1111     for (i = 0; i < s->max_b_frames + 2; i++) {
1112         int ysize = c->width * c->height;
1113         int csize = (c->width / 2) * (c->height / 2);
1114         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1115                                                 s->next_picture_ptr;
1116
1117         avcodec_get_frame_defaults(&input[i]);
1118         input[i].data[0]     = av_malloc(ysize + 2 * csize);
1119         input[i].data[1]     = input[i].data[0] + ysize;
1120         input[i].data[2]     = input[i].data[1] + csize;
1121         input[i].linesize[0] = c->width;
1122         input[i].linesize[1] =
1123         input[i].linesize[2] = c->width / 2;
1124
1125         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1126             pre_input = *pre_input_ptr;
1127
1128             if (pre_input.f.type != FF_BUFFER_TYPE_SHARED && i) {
1129                 pre_input.f.data[0] += INPLACE_OFFSET;
1130                 pre_input.f.data[1] += INPLACE_OFFSET;
1131                 pre_input.f.data[2] += INPLACE_OFFSET;
1132             }
1133
1134             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
1135                                  pre_input.f.data[0], pre_input.f.linesize[0],
1136                                  c->width,      c->height);
1137             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
1138                                  pre_input.f.data[1], pre_input.f.linesize[1],
1139                                  c->width >> 1, c->height >> 1);
1140             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
1141                                  pre_input.f.data[2], pre_input.f.linesize[2],
1142                                  c->width >> 1, c->height >> 1);
1143         }
1144     }
1145
1146     for (j = 0; j < s->max_b_frames + 1; j++) {
1147         int64_t rd = 0;
1148
1149         if (!s->input_picture[j])
1150             break;
1151
1152         c->error[0] = c->error[1] = c->error[2] = 0;
1153
1154         input[0].pict_type = AV_PICTURE_TYPE_I;
1155         input[0].quality   = 1 * FF_QP2LAMBDA;
1156         out_size           = avcodec_encode_video(c, outbuf,
1157                                                   outbuf_size, &input[0]);
1158         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1159
1160         for (i = 0; i < s->max_b_frames + 1; i++) {
1161             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1162
1163             input[i + 1].pict_type = is_p ?
1164                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1165             input[i + 1].quality   = is_p ? p_lambda : b_lambda;
1166             out_size = avcodec_encode_video(c, outbuf, outbuf_size,
1167                                             &input[i + 1]);
1168             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1169         }
1170
1171         /* get the delayed frames */
1172         while (out_size) {
1173             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
1174             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1175         }
1176
1177         rd += c->error[0] + c->error[1] + c->error[2];
1178
1179         if (rd < best_rd) {
1180             best_rd = rd;
1181             best_b_count = j;
1182         }
1183     }
1184
1185     av_freep(&outbuf);
1186     avcodec_close(c);
1187     av_freep(&c);
1188
1189     for (i = 0; i < s->max_b_frames + 2; i++) {
1190         av_freep(&input[i].data[0]);
1191     }
1192
1193     return best_b_count;
1194 }
1195
1196 static int select_input_picture(MpegEncContext *s)
1197 {
1198     int i;
1199
1200     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1201         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1202     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1203
1204     /* set next picture type & ordering */
1205     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1206         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1207             s->next_picture_ptr == NULL || s->intra_only) {
1208             s->reordered_input_picture[0] = s->input_picture[0];
1209             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1210             s->reordered_input_picture[0]->f.coded_picture_number =
1211                 s->coded_picture_number++;
1212         } else {
1213             int b_frames;
1214
1215             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1216                 if (s->picture_in_gop_number < s->gop_size &&
1217                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1218                     // FIXME check that te gop check above is +-1 correct
1219                     //av_log(NULL, AV_LOG_DEBUG, "skip %p %"PRId64"\n",
1220                     //       s->input_picture[0]->f.data[0],
1221                     //       s->input_picture[0]->pts);
1222
1223                     if (s->input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED) {
1224                         for (i = 0; i < 4; i++)
1225                             s->input_picture[0]->f.data[i] = NULL;
1226                         s->input_picture[0]->f.type = 0;
1227                     } else {
1228                         assert(s->input_picture[0]->f.type == FF_BUFFER_TYPE_USER ||
1229                                s->input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL);
1230
1231                         s->avctx->release_buffer(s->avctx,
1232                                                  &s->input_picture[0]->f);
1233                     }
1234
1235                     emms_c();
1236                     ff_vbv_update(s, 0);
1237
1238                     goto no_output_pic;
1239                 }
1240             }
1241
1242             if (s->flags & CODEC_FLAG_PASS2) {
1243                 for (i = 0; i < s->max_b_frames + 1; i++) {
1244                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1245
1246                     if (pict_num >= s->rc_context.num_entries)
1247                         break;
1248                     if (!s->input_picture[i]) {
1249                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1250                         break;
1251                     }
1252
1253                     s->input_picture[i]->f.pict_type =
1254                         s->rc_context.entry[pict_num].new_pict_type;
1255                 }
1256             }
1257
1258             if (s->avctx->b_frame_strategy == 0) {
1259                 b_frames = s->max_b_frames;
1260                 while (b_frames && !s->input_picture[b_frames])
1261                     b_frames--;
1262             } else if (s->avctx->b_frame_strategy == 1) {
1263                 for (i = 1; i < s->max_b_frames + 1; i++) {
1264                     if (s->input_picture[i] &&
1265                         s->input_picture[i]->b_frame_score == 0) {
1266                         s->input_picture[i]->b_frame_score =
1267                             get_intra_count(s,
1268                                             s->input_picture[i    ]->f.data[0],
1269                                             s->input_picture[i - 1]->f.data[0],
1270                                             s->linesize) + 1;
1271                     }
1272                 }
1273                 for (i = 0; i < s->max_b_frames + 1; i++) {
1274                     if (s->input_picture[i] == NULL ||
1275                         s->input_picture[i]->b_frame_score - 1 >
1276                             s->mb_num / s->avctx->b_sensitivity)
1277                         break;
1278                 }
1279
1280                 b_frames = FFMAX(0, i - 1);
1281
1282                 /* reset scores */
1283                 for (i = 0; i < b_frames + 1; i++) {
1284                     s->input_picture[i]->b_frame_score = 0;
1285                 }
1286             } else if (s->avctx->b_frame_strategy == 2) {
1287                 b_frames = estimate_best_b_count(s);
1288             } else {
1289                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1290                 b_frames = 0;
1291             }
1292
1293             emms_c();
1294             //static int b_count = 0;
1295             //b_count += b_frames;
1296             //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
1297
1298             for (i = b_frames - 1; i >= 0; i--) {
1299                 int type = s->input_picture[i]->f.pict_type;
1300                 if (type && type != AV_PICTURE_TYPE_B)
1301                     b_frames = i;
1302             }
1303             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1304                 b_frames == s->max_b_frames) {
1305                 av_log(s->avctx, AV_LOG_ERROR,
1306                        "warning, too many b frames in a row\n");
1307             }
1308
1309             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1310                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1311                     s->gop_size > s->picture_in_gop_number) {
1312                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1313                 } else {
1314                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1315                         b_frames = 0;
1316                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1317                 }
1318             }
1319
1320             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1321                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1322                 b_frames--;
1323
1324             s->reordered_input_picture[0] = s->input_picture[b_frames];
1325             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1326                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1327             s->reordered_input_picture[0]->f.coded_picture_number =
1328                 s->coded_picture_number++;
1329             for (i = 0; i < b_frames; i++) {
1330                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1331                 s->reordered_input_picture[i + 1]->f.pict_type =
1332                     AV_PICTURE_TYPE_B;
1333                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1334                     s->coded_picture_number++;
1335             }
1336         }
1337     }
1338 no_output_pic:
1339     if (s->reordered_input_picture[0]) {
1340         s->reordered_input_picture[0]->f.reference =
1341            s->reordered_input_picture[0]->f.pict_type !=
1342                AV_PICTURE_TYPE_B ? 3 : 0;
1343
1344         ff_copy_picture(&s->new_picture, s->reordered_input_picture[0]);
1345
1346         if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED ||
1347             s->avctx->rc_buffer_size) {
1348             // input is a shared pix, so we can't modifiy it -> alloc a new
1349             // one & ensure that the shared one is reuseable
1350
1351             Picture *pic;
1352             int i = ff_find_unused_picture(s, 0);
1353             if (i < 0)
1354                 return i;
1355             pic = &s->picture[i];
1356
1357             pic->f.reference = s->reordered_input_picture[0]->f.reference;
1358             if (ff_alloc_picture(s, pic, 0) < 0) {
1359                 return -1;
1360             }
1361
1362             /* mark us unused / free shared pic */
1363             if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL)
1364                 s->avctx->release_buffer(s->avctx,
1365                                          &s->reordered_input_picture[0]->f);
1366             for (i = 0; i < 4; i++)
1367                 s->reordered_input_picture[0]->f.data[i] = NULL;
1368             s->reordered_input_picture[0]->f.type = 0;
1369
1370             copy_picture_attributes(s, &pic->f,
1371                                     &s->reordered_input_picture[0]->f);
1372
1373             s->current_picture_ptr = pic;
1374         } else {
1375             // input is not a shared pix -> reuse buffer for current_pix
1376
1377             assert(s->reordered_input_picture[0]->f.type ==
1378                        FF_BUFFER_TYPE_USER ||
1379                    s->reordered_input_picture[0]->f.type ==
1380                        FF_BUFFER_TYPE_INTERNAL);
1381
1382             s->current_picture_ptr = s->reordered_input_picture[0];
1383             for (i = 0; i < 4; i++) {
1384                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1385             }
1386         }
1387         ff_copy_picture(&s->current_picture, s->current_picture_ptr);
1388
1389         s->picture_number = s->new_picture.f.display_picture_number;
1390         //printf("dpn:%d\n", s->picture_number);
1391     } else {
1392         memset(&s->new_picture, 0, sizeof(Picture));
1393     }
1394     return 0;
1395 }
1396
1397 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1398                           const AVFrame *pic_arg, int *got_packet)
1399 {
1400     MpegEncContext *s = avctx->priv_data;
1401     int i, stuffing_count, ret;
1402     int context_count = s->slice_context_count;
1403
1404     s->picture_in_gop_number++;
1405
1406     if (load_input_picture(s, pic_arg) < 0)
1407         return -1;
1408
1409     if (select_input_picture(s) < 0) {
1410         return -1;
1411     }
1412
1413     /* output? */
1414     if (s->new_picture.f.data[0]) {
1415         if (!pkt->data &&
1416             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1417             return ret;
1418         if (s->mb_info) {
1419             s->mb_info_ptr = av_packet_new_side_data(pkt,
1420                                  AV_PKT_DATA_H263_MB_INFO,
1421                                  s->mb_width*s->mb_height*12);
1422             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1423         }
1424
1425         for (i = 0; i < context_count; i++) {
1426             int start_y = s->thread_context[i]->start_mb_y;
1427             int   end_y = s->thread_context[i]->  end_mb_y;
1428             int h       = s->mb_height;
1429             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1430             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1431
1432             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1433         }
1434
1435         s->pict_type = s->new_picture.f.pict_type;
1436         //emms_c();
1437         //printf("qs:%f %f %d\n", s->new_picture.quality,
1438         //       s->current_picture.quality, s->qscale);
1439         ff_MPV_frame_start(s, avctx);
1440 vbv_retry:
1441         if (encode_picture(s, s->picture_number) < 0)
1442             return -1;
1443
1444         avctx->header_bits = s->header_bits;
1445         avctx->mv_bits     = s->mv_bits;
1446         avctx->misc_bits   = s->misc_bits;
1447         avctx->i_tex_bits  = s->i_tex_bits;
1448         avctx->p_tex_bits  = s->p_tex_bits;
1449         avctx->i_count     = s->i_count;
1450         // FIXME f/b_count in avctx
1451         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1452         avctx->skip_count  = s->skip_count;
1453
1454         ff_MPV_frame_end(s);
1455
1456         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1457             ff_mjpeg_encode_picture_trailer(s);
1458
1459         if (avctx->rc_buffer_size) {
1460             RateControlContext *rcc = &s->rc_context;
1461             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1462
1463             if (put_bits_count(&s->pb) > max_size &&
1464                 s->lambda < s->avctx->lmax) {
1465                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1466                                        (s->qscale + 1) / s->qscale);
1467                 if (s->adaptive_quant) {
1468                     int i;
1469                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1470                         s->lambda_table[i] =
1471                             FFMAX(s->lambda_table[i] + 1,
1472                                   s->lambda_table[i] * (s->qscale + 1) /
1473                                   s->qscale);
1474                 }
1475                 s->mb_skipped = 0;        // done in MPV_frame_start()
1476                 // done in encode_picture() so we must undo it
1477                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1478                     if (s->flipflop_rounding          ||
1479                         s->codec_id == AV_CODEC_ID_H263P ||
1480                         s->codec_id == AV_CODEC_ID_MPEG4)
1481                         s->no_rounding ^= 1;
1482                 }
1483                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1484                     s->time_base       = s->last_time_base;
1485                     s->last_non_b_time = s->time - s->pp_time;
1486                 }
1487                 //av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
1488                 for (i = 0; i < context_count; i++) {
1489                     PutBitContext *pb = &s->thread_context[i]->pb;
1490                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1491                 }
1492                 goto vbv_retry;
1493             }
1494
1495             assert(s->avctx->rc_max_rate);
1496         }
1497
1498         if (s->flags & CODEC_FLAG_PASS1)
1499             ff_write_pass1_stats(s);
1500
1501         for (i = 0; i < 4; i++) {
1502             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1503             avctx->error[i] += s->current_picture_ptr->f.error[i];
1504         }
1505
1506         if (s->flags & CODEC_FLAG_PASS1)
1507             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1508                    avctx->i_tex_bits + avctx->p_tex_bits ==
1509                        put_bits_count(&s->pb));
1510         flush_put_bits(&s->pb);
1511         s->frame_bits  = put_bits_count(&s->pb);
1512
1513         stuffing_count = ff_vbv_update(s, s->frame_bits);
1514         if (stuffing_count) {
1515             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1516                     stuffing_count + 50) {
1517                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1518                 return -1;
1519             }
1520
1521             switch (s->codec_id) {
1522             case AV_CODEC_ID_MPEG1VIDEO:
1523             case AV_CODEC_ID_MPEG2VIDEO:
1524                 while (stuffing_count--) {
1525                     put_bits(&s->pb, 8, 0);
1526                 }
1527             break;
1528             case AV_CODEC_ID_MPEG4:
1529                 put_bits(&s->pb, 16, 0);
1530                 put_bits(&s->pb, 16, 0x1C3);
1531                 stuffing_count -= 4;
1532                 while (stuffing_count--) {
1533                     put_bits(&s->pb, 8, 0xFF);
1534                 }
1535             break;
1536             default:
1537                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1538             }
1539             flush_put_bits(&s->pb);
1540             s->frame_bits  = put_bits_count(&s->pb);
1541         }
1542
1543         /* update mpeg1/2 vbv_delay for CBR */
1544         if (s->avctx->rc_max_rate                          &&
1545             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1546             s->out_format == FMT_MPEG1                     &&
1547             90000LL * (avctx->rc_buffer_size - 1) <=
1548                 s->avctx->rc_max_rate * 0xFFFFLL) {
1549             int vbv_delay, min_delay;
1550             double inbits  = s->avctx->rc_max_rate *
1551                              av_q2d(s->avctx->time_base);
1552             int    minbits = s->frame_bits - 8 *
1553                              (s->vbv_delay_ptr - s->pb.buf - 1);
1554             double bits    = s->rc_context.buffer_index + minbits - inbits;
1555
1556             if (bits < 0)
1557                 av_log(s->avctx, AV_LOG_ERROR,
1558                        "Internal error, negative bits\n");
1559
1560             assert(s->repeat_first_field == 0);
1561
1562             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1563             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1564                         s->avctx->rc_max_rate;
1565
1566             vbv_delay = FFMAX(vbv_delay, min_delay);
1567
1568             assert(vbv_delay < 0xFFFF);
1569
1570             s->vbv_delay_ptr[0] &= 0xF8;
1571             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1572             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1573             s->vbv_delay_ptr[2] &= 0x07;
1574             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1575             avctx->vbv_delay     = vbv_delay * 300;
1576         }
1577         s->total_bits     += s->frame_bits;
1578         avctx->frame_bits  = s->frame_bits;
1579
1580         pkt->pts = s->current_picture.f.pts;
1581         if (!s->low_delay) {
1582             if (!s->current_picture.f.coded_picture_number)
1583                 pkt->dts = pkt->pts - s->dts_delta;
1584             else
1585                 pkt->dts = s->reordered_pts;
1586             s->reordered_pts = s->input_picture[0]->f.pts;
1587         } else
1588             pkt->dts = pkt->pts;
1589         if (s->current_picture.f.key_frame)
1590             pkt->flags |= AV_PKT_FLAG_KEY;
1591         if (s->mb_info)
1592             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1593     } else {
1594         assert((put_bits_ptr(&s->pb) == s->pb.buf));
1595         s->frame_bits = 0;
1596     }
1597     assert((s->frame_bits & 7) == 0);
1598
1599     pkt->size = s->frame_bits / 8;
1600     *got_packet = !!pkt->size;
1601     return 0;
1602 }
1603
1604 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1605                                                 int n, int threshold)
1606 {
1607     static const char tab[64] = {
1608         3, 2, 2, 1, 1, 1, 1, 1,
1609         1, 1, 1, 1, 1, 1, 1, 1,
1610         1, 1, 1, 1, 1, 1, 1, 1,
1611         0, 0, 0, 0, 0, 0, 0, 0,
1612         0, 0, 0, 0, 0, 0, 0, 0,
1613         0, 0, 0, 0, 0, 0, 0, 0,
1614         0, 0, 0, 0, 0, 0, 0, 0,
1615         0, 0, 0, 0, 0, 0, 0, 0
1616     };
1617     int score = 0;
1618     int run = 0;
1619     int i;
1620     DCTELEM *block = s->block[n];
1621     const int last_index = s->block_last_index[n];
1622     int skip_dc;
1623
1624     if (threshold < 0) {
1625         skip_dc = 0;
1626         threshold = -threshold;
1627     } else
1628         skip_dc = 1;
1629
1630     /* Are all we could set to zero already zero? */
1631     if (last_index <= skip_dc - 1)
1632         return;
1633
1634     for (i = 0; i <= last_index; i++) {
1635         const int j = s->intra_scantable.permutated[i];
1636         const int level = FFABS(block[j]);
1637         if (level == 1) {
1638             if (skip_dc && i == 0)
1639                 continue;
1640             score += tab[run];
1641             run = 0;
1642         } else if (level > 1) {
1643             return;
1644         } else {
1645             run++;
1646         }
1647     }
1648     if (score >= threshold)
1649         return;
1650     for (i = skip_dc; i <= last_index; i++) {
1651         const int j = s->intra_scantable.permutated[i];
1652         block[j] = 0;
1653     }
1654     if (block[0])
1655         s->block_last_index[n] = 0;
1656     else
1657         s->block_last_index[n] = -1;
1658 }
1659
1660 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block,
1661                                int last_index)
1662 {
1663     int i;
1664     const int maxlevel = s->max_qcoeff;
1665     const int minlevel = s->min_qcoeff;
1666     int overflow = 0;
1667
1668     if (s->mb_intra) {
1669         i = 1; // skip clipping of intra dc
1670     } else
1671         i = 0;
1672
1673     for (; i <= last_index; i++) {
1674         const int j = s->intra_scantable.permutated[i];
1675         int level = block[j];
1676
1677         if (level > maxlevel) {
1678             level = maxlevel;
1679             overflow++;
1680         } else if (level < minlevel) {
1681             level = minlevel;
1682             overflow++;
1683         }
1684
1685         block[j] = level;
1686     }
1687
1688     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1689         av_log(s->avctx, AV_LOG_INFO,
1690                "warning, clipping %d dct coefficients to %d..%d\n",
1691                overflow, minlevel, maxlevel);
1692 }
1693
/**
 * Fill an 8x8 weight table from the local pixel activity of a block.
 *
 * For each pixel the sum and the sum of squares over its neighbourhood
 * (up to 3x3, cropped at the block borders) are gathered and the weight
 * is set to 36 * sqrt(count * sqr - sum * sum) / count.
 *
 * @param weight output table of 64 entries, row major
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance in bytes between two pixel rows
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;
    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            /* number of pixels inside the cropped window */
            const int count = (bottom - top) * (right - left);
            int total   = 0;
            int squares = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                for (nx = left; nx < right; nx++) {
                    const int pix = ptr[nx + ny * stride];
                    total   += pix;
                    squares += pix * pix;
                }
            }
            weight[col + 8 * row] =
                (36 * ff_sqrt(count * squares - total * total)) / count;
        }
    }
}
1717
1718 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1719                                                 int motion_x, int motion_y,
1720                                                 int mb_block_height,
1721                                                 int mb_block_count)
1722 {
1723     int16_t weight[8][64];
1724     DCTELEM orig[8][64];
1725     const int mb_x = s->mb_x;
1726     const int mb_y = s->mb_y;
1727     int i;
1728     int skip_dct[8];
1729     int dct_offset = s->linesize * 8; // default for progressive frames
1730     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1731     int wrap_y, wrap_c;
1732
1733     for (i = 0; i < mb_block_count; i++)
1734         skip_dct[i] = s->skipdct;
1735
1736     if (s->adaptive_quant) {
1737         const int last_qp = s->qscale;
1738         const int mb_xy = mb_x + mb_y * s->mb_stride;
1739
1740         s->lambda = s->lambda_table[mb_xy];
1741         update_qscale(s);
1742
1743         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1744             s->qscale = s->current_picture_ptr->f.qscale_table[mb_xy];
1745             s->dquant = s->qscale - last_qp;
1746
1747             if (s->out_format == FMT_H263) {
1748                 s->dquant = av_clip(s->dquant, -2, 2);
1749
1750                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1751                     if (!s->mb_intra) {
1752                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1753                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1754                                 s->dquant = 0;
1755                         }
1756                         if (s->mv_type == MV_TYPE_8X8)
1757                             s->dquant = 0;
1758                     }
1759                 }
1760             }
1761         }
1762         ff_set_qscale(s, last_qp + s->dquant);
1763     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1764         ff_set_qscale(s, s->qscale + s->dquant);
1765
1766     wrap_y = s->linesize;
1767     wrap_c = s->uvlinesize;
1768     ptr_y  = s->new_picture.f.data[0] +
1769              (mb_y * 16 * wrap_y)              + mb_x * 16;
1770     ptr_cb = s->new_picture.f.data[1] +
1771              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1772     ptr_cr = s->new_picture.f.data[2] +
1773              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1774
1775     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1776         uint8_t *ebuf = s->edge_emu_buffer + 32;
1777         s->dsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
1778                                 mb_y * 16, s->width, s->height);
1779         ptr_y = ebuf;
1780         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, 8,
1781                                 mb_block_height, mb_x * 8, mb_y * 8,
1782                                 s->width >> 1, s->height >> 1);
1783         ptr_cb = ebuf + 18 * wrap_y;
1784         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr, wrap_c, 8,
1785                                 mb_block_height, mb_x * 8, mb_y * 8,
1786                                 s->width >> 1, s->height >> 1);
1787         ptr_cr = ebuf + 18 * wrap_y + 8;
1788     }
1789
1790     if (s->mb_intra) {
1791         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1792             int progressive_score, interlaced_score;
1793
1794             s->interlaced_dct = 0;
1795             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1796                                                     NULL, wrap_y, 8) +
1797                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1798                                                     NULL, wrap_y, 8) - 400;
1799
1800             if (progressive_score > 0) {
1801                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1802                                                        NULL, wrap_y * 2, 8) +
1803                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1804                                                        NULL, wrap_y * 2, 8);
1805                 if (progressive_score > interlaced_score) {
1806                     s->interlaced_dct = 1;
1807
1808                     dct_offset = wrap_y;
1809                     wrap_y <<= 1;
1810                     if (s->chroma_format == CHROMA_422)
1811                         wrap_c <<= 1;
1812                 }
1813             }
1814         }
1815
1816         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1817         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1818         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1819         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1820
1821         if (s->flags & CODEC_FLAG_GRAY) {
1822             skip_dct[4] = 1;
1823             skip_dct[5] = 1;
1824         } else {
1825             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1826             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1827             if (!s->chroma_y_shift) { /* 422 */
1828                 s->dsp.get_pixels(s->block[6],
1829                                   ptr_cb + (dct_offset >> 1), wrap_c);
1830                 s->dsp.get_pixels(s->block[7],
1831                                   ptr_cr + (dct_offset >> 1), wrap_c);
1832             }
1833         }
1834     } else {
1835         op_pixels_func (*op_pix)[4];
1836         qpel_mc_func (*op_qpix)[16];
1837         uint8_t *dest_y, *dest_cb, *dest_cr;
1838
1839         dest_y  = s->dest[0];
1840         dest_cb = s->dest[1];
1841         dest_cr = s->dest[2];
1842
1843         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1844             op_pix  = s->dsp.put_pixels_tab;
1845             op_qpix = s->dsp.put_qpel_pixels_tab;
1846         } else {
1847             op_pix  = s->dsp.put_no_rnd_pixels_tab;
1848             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1849         }
1850
1851         if (s->mv_dir & MV_DIR_FORWARD) {
1852             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
1853                           s->last_picture.f.data,
1854                           op_pix, op_qpix);
1855             op_pix  = s->dsp.avg_pixels_tab;
1856             op_qpix = s->dsp.avg_qpel_pixels_tab;
1857         }
1858         if (s->mv_dir & MV_DIR_BACKWARD) {
1859             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
1860                           s->next_picture.f.data,
1861                           op_pix, op_qpix);
1862         }
1863
1864         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1865             int progressive_score, interlaced_score;
1866
1867             s->interlaced_dct = 0;
1868             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1869                                                     ptr_y,              wrap_y,
1870                                                     8) +
1871                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1872                                                     ptr_y + wrap_y * 8, wrap_y,
1873                                                     8) - 400;
1874
1875             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1876                 progressive_score -= 400;
1877
1878             if (progressive_score > 0) {
1879                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1880                                                        ptr_y,
1881                                                        wrap_y * 2, 8) +
1882                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1883                                                        ptr_y + wrap_y,
1884                                                        wrap_y * 2, 8);
1885
1886                 if (progressive_score > interlaced_score) {
1887                     s->interlaced_dct = 1;
1888
1889                     dct_offset = wrap_y;
1890                     wrap_y <<= 1;
1891                     if (s->chroma_format == CHROMA_422)
1892                         wrap_c <<= 1;
1893                 }
1894             }
1895         }
1896
1897         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1898         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1899         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1900                            dest_y + dct_offset, wrap_y);
1901         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1902                            dest_y + dct_offset + 8, wrap_y);
1903
1904         if (s->flags & CODEC_FLAG_GRAY) {
1905             skip_dct[4] = 1;
1906             skip_dct[5] = 1;
1907         } else {
1908             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1909             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1910             if (!s->chroma_y_shift) { /* 422 */
1911                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1912                                    dest_cb + (dct_offset >> 1), wrap_c);
1913                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
1914                                    dest_cr + (dct_offset >> 1), wrap_c);
1915             }
1916         }
1917         /* pre quantization */
1918         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1919                 2 * s->qscale * s->qscale) {
1920             // FIXME optimize
1921             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1922                               wrap_y, 8) < 20 * s->qscale)
1923                 skip_dct[0] = 1;
1924             if (s->dsp.sad[1](NULL, ptr_y + 8,
1925                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1926                 skip_dct[1] = 1;
1927             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1928                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1929                 skip_dct[2] = 1;
1930             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1931                               dest_y + dct_offset + 8,
1932                               wrap_y, 8) < 20 * s->qscale)
1933                 skip_dct[3] = 1;
1934             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
1935                               wrap_c, 8) < 20 * s->qscale)
1936                 skip_dct[4] = 1;
1937             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
1938                               wrap_c, 8) < 20 * s->qscale)
1939                 skip_dct[5] = 1;
1940             if (!s->chroma_y_shift) { /* 422 */
1941                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
1942                                   dest_cb + (dct_offset >> 1),
1943                                   wrap_c, 8) < 20 * s->qscale)
1944                     skip_dct[6] = 1;
1945                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
1946                                   dest_cr + (dct_offset >> 1),
1947                                   wrap_c, 8) < 20 * s->qscale)
1948                     skip_dct[7] = 1;
1949             }
1950         }
1951     }
1952
1953     if (s->quantizer_noise_shaping) {
1954         if (!skip_dct[0])
1955             get_visual_weight(weight[0], ptr_y                 , wrap_y);
1956         if (!skip_dct[1])
1957             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
1958         if (!skip_dct[2])
1959             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
1960         if (!skip_dct[3])
1961             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
1962         if (!skip_dct[4])
1963             get_visual_weight(weight[4], ptr_cb                , wrap_c);
1964         if (!skip_dct[5])
1965             get_visual_weight(weight[5], ptr_cr                , wrap_c);
1966         if (!s->chroma_y_shift) { /* 422 */
1967             if (!skip_dct[6])
1968                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
1969                                   wrap_c);
1970             if (!skip_dct[7])
1971                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
1972                                   wrap_c);
1973         }
1974         memcpy(orig[0], s->block[0], sizeof(DCTELEM) * 64 * mb_block_count);
1975     }
1976
1977     /* DCT & quantize */
1978     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
1979     {
1980         for (i = 0; i < mb_block_count; i++) {
1981             if (!skip_dct[i]) {
1982                 int overflow;
1983                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
1984                 // FIXME we could decide to change to quantizer instead of
1985                 // clipping
1986                 // JS: I don't think that would be a good idea it could lower
1987                 //     quality instead of improve it. Just INTRADC clipping
1988                 //     deserves changes in quantizer
1989                 if (overflow)
1990                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
1991             } else
1992                 s->block_last_index[i] = -1;
1993         }
1994         if (s->quantizer_noise_shaping) {
1995             for (i = 0; i < mb_block_count; i++) {
1996                 if (!skip_dct[i]) {
1997                     s->block_last_index[i] =
1998                         dct_quantize_refine(s, s->block[i], weight[i],
1999                                             orig[i], i, s->qscale);
2000                 }
2001             }
2002         }
2003
2004         if (s->luma_elim_threshold && !s->mb_intra)
2005             for (i = 0; i < 4; i++)
2006                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2007         if (s->chroma_elim_threshold && !s->mb_intra)
2008             for (i = 4; i < mb_block_count; i++)
2009                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2010
2011         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2012             for (i = 0; i < mb_block_count; i++) {
2013                 if (s->block_last_index[i] == -1)
2014                     s->coded_score[i] = INT_MAX / 256;
2015             }
2016         }
2017     }
2018
2019     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2020         s->block_last_index[4] =
2021         s->block_last_index[5] = 0;
2022         s->block[4][0] =
2023         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2024     }
2025
2026     // non c quantize code returns incorrect block_last_index FIXME
2027     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2028         for (i = 0; i < mb_block_count; i++) {
2029             int j;
2030             if (s->block_last_index[i] > 0) {
2031                 for (j = 63; j > 0; j--) {
2032                     if (s->block[i][s->intra_scantable.permutated[j]])
2033                         break;
2034                 }
2035                 s->block_last_index[i] = j;
2036             }
2037         }
2038     }
2039
2040     /* huffman encode */
2041     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2042     case AV_CODEC_ID_MPEG1VIDEO:
2043     case AV_CODEC_ID_MPEG2VIDEO:
2044         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2045             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2046         break;
2047     case AV_CODEC_ID_MPEG4:
2048         if (CONFIG_MPEG4_ENCODER)
2049             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2050         break;
2051     case AV_CODEC_ID_MSMPEG4V2:
2052     case AV_CODEC_ID_MSMPEG4V3:
2053     case AV_CODEC_ID_WMV1:
2054         if (CONFIG_MSMPEG4_ENCODER)
2055             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2056         break;
2057     case AV_CODEC_ID_WMV2:
2058         if (CONFIG_WMV2_ENCODER)
2059             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2060         break;
2061     case AV_CODEC_ID_H261:
2062         if (CONFIG_H261_ENCODER)
2063             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2064         break;
2065     case AV_CODEC_ID_H263:
2066     case AV_CODEC_ID_H263P:
2067     case AV_CODEC_ID_FLV1:
2068     case AV_CODEC_ID_RV10:
2069     case AV_CODEC_ID_RV20:
2070         if (CONFIG_H263_ENCODER)
2071             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2072         break;
2073     case AV_CODEC_ID_MJPEG:
2074         if (CONFIG_MJPEG_ENCODER)
2075             ff_mjpeg_encode_mb(s, s->block);
2076         break;
2077     default:
2078         assert(0);
2079     }
2080 }
2081
2082 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2083 {
2084     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2085     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2086 }
2087
2088 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2089     int i;
2090
2091     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2092
2093     /* mpeg1 */
2094     d->mb_skip_run= s->mb_skip_run;
2095     for(i=0; i<3; i++)
2096         d->last_dc[i] = s->last_dc[i];
2097
2098     /* statistics */
2099     d->mv_bits= s->mv_bits;
2100     d->i_tex_bits= s->i_tex_bits;
2101     d->p_tex_bits= s->p_tex_bits;
2102     d->i_count= s->i_count;
2103     d->f_count= s->f_count;
2104     d->b_count= s->b_count;
2105     d->skip_count= s->skip_count;
2106     d->misc_bits= s->misc_bits;
2107     d->last_bits= 0;
2108
2109     d->mb_skipped= 0;
2110     d->qscale= s->qscale;
2111     d->dquant= s->dquant;
2112
2113     d->esc3_level_length= s->esc3_level_length;
2114 }
2115
2116 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2117     int i;
2118
2119     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2120     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2121
2122     /* mpeg1 */
2123     d->mb_skip_run= s->mb_skip_run;
2124     for(i=0; i<3; i++)
2125         d->last_dc[i] = s->last_dc[i];
2126
2127     /* statistics */
2128     d->mv_bits= s->mv_bits;
2129     d->i_tex_bits= s->i_tex_bits;
2130     d->p_tex_bits= s->p_tex_bits;
2131     d->i_count= s->i_count;
2132     d->f_count= s->f_count;
2133     d->b_count= s->b_count;
2134     d->skip_count= s->skip_count;
2135     d->misc_bits= s->misc_bits;
2136
2137     d->mb_intra= s->mb_intra;
2138     d->mb_skipped= s->mb_skipped;
2139     d->mv_type= s->mv_type;
2140     d->mv_dir= s->mv_dir;
2141     d->pb= s->pb;
2142     if(s->data_partitioning){
2143         d->pb2= s->pb2;
2144         d->tex_pb= s->tex_pb;
2145     }
2146     d->block= s->block;
2147     for(i=0; i<8; i++)
2148         d->block_last_index[i]= s->block_last_index[i];
2149     d->interlaced_dct= s->interlaced_dct;
2150     d->qscale= s->qscale;
2151
2152     d->esc3_level_length= s->esc3_level_length;
2153 }
2154
2155 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2156                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2157                            int *dmin, int *next_block, int motion_x, int motion_y)
2158 {
2159     int score;
2160     uint8_t *dest_backup[3];
2161
2162     copy_context_before_encode(s, backup, type);
2163
2164     s->block= s->blocks[*next_block];
2165     s->pb= pb[*next_block];
2166     if(s->data_partitioning){
2167         s->pb2   = pb2   [*next_block];
2168         s->tex_pb= tex_pb[*next_block];
2169     }
2170
2171     if(*next_block){
2172         memcpy(dest_backup, s->dest, sizeof(s->dest));
2173         s->dest[0] = s->rd_scratchpad;
2174         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2175         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2176         assert(s->linesize >= 32); //FIXME
2177     }
2178
2179     encode_mb(s, motion_x, motion_y);
2180
2181     score= put_bits_count(&s->pb);
2182     if(s->data_partitioning){
2183         score+= put_bits_count(&s->pb2);
2184         score+= put_bits_count(&s->tex_pb);
2185     }
2186
2187     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2188         ff_MPV_decode_mb(s, s->block);
2189
2190         score *= s->lambda2;
2191         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2192     }
2193
2194     if(*next_block){
2195         memcpy(s->dest, dest_backup, sizeof(s->dest));
2196     }
2197
2198     if(score<*dmin){
2199         *dmin= score;
2200         *next_block^=1;
2201
2202         copy_context_after_encode(best, s, type);
2203     }
2204 }
2205
2206 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2207     uint32_t *sq = ff_squareTbl + 256;
2208     int acc=0;
2209     int x,y;
2210
2211     if(w==16 && h==16)
2212         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2213     else if(w==8 && h==8)
2214         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2215
2216     for(y=0; y<h; y++){
2217         for(x=0; x<w; x++){
2218             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2219         }
2220     }
2221
2222     assert(acc>=0);
2223
2224     return acc;
2225 }
2226
2227 static int sse_mb(MpegEncContext *s){
2228     int w= 16;
2229     int h= 16;
2230
2231     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2232     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2233
2234     if(w==16 && h==16)
2235       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2236         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2237                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2238                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2239       }else{
2240         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2241                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2242                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2243       }
2244     else
2245         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2246                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2247                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2248 }
2249
2250 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2251     MpegEncContext *s= *(void**)arg;
2252
2253
2254     s->me.pre_pass=1;
2255     s->me.dia_size= s->avctx->pre_dia_size;
2256     s->first_slice_line=1;
2257     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2258         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2259             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2260         }
2261         s->first_slice_line=0;
2262     }
2263
2264     s->me.pre_pass=0;
2265
2266     return 0;
2267 }
2268
2269 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2270     MpegEncContext *s= *(void**)arg;
2271
2272     ff_check_alignment();
2273
2274     s->me.dia_size= s->avctx->dia_size;
2275     s->first_slice_line=1;
2276     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2277         s->mb_x=0; //for block init below
2278         ff_init_block_index(s);
2279         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2280             s->block_index[0]+=2;
2281             s->block_index[1]+=2;
2282             s->block_index[2]+=2;
2283             s->block_index[3]+=2;
2284
2285             /* compute motion vector & mb_type and store in context */
2286             if(s->pict_type==AV_PICTURE_TYPE_B)
2287                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2288             else
2289                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2290         }
2291         s->first_slice_line=0;
2292     }
2293     return 0;
2294 }
2295
2296 static int mb_var_thread(AVCodecContext *c, void *arg){
2297     MpegEncContext *s= *(void**)arg;
2298     int mb_x, mb_y;
2299
2300     ff_check_alignment();
2301
2302     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2303         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2304             int xx = mb_x * 16;
2305             int yy = mb_y * 16;
2306             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2307             int varc;
2308             int sum = s->dsp.pix_sum(pix, s->linesize);
2309
2310             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2311
2312             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2313             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2314             s->me.mb_var_sum_temp    += varc;
2315         }
2316     }
2317     return 0;
2318 }
2319
2320 static void write_slice_end(MpegEncContext *s){
2321     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2322         if(s->partitioned_frame){
2323             ff_mpeg4_merge_partitions(s);
2324         }
2325
2326         ff_mpeg4_stuffing(&s->pb);
2327     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2328         ff_mjpeg_encode_stuffing(&s->pb);
2329     }
2330
2331     avpriv_align_put_bits(&s->pb);
2332     flush_put_bits(&s->pb);
2333
2334     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2335         s->misc_bits+= get_bits_diff(s);
2336 }
2337
2338 static void write_mb_info(MpegEncContext *s)
2339 {
2340     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2341     int offset = put_bits_count(&s->pb);
2342     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2343     int gobn = s->mb_y / s->gob_index;
2344     int pred_x, pred_y;
2345     if (CONFIG_H263_ENCODER)
2346         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2347     bytestream_put_le32(&ptr, offset);
2348     bytestream_put_byte(&ptr, s->qscale);
2349     bytestream_put_byte(&ptr, gobn);
2350     bytestream_put_le16(&ptr, mba);
2351     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2352     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2353     /* 4MV not implemented */
2354     bytestream_put_byte(&ptr, 0); /* hmv2 */
2355     bytestream_put_byte(&ptr, 0); /* vmv2 */
2356 }
2357
2358 static void update_mb_info(MpegEncContext *s, int startcode)
2359 {
2360     if (!s->mb_info)
2361         return;
2362     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2363         s->mb_info_size += 12;
2364         s->prev_mb_info = s->last_mb_info;
2365     }
2366     if (startcode) {
2367         s->prev_mb_info = put_bits_count(&s->pb)/8;
2368         /* This might have incremented mb_info_size above, and we return without
2369          * actually writing any info into that slot yet. But in that case,
2370          * this will be called again at the start of the after writing the
2371          * start code, actually writing the mb info. */
2372         return;
2373     }
2374
2375     s->last_mb_info = put_bits_count(&s->pb)/8;
2376     if (!s->mb_info_size)
2377         s->mb_info_size += 12;
2378     write_mb_info(s);
2379 }
2380
2381 static int encode_thread(AVCodecContext *c, void *arg){
2382     MpegEncContext *s= *(void**)arg;
2383     int mb_x, mb_y, pdif = 0;
2384     int chr_h= 16>>s->chroma_y_shift;
2385     int i, j;
2386     MpegEncContext best_s, backup_s;
2387     uint8_t bit_buf[2][MAX_MB_BYTES];
2388     uint8_t bit_buf2[2][MAX_MB_BYTES];
2389     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2390     PutBitContext pb[2], pb2[2], tex_pb[2];
2391 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
2392
2393     ff_check_alignment();
2394
2395     for(i=0; i<2; i++){
2396         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2397         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2398         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2399     }
2400
2401     s->last_bits= put_bits_count(&s->pb);
2402     s->mv_bits=0;
2403     s->misc_bits=0;
2404     s->i_tex_bits=0;
2405     s->p_tex_bits=0;
2406     s->i_count=0;
2407     s->f_count=0;
2408     s->b_count=0;
2409     s->skip_count=0;
2410
2411     for(i=0; i<3; i++){
2412         /* init last dc values */
2413         /* note: quant matrix value (8) is implied here */
2414         s->last_dc[i] = 128 << s->intra_dc_precision;
2415
2416         s->current_picture.f.error[i] = 0;
2417     }
2418     s->mb_skip_run = 0;
2419     memset(s->last_mv, 0, sizeof(s->last_mv));
2420
2421     s->last_mv_dir = 0;
2422
2423     switch(s->codec_id){
2424     case AV_CODEC_ID_H263:
2425     case AV_CODEC_ID_H263P:
2426     case AV_CODEC_ID_FLV1:
2427         if (CONFIG_H263_ENCODER)
2428             s->gob_index = ff_h263_get_gob_height(s);
2429         break;
2430     case AV_CODEC_ID_MPEG4:
2431         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2432             ff_mpeg4_init_partitions(s);
2433         break;
2434     }
2435
2436     s->resync_mb_x=0;
2437     s->resync_mb_y=0;
2438     s->first_slice_line = 1;
2439     s->ptr_lastgob = s->pb.buf;
2440     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2441 //    printf("row %d at %X\n", s->mb_y, (int)s);
2442         s->mb_x=0;
2443         s->mb_y= mb_y;
2444
2445         ff_set_qscale(s, s->qscale);
2446         ff_init_block_index(s);
2447
2448         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2449             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2450             int mb_type= s->mb_type[xy];
2451 //            int d;
2452             int dmin= INT_MAX;
2453             int dir;
2454
2455             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2456                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2457                 return -1;
2458             }
2459             if(s->data_partitioning){
2460                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2461                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2462                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2463                     return -1;
2464                 }
2465             }
2466
2467             s->mb_x = mb_x;
2468             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2469             ff_update_block_index(s);
2470
2471             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2472                 ff_h261_reorder_mb_index(s);
2473                 xy= s->mb_y*s->mb_stride + s->mb_x;
2474                 mb_type= s->mb_type[xy];
2475             }
2476
2477             /* write gob / video packet header  */
2478             if(s->rtp_mode){
2479                 int current_packet_size, is_gob_start;
2480
2481                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2482
2483                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2484
2485                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2486
2487                 switch(s->codec_id){
2488                 case AV_CODEC_ID_H263:
2489                 case AV_CODEC_ID_H263P:
2490                     if(!s->h263_slice_structured)
2491                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2492                     break;
2493                 case AV_CODEC_ID_MPEG2VIDEO:
2494                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2495                 case AV_CODEC_ID_MPEG1VIDEO:
2496                     if(s->mb_skip_run) is_gob_start=0;
2497                     break;
2498                 }
2499
2500                 if(is_gob_start){
2501                     if(s->start_mb_y != mb_y || mb_x!=0){
2502                         write_slice_end(s);
2503
2504                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2505                             ff_mpeg4_init_partitions(s);
2506                         }
2507                     }
2508
2509                     assert((put_bits_count(&s->pb)&7) == 0);
2510                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2511
2512                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2513                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2514                         int d= 100 / s->avctx->error_rate;
2515                         if(r % d == 0){
2516                             current_packet_size=0;
2517                             s->pb.buf_ptr= s->ptr_lastgob;
2518                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2519                         }
2520                     }
2521
2522                     if (s->avctx->rtp_callback){
2523                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2524                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2525                     }
2526                     update_mb_info(s, 1);
2527
2528                     switch(s->codec_id){
2529                     case AV_CODEC_ID_MPEG4:
2530                         if (CONFIG_MPEG4_ENCODER) {
2531                             ff_mpeg4_encode_video_packet_header(s);
2532                             ff_mpeg4_clean_buffers(s);
2533                         }
2534                     break;
2535                     case AV_CODEC_ID_MPEG1VIDEO:
2536                     case AV_CODEC_ID_MPEG2VIDEO:
2537                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2538                             ff_mpeg1_encode_slice_header(s);
2539                             ff_mpeg1_clean_buffers(s);
2540                         }
2541                     break;
2542                     case AV_CODEC_ID_H263:
2543                     case AV_CODEC_ID_H263P:
2544                         if (CONFIG_H263_ENCODER)
2545                             ff_h263_encode_gob_header(s, mb_y);
2546                     break;
2547                     }
2548
2549                     if(s->flags&CODEC_FLAG_PASS1){
2550                         int bits= put_bits_count(&s->pb);
2551                         s->misc_bits+= bits - s->last_bits;
2552                         s->last_bits= bits;
2553                     }
2554
2555                     s->ptr_lastgob += current_packet_size;
2556                     s->first_slice_line=1;
2557                     s->resync_mb_x=mb_x;
2558                     s->resync_mb_y=mb_y;
2559                 }
2560             }
2561
2562             if(  (s->resync_mb_x   == s->mb_x)
2563                && s->resync_mb_y+1 == s->mb_y){
2564                 s->first_slice_line=0;
2565             }
2566
2567             s->mb_skipped=0;
2568             s->dquant=0; //only for QP_RD
2569
2570             update_mb_info(s, 0);
2571
2572             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2573                 int next_block=0;
2574                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2575
2576                 copy_context_before_encode(&backup_s, s, -1);
2577                 backup_s.pb= s->pb;
2578                 best_s.data_partitioning= s->data_partitioning;
2579                 best_s.partitioned_frame= s->partitioned_frame;
2580                 if(s->data_partitioning){
2581                     backup_s.pb2= s->pb2;
2582                     backup_s.tex_pb= s->tex_pb;
2583                 }
2584
2585                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2586                     s->mv_dir = MV_DIR_FORWARD;
2587                     s->mv_type = MV_TYPE_16X16;
2588                     s->mb_intra= 0;
2589                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2590                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2591                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2592                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2593                 }
2594                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2595                     s->mv_dir = MV_DIR_FORWARD;
2596                     s->mv_type = MV_TYPE_FIELD;
2597                     s->mb_intra= 0;
2598                     for(i=0; i<2; i++){
2599                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2600                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2601                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2602                     }
2603                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2604                                  &dmin, &next_block, 0, 0);
2605                 }
2606                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2607                     s->mv_dir = MV_DIR_FORWARD;
2608                     s->mv_type = MV_TYPE_16X16;
2609                     s->mb_intra= 0;
2610                     s->mv[0][0][0] = 0;
2611                     s->mv[0][0][1] = 0;
2612                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2613                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2614                 }
2615                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2616                     s->mv_dir = MV_DIR_FORWARD;
2617                     s->mv_type = MV_TYPE_8X8;
2618                     s->mb_intra= 0;
2619                     for(i=0; i<4; i++){
2620                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2621                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2622                     }
2623                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2624                                  &dmin, &next_block, 0, 0);
2625                 }
2626                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2627                     s->mv_dir = MV_DIR_FORWARD;
2628                     s->mv_type = MV_TYPE_16X16;
2629                     s->mb_intra= 0;
2630                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2631                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2632                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2633                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2634                 }
2635                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2636                     s->mv_dir = MV_DIR_BACKWARD;
2637                     s->mv_type = MV_TYPE_16X16;
2638                     s->mb_intra= 0;
2639                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2640                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2641                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2642                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2643                 }
2644                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2645                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2646                     s->mv_type = MV_TYPE_16X16;
2647                     s->mb_intra= 0;
2648                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2649                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2650                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2651                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2652                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2653                                  &dmin, &next_block, 0, 0);
2654                 }
2655                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2656                     s->mv_dir = MV_DIR_FORWARD;
2657                     s->mv_type = MV_TYPE_FIELD;
2658                     s->mb_intra= 0;
2659                     for(i=0; i<2; i++){
2660                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2661                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2662                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2663                     }
2664                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2665                                  &dmin, &next_block, 0, 0);
2666                 }
2667                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2668                     s->mv_dir = MV_DIR_BACKWARD;
2669                     s->mv_type = MV_TYPE_FIELD;
2670                     s->mb_intra= 0;
2671                     for(i=0; i<2; i++){
2672                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2673                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2674                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2675                     }
2676                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2677                                  &dmin, &next_block, 0, 0);
2678                 }
2679                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2680                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2681                     s->mv_type = MV_TYPE_FIELD;
2682                     s->mb_intra= 0;
2683                     for(dir=0; dir<2; dir++){
2684                         for(i=0; i<2; i++){
2685                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2686                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2687                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2688                         }
2689                     }
2690                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2691                                  &dmin, &next_block, 0, 0);
2692                 }
2693                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2694                     s->mv_dir = 0;
2695                     s->mv_type = MV_TYPE_16X16;
2696                     s->mb_intra= 1;
2697                     s->mv[0][0][0] = 0;
2698                     s->mv[0][0][1] = 0;
2699                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2700                                  &dmin, &next_block, 0, 0);
2701                     if(s->h263_pred || s->h263_aic){
2702                         if(best_s.mb_intra)
2703                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2704                         else
2705                             ff_clean_intra_table_entries(s); //old mode?
2706                     }
2707                 }
2708
2709                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2710                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2711                         const int last_qp= backup_s.qscale;
2712                         int qpi, qp, dc[6];
2713                         DCTELEM ac[6][16];
2714                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2715                         static const int dquant_tab[4]={-1,1,-2,2};
2716
2717                         assert(backup_s.dquant == 0);
2718
2719                         //FIXME intra
2720                         s->mv_dir= best_s.mv_dir;
2721                         s->mv_type = MV_TYPE_16X16;
2722                         s->mb_intra= best_s.mb_intra;
2723                         s->mv[0][0][0] = best_s.mv[0][0][0];
2724                         s->mv[0][0][1] = best_s.mv[0][0][1];
2725                         s->mv[1][0][0] = best_s.mv[1][0][0];
2726                         s->mv[1][0][1] = best_s.mv[1][0][1];
2727
2728                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2729                         for(; qpi<4; qpi++){
2730                             int dquant= dquant_tab[qpi];
2731                             qp= last_qp + dquant;
2732                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2733                                 continue;
2734                             backup_s.dquant= dquant;
2735                             if(s->mb_intra && s->dc_val[0]){
2736                                 for(i=0; i<6; i++){
2737                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2738                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
2739                                 }
2740                             }
2741
2742                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2743                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2744                             if(best_s.qscale != qp){
2745                                 if(s->mb_intra && s->dc_val[0]){
2746                                     for(i=0; i<6; i++){
2747                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2748                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
2749                                     }
2750                                 }
2751                             }
2752                         }
2753                     }
2754                 }
2755                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2756                     int mx= s->b_direct_mv_table[xy][0];
2757                     int my= s->b_direct_mv_table[xy][1];
2758
2759                     backup_s.dquant = 0;
2760                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2761                     s->mb_intra= 0;
2762                     ff_mpeg4_set_direct_mv(s, mx, my);
2763                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2764                                  &dmin, &next_block, mx, my);
2765                 }
2766                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2767                     backup_s.dquant = 0;
2768                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2769                     s->mb_intra= 0;
2770                     ff_mpeg4_set_direct_mv(s, 0, 0);
2771                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2772                                  &dmin, &next_block, 0, 0);
2773                 }
2774                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2775                     int coded=0;
2776                     for(i=0; i<6; i++)
2777                         coded |= s->block_last_index[i];
2778                     if(coded){
2779                         int mx,my;
2780                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2781                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2782                             mx=my=0; //FIXME find the one we actually used
2783                             ff_mpeg4_set_direct_mv(s, mx, my);
2784                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2785                             mx= s->mv[1][0][0];
2786                             my= s->mv[1][0][1];
2787                         }else{
2788                             mx= s->mv[0][0][0];
2789                             my= s->mv[0][0][1];
2790                         }
2791
2792                         s->mv_dir= best_s.mv_dir;
2793                         s->mv_type = best_s.mv_type;
2794                         s->mb_intra= 0;
2795 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2796                         s->mv[0][0][1] = best_s.mv[0][0][1];
2797                         s->mv[1][0][0] = best_s.mv[1][0][0];
2798                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2799                         backup_s.dquant= 0;
2800                         s->skipdct=1;
2801                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2802                                         &dmin, &next_block, mx, my);
2803                         s->skipdct=0;
2804                     }
2805                 }
2806
2807                 s->current_picture.f.qscale_table[xy] = best_s.qscale;
2808
2809                 copy_context_after_encode(s, &best_s, -1);
2810
2811                 pb_bits_count= put_bits_count(&s->pb);
2812                 flush_put_bits(&s->pb);
2813                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2814                 s->pb= backup_s.pb;
2815
2816                 if(s->data_partitioning){
2817                     pb2_bits_count= put_bits_count(&s->pb2);
2818                     flush_put_bits(&s->pb2);
2819                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2820                     s->pb2= backup_s.pb2;
2821
2822                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2823                     flush_put_bits(&s->tex_pb);
2824                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2825                     s->tex_pb= backup_s.tex_pb;
2826                 }
2827                 s->last_bits= put_bits_count(&s->pb);
2828
2829                 if (CONFIG_H263_ENCODER &&
2830                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2831                     ff_h263_update_motion_val(s);
2832
2833                 if(next_block==0){ //FIXME 16 vs linesize16
2834                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2835                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2836                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2837                 }
2838
2839                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2840                     ff_MPV_decode_mb(s, s->block);
2841             } else {
2842                 int motion_x = 0, motion_y = 0;
2843                 s->mv_type=MV_TYPE_16X16;
2844                 // only one MB-Type possible
2845
2846                 switch(mb_type){
2847                 case CANDIDATE_MB_TYPE_INTRA:
2848                     s->mv_dir = 0;
2849                     s->mb_intra= 1;
2850                     motion_x= s->mv[0][0][0] = 0;
2851                     motion_y= s->mv[0][0][1] = 0;
2852                     break;
2853                 case CANDIDATE_MB_TYPE_INTER:
2854                     s->mv_dir = MV_DIR_FORWARD;
2855                     s->mb_intra= 0;
2856                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2857                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2858                     break;
2859                 case CANDIDATE_MB_TYPE_INTER_I:
2860                     s->mv_dir = MV_DIR_FORWARD;
2861                     s->mv_type = MV_TYPE_FIELD;
2862                     s->mb_intra= 0;
2863                     for(i=0; i<2; i++){
2864                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2865                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2866                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2867                     }
2868                     break;
2869                 case CANDIDATE_MB_TYPE_INTER4V:
2870                     s->mv_dir = MV_DIR_FORWARD;
2871                     s->mv_type = MV_TYPE_8X8;
2872                     s->mb_intra= 0;
2873                     for(i=0; i<4; i++){
2874                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2875                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2876                     }
2877                     break;
2878                 case CANDIDATE_MB_TYPE_DIRECT:
2879                     if (CONFIG_MPEG4_ENCODER) {
2880                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2881                         s->mb_intra= 0;
2882                         motion_x=s->b_direct_mv_table[xy][0];
2883                         motion_y=s->b_direct_mv_table[xy][1];
2884                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2885                     }
2886                     break;
2887                 case CANDIDATE_MB_TYPE_DIRECT0:
2888                     if (CONFIG_MPEG4_ENCODER) {
2889                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2890                         s->mb_intra= 0;
2891                         ff_mpeg4_set_direct_mv(s, 0, 0);
2892                     }
2893                     break;
2894                 case CANDIDATE_MB_TYPE_BIDIR:
2895                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2896                     s->mb_intra= 0;
2897                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2898                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2899                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2900                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2901                     break;
2902                 case CANDIDATE_MB_TYPE_BACKWARD:
2903                     s->mv_dir = MV_DIR_BACKWARD;
2904                     s->mb_intra= 0;
2905                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2906                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2907                     break;
2908                 case CANDIDATE_MB_TYPE_FORWARD:
2909                     s->mv_dir = MV_DIR_FORWARD;
2910                     s->mb_intra= 0;
2911                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2912                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2913 //                    printf(" %d %d ", motion_x, motion_y);
2914                     break;
2915                 case CANDIDATE_MB_TYPE_FORWARD_I:
2916                     s->mv_dir = MV_DIR_FORWARD;
2917                     s->mv_type = MV_TYPE_FIELD;
2918                     s->mb_intra= 0;
2919                     for(i=0; i<2; i++){
2920                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2921                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2922                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2923                     }
2924                     break;
2925                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2926                     s->mv_dir = MV_DIR_BACKWARD;
2927                     s->mv_type = MV_TYPE_FIELD;
2928                     s->mb_intra= 0;
2929                     for(i=0; i<2; i++){
2930                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2931                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2932                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2933                     }
2934                     break;
2935                 case CANDIDATE_MB_TYPE_BIDIR_I:
2936                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2937                     s->mv_type = MV_TYPE_FIELD;
2938                     s->mb_intra= 0;
2939                     for(dir=0; dir<2; dir++){
2940                         for(i=0; i<2; i++){
2941                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2942                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2943                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2944                         }
2945                     }
2946                     break;
2947                 default:
2948                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
2949                 }
2950
2951                 encode_mb(s, motion_x, motion_y);
2952
2953                 // RAL: Update last macroblock type
2954                 s->last_mv_dir = s->mv_dir;
2955
2956                 if (CONFIG_H263_ENCODER &&
2957                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2958                     ff_h263_update_motion_val(s);
2959
2960                 ff_MPV_decode_mb(s, s->block);
2961             }
2962
2963             /* clean the MV table in IPS frames for direct mode in B frames */
2964             if(s->mb_intra /* && I,P,S_TYPE */){
2965                 s->p_mv_table[xy][0]=0;
2966                 s->p_mv_table[xy][1]=0;
2967             }
2968
2969             if(s->flags&CODEC_FLAG_PSNR){
2970                 int w= 16;
2971                 int h= 16;
2972
2973                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2974                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2975
2976                 s->current_picture.f.error[0] += sse(
2977                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
2978                     s->dest[0], w, h, s->linesize);
2979                 s->current_picture.f.error[1] += sse(
2980                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2981                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2982                 s->current_picture.f.error[2] += sse(
2983                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2984                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2985             }
2986             if(s->loop_filter){
2987                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
2988                     ff_h263_loop_filter(s);
2989             }
2990 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
2991         }
2992     }
2993
2994     //not beautiful here but we must write it before flushing so it has to be here
2995     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
2996         ff_msmpeg4_encode_ext_header(s);
2997
2998     write_slice_end(s);
2999
3000     /* Send the last GOB if RTP */
3001     if (s->avctx->rtp_callback) {
3002         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3003         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3004         /* Call the RTP callback to send the last GOB */
3005         emms_c();
3006         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3007     }
3008
3009     return 0;
3010 }
3011
/*
 * Add src->field to dst->field and reset src->field to 0.
 * Relies on variables named `dst` and `src` being in scope at the point of use.
 * Wrapped in do { } while (0) so that the expansion is a single statement and
 * stays correct under an unbraced if/else/for.
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
3013 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3014     MERGE(me.scene_change_score);
3015     MERGE(me.mc_mb_var_sum_temp);
3016     MERGE(me.mb_var_sum_temp);
3017 }
3018
3019 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3020     int i;
3021
3022     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3023     MERGE(dct_count[1]);
3024     MERGE(mv_bits);
3025     MERGE(i_tex_bits);
3026     MERGE(p_tex_bits);
3027     MERGE(i_count);
3028     MERGE(f_count);
3029     MERGE(b_count);
3030     MERGE(skip_count);
3031     MERGE(misc_bits);
3032     MERGE(error_count);
3033     MERGE(padding_bug_score);
3034     MERGE(current_picture.f.error[0]);
3035     MERGE(current_picture.f.error[1]);
3036     MERGE(current_picture.f.error[2]);
3037
3038     if(dst->avctx->noise_reduction){
3039         for(i=0; i<64; i++){
3040             MERGE(dct_error_sum[0][i]);
3041             MERGE(dct_error_sum[1][i]);
3042         }
3043     }
3044
3045     assert(put_bits_count(&src->pb) % 8 ==0);
3046     assert(put_bits_count(&dst->pb) % 8 ==0);
3047     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3048     flush_put_bits(&dst->pb);
3049 }
3050
3051 static int estimate_qp(MpegEncContext *s, int dry_run){
3052     if (s->next_lambda){
3053         s->current_picture_ptr->f.quality =
3054         s->current_picture.f.quality = s->next_lambda;
3055         if(!dry_run) s->next_lambda= 0;
3056     } else if (!s->fixed_qscale) {
3057         s->current_picture_ptr->f.quality =
3058         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3059         if (s->current_picture.f.quality < 0)
3060             return -1;
3061     }
3062
3063     if(s->adaptive_quant){
3064         switch(s->codec_id){
3065         case AV_CODEC_ID_MPEG4:
3066             if (CONFIG_MPEG4_ENCODER)
3067                 ff_clean_mpeg4_qscales(s);
3068             break;
3069         case AV_CODEC_ID_H263:
3070         case AV_CODEC_ID_H263P:
3071         case AV_CODEC_ID_FLV1:
3072             if (CONFIG_H263_ENCODER)
3073                 ff_clean_h263_qscales(s);
3074             break;
3075         default:
3076             ff_init_qscale_tab(s);
3077         }
3078
3079         s->lambda= s->lambda_table[0];
3080         //FIXME broken
3081     }else
3082         s->lambda = s->current_picture.f.quality;
3083 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
3084     update_qscale(s);
3085     return 0;
3086 }
3087
3088 /* must be called before writing the header */
3089 static void set_frame_distances(MpegEncContext * s){
3090     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3091     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3092
3093     if(s->pict_type==AV_PICTURE_TYPE_B){
3094         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3095         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3096     }else{
3097         s->pp_time= s->time - s->last_non_b_time;
3098         s->last_non_b_time= s->time;
3099         assert(s->picture_number==0 || s->pp_time > 0);
3100     }
3101 }
3102
/**
 * Encode the current picture.
 *
 * Stages, in order:
 *  - reset per-picture ME statistics and set up timing / rounding state,
 *  - pick a provisional lambda (dry-run rate control in 2-pass mode),
 *  - run motion estimation on all slice contexts, or for I pictures mark
 *    every macroblock as an intra candidate and optionally run mb_var_thread,
 *  - merge the ME statistics and possibly turn a P picture into an I picture
 *    when the scene change score exceeds the threshold,
 *  - select f_code / b_code and run the ff_fix_long_*mvs() passes,
 *  - fix the final quality with estimate_qp(s, 0),
 *  - write the format-specific picture header and record its size,
 *  - run encode_thread on all slice contexts and merge their results into s.
 *
 * @param s              main encoder context
 * @param picture_number stored in s->picture_number and handed to the
 *                       picture header writers
 * @return 0 on success, -1 if estimate_qp() or ff_init_me() reports failure
 */
3103 static int encode_picture(MpegEncContext *s, int picture_number)
3104 {
3105     int i;
3106     int bits;
3107     int context_count = s->slice_context_count;
3108
3109     s->picture_number = picture_number;
3110
3111     /* Reset the average MB variance */
3112     s->me.mb_var_sum_temp    =
3113     s->me.mc_mb_var_sum_temp = 0;
3114
3115     /* we need to initialize some time vars before we can encode b-frames */
3116     // RAL: Condition added for MPEG1VIDEO
3117     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3118         set_frame_distances(s);
3119     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3120         ff_set_mpeg4_time(s);
3121
3122     s->me.scene_change_score=0;
3123
3124 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3125
     /* I pictures reset no_rounding (1 for msmpeg4_version >= 3, else 0);
      * other non-B pictures toggle it when flipflop rounding applies */
3126     if(s->pict_type==AV_PICTURE_TYPE_I){
3127         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3128         else                        s->no_rounding=0;
3129     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3130         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3131             s->no_rounding ^= 1;
3132     }
3133
     /* provisional lambda for motion estimation: a dry-run estimate in
      * 2-pass mode, otherwise (unless CODEC_FLAG_QSCALE is set) the last
      * lambda recorded for the matching picture type */
3134     if(s->flags & CODEC_FLAG_PASS2){
3135         if (estimate_qp(s,1) < 0)
3136             return -1;
3137         ff_get_2pass_fcode(s);
3138     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3139         if(s->pict_type==AV_PICTURE_TYPE_B)
3140             s->lambda= s->last_lambda_for[s->pict_type];
3141         else
3142             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3143         update_qscale(s);
3144     }
3145
3146     s->mb_intra=0; //for the rate distortion & bit compare functions
     /* refresh the additional slice contexts from s (index 0 is skipped) */
3147     for(i=1; i<context_count; i++){
3148         ff_update_duplicate_context(s->thread_context[i], s);
3149     }
3150
3151     if(ff_init_me(s)<0)
3152         return -1;
3153
3154     /* Estimate motion for every MB */
3155     if(s->pict_type != AV_PICTURE_TYPE_I){
         /* scale lambda and lambda2 by me_penalty_compensation/256, rounded */
3156         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3157         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
         /* optional pre-pass: only for non-B pictures with me_threshold==0,
          * when pre_me==2 or when pre_me is set and the last non-B picture was I */
3158         if(s->pict_type != AV_PICTURE_TYPE_B && s->avctx->me_threshold==0){
3159             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3160                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3161             }
3162         }
3163
3164         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3165     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3166         /* I-Frame */
3167         for(i=0; i<s->mb_stride*s->mb_height; i++)
3168             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3169
3170         if(!s->fixed_qscale){
3171             /* finding spatial complexity for I-frame rate control */
3172             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3173         }
3174     }
     /* collect the ME statistics of the additional slice contexts in s */
3175     for(i=1; i<context_count; i++){
3176         merge_context_after_me(s, s->thread_context[i]);
3177     }
3178     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3179     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3180     emms_c();
3181
     /* scene change: re-type a P picture as I and make every MB an intra candidate */
3182     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3183         s->pict_type= AV_PICTURE_TYPE_I;
3184         for(i=0; i<s->mb_stride*s->mb_height; i++)
3185             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3186 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3187     }
3188
     /* f_code / b_code selection and the long-MV passes are skipped when umvplus is set */
3189     if(!s->umvplus){
3190         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3191             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3192
3193             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3194                 int a,b;
3195                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3196                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3197                 s->f_code= FFMAX3(s->f_code, a, b);
3198             }
3199
3200             ff_fix_long_p_mvs(s);
3201             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3202             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3203                 int j;
3204                 for(i=0; i<2; i++){
3205                     for(j=0; j<2; j++)
3206                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3207                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3208                 }
3209             }
3210         }
3211
3212         if(s->pict_type==AV_PICTURE_TYPE_B){
3213             int a, b;
3214
             /* f_code covers the forward tables, b_code the backward tables */
3215             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3216             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3217             s->f_code = FFMAX(a, b);
3218
3219             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3220             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3221             s->b_code = FFMAX(a, b);
3222
3223             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3224             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3225             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3226             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3227             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3228                 int dir, j;
3229                 for(dir=0; dir<2; dir++){
3230                     for(i=0; i<2; i++){
3231                         for(j=0; j<2; j++){
3232                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3233                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3234                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3235                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3236                         }
3237                     }
3238                 }
3239             }
3240         }
3241     }
3242
     /* final (non dry-run) quality decision */
3243     if (estimate_qp(s, 0) < 0)
3244         return -1;
3245
3246     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3247         s->qscale= 3; //reduce clipping problems
3248
3249     if (s->out_format == FMT_MJPEG) {
3250         /* for mjpeg, we do include qscale in the matrix */
3251         for(i=1;i<64;i++){
3252             int j= s->dsp.idct_permutation[i];
3253
3254             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3255         }
3256         s->y_dc_scale_table=
3257         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3258         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3259         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3260                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
         /* qscale is now part of the matrix, so the nominal qscale is pinned to 8 */
3261         s->qscale= 8;
3262     }
3263
3264     //FIXME var duplication
3265     s->current_picture_ptr->f.key_frame =
3266     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3267     s->current_picture_ptr->f.pict_type =
3268     s->current_picture.f.pict_type = s->pict_type;
3269
3270     if (s->current_picture.f.key_frame)
3271         s->picture_in_gop_number=0;
3272
     /* remember the bit position so the header size can be measured below */
3273     s->last_bits= put_bits_count(&s->pb);
3274     switch(s->out_format) {
3275     case FMT_MJPEG:
3276         if (CONFIG_MJPEG_ENCODER)
3277             ff_mjpeg_encode_picture_header(s);
3278         break;
3279     case FMT_H261:
3280         if (CONFIG_H261_ENCODER)
3281             ff_h261_encode_picture_header(s, picture_number);
3282         break;
3283     case FMT_H263:
         /* FMT_H263 is shared by several codecs; the first matching writer wins */
3284         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3285             ff_wmv2_encode_picture_header(s, picture_number);
3286         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3287             ff_msmpeg4_encode_picture_header(s, picture_number);
3288         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3289             ff_mpeg4_encode_picture_header(s, picture_number);
3290         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3291             ff_rv10_encode_picture_header(s, picture_number);
3292         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3293             ff_rv20_encode_picture_header(s, picture_number);
3294         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3295             ff_flv_encode_picture_header(s, picture_number);
3296         else if (CONFIG_H263_ENCODER)
3297             ff_h263_encode_picture_header(s, picture_number);
3298         break;
3299     case FMT_MPEG1:
3300         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3301             ff_mpeg1_encode_picture_header(s, picture_number);
3302         break;
3303     case FMT_H264:
3304         break;
3305     default:
3306         assert(0);
3307     }
3308     bits= put_bits_count(&s->pb);
3309     s->header_bits= bits - s->last_bits;
3310
     /* hand the post-ME state to the additional slice contexts, encode all
      * slices, then merge statistics and bitstreams back into s */
3311     for(i=1; i<context_count; i++){
3312         update_duplicate_context_after_me(s->thread_context[i], s);
3313     }
3314     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3315     for(i=1; i<context_count; i++){
3316         merge_context_after_encode(s, s->thread_context[i]);
3317     }
3318     emms_c();
3319     return 0;
3320 }
3321
3322 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block){
3323     const int intra= s->mb_intra;
3324     int i;
3325
3326     s->dct_count[intra]++;
3327
3328     for(i=0; i<64; i++){
3329         int level= block[i];
3330
3331         if(level){
3332             if(level>0){
3333                 s->dct_error_sum[intra][i] += level;
3334                 level -= s->dct_offset[intra][i];
3335                 if(level<0) level=0;
3336             }else{
3337                 s->dct_error_sum[intra][i] -= level;
3338                 level += s->dct_offset[intra][i];
3339                 if(level>0) level=0;
3340             }
3341             block[i]= level;
3342         }
3343     }
3344 }
3345
/**
 * Forward DCT followed by rate-distortion optimised (trellis) quantisation
 * of one block, C implementation.
 *
 * The block is transformed in place with s->dsp.fdct (and denoised when
 * s->dct_error_sum is set). For each coefficient up to the last one that
 * survives plain quantisation, up to two candidate levels are generated.
 * A dynamic-programming search over (run, level) pairs then picks the
 * combination with the lowest distortion + lambda * bits score, using the
 * VLC length tables from s. Finally the chosen levels are written back to
 * block[] at the permuted scan positions; all other AC positions are zeroed.
 *
 * @param s        encoder context
 * @param block    64 samples in, quantised coefficients out
 * @param n        block index; n < 4 uses s->y_dc_scale, otherwise
 *                 s->c_dc_scale, and n also indexes s->coded_score[]
 * @param qscale   quantiser scale used to select the quantisation matrix
 * @param overflow set to nonzero when s->max_qcoeff is smaller than the OR
 *                 of all candidate magnitudes
 * @return index (in scan order) of the last nonzero coefficient; a value
 *         below the first coded index (0 for intra, -1 for inter) means
 *         that no AC coefficient is coded
 */
3346 static int dct_quantize_trellis_c(MpegEncContext *s,
3347                                   DCTELEM *block, int n,
3348                                   int qscale, int *overflow){
3349     const int *qmat;
3350     const uint8_t *scantable= s->intra_scantable.scantable;
3351     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3352     int max=0;
3353     unsigned int threshold1, threshold2;
3354     int bias=0;
     /* run_tab/level_tab/score_tab are indexed by "position after coefficient i", i.e. i+1 */
3355     int run_tab[65];
3356     int level_tab[65];
3357     int score_tab[65];
3358     int survivor[65];
3359     int survivor_count;
3360     int last_run=0;
3361     int last_level=0;
3362     int last_score= 0;
3363     int last_i;
     /* coeff[k][i]: k-th candidate level for scan position i; coeff_count[i] candidates are valid */
3364     int coeff[2][64];
3365     int coeff_count[64];
3366     int qmul, qadd, start_i, last_non_zero, i, dc;
3367     const int esc_length= s->ac_esc_length;
3368     uint8_t * length;
3369     uint8_t * last_length;
3370     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3371
3372     s->dsp.fdct (block);
3373
3374     if(s->dct_error_sum)
3375         s->denoise_dct(s, block);
     /* constants for the H.263-style reconstruction used in the distortion estimate below */
3376     qmul= qscale*16;
3377     qadd= ((qscale-1)|1)*8;
3378
3379     if (s->mb_intra) {
3380         int q;
3381         if (!s->h263_aic) {
3382             if (n < 4)
3383                 q = s->y_dc_scale;
3384             else
3385                 q = s->c_dc_scale;
3386             q = q << 3;
3387         } else{
3388             /* For AIC we skip quant/dequant of INTRADC */
3389             q = 1 << 3;
3390             qadd=0;
3391         }
3392
3393         /* note: block[0] is assumed to be positive */
3394         block[0] = (block[0] + (q >> 1)) / q;
         /* the DC coefficient is quantised above, the search starts at index 1 */
3395         start_i = 1;
3396         last_non_zero = 0;
3397         qmat = s->q_intra_matrix[qscale];
3398         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3399             bias= 1<<(QMAT_SHIFT-1);
3400         length     = s->intra_ac_vlc_length;
3401         last_length= s->intra_ac_vlc_last_length;
3402     } else {
3403         start_i = 0;
3404         last_non_zero = -1;
3405         qmat = s->q_inter_matrix[qscale];
3406         length     = s->inter_ac_vlc_length;
3407         last_length= s->inter_ac_vlc_last_length;
3408     }
3409     last_i= start_i;
3410
     /* (unsigned)(level+threshold1) > threshold2 is a single-compare test for
      * level lying outside [-threshold1, threshold1] */
3411     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3412     threshold2= (threshold1<<1);
3413
     /* scan backwards for the last coefficient that exceeds the threshold */
3414     for(i=63; i>=start_i; i--) {
3415         const int j = scantable[i];
3416         int level = block[j] * qmat[j];
3417
3418         if(((unsigned)(level+threshold1))>threshold2){
3419             last_non_zero = i;
3420             break;
3421         }
3422     }
3423
     /* build the candidate levels for every position up to last_non_zero */
3424     for(i=start_i; i<=last_non_zero; i++) {
3425         const int j = scantable[i];
3426         int level = block[j] * qmat[j];
3427
3428 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3429 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3430         if(((unsigned)(level+threshold1))>threshold2){
             /* candidates: the quantised level and the one with magnitude one smaller */
3431             if(level>0){
3432                 level= (bias + level)>>QMAT_SHIFT;
3433                 coeff[0][i]= level;
3434                 coeff[1][i]= level-1;
3435 //                coeff[2][k]= level-2;
3436             }else{
3437                 level= (bias - level)>>QMAT_SHIFT;
3438                 coeff[0][i]= -level;
3439                 coeff[1][i]= -level+1;
3440 //                coeff[2][k]= -level+2;
3441             }
             /* a magnitude of 1 yields only one candidate, so level 0 is never a candidate */
3442             coeff_count[i]= FFMIN(level, 2);
3443             assert(coeff_count[i]);
3444             max |=level;
3445         }else{
             /* below threshold: single candidate of magnitude 1; (level>>31)|1
              * is intended to give -1 or +1 by sign (assumes 32-bit int and
              * arithmetic right shift) */
3446             coeff[0][i]= (level>>31)|1;
3447             coeff_count[i]= 1;
3448         }
3449     }
3450
3451     *overflow= s->max_qcoeff < max; //overflow might have happened
3452
     /* nothing above the threshold: clear the AC part and return early */
3453     if(last_non_zero < start_i){
3454         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3455         return last_non_zero;
3456     }
3457
3458     score_tab[start_i]= 0;
3459     survivor[0]= start_i;
3460     survivor_count= 1;
3461
     /* dynamic programming over scan positions: score_tab[i+1] becomes the best
      * score of any coding that ends with a nonzero level at position i */
3462     for(i=start_i; i<=last_non_zero; i++){
3463         int level_index, j, zero_distortion;
3464         int dct_coeff= FFABS(block[ scantable[i] ]);
3465         int best_score=256*256*256*120;
3466
3467         if (s->dsp.fdct == ff_fdct_ifast)
3468             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3469         zero_distortion= dct_coeff*dct_coeff;
3470
3471         for(level_index=0; level_index < coeff_count[i]; level_index++){
3472             int distortion;
3473             int level= coeff[level_index][i];
3474             const int alevel= FFABS(level);
3475             int unquant_coeff;
3476
3477             assert(level);
3478
             /* reconstruct the value this candidate level would dequantise to */
3479             if(s->out_format == FMT_H263){
3480                 unquant_coeff= alevel*qmul + qadd;
3481             }else{ //MPEG1
3482                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3483                 if(s->mb_intra){
3484                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3485                         unquant_coeff =   (unquant_coeff - 1) | 1;
3486                 }else{
3487                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3488                         unquant_coeff =   (unquant_coeff - 1) | 1;
3489                 }
3490                 unquant_coeff<<= 3;
3491             }
3492
             /* distortion is measured relative to coding this coefficient as zero */
3493             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
             /* after the +64 offset, levels in [-64, 63] satisfy (level&~127)==0
              * and are costed via the length tables; others use the escape length */
3494             level+=64;
3495             if((level&(~127)) == 0){
3496                 for(j=survivor_count-1; j>=0; j--){
3497                     int run= i - survivor[j];
3498                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3499                     score += score_tab[i-run];
3500
3501                     if(score < best_score){
3502                         best_score= score;
3503                         run_tab[i+1]= run;
3504                         level_tab[i+1]= level-64;
3505                     }
3506                 }
3507
                 /* for FMT_H263 the "last coefficient" lengths come from a
                  * separate table, so the best final coefficient is tracked here */
3508                 if(s->out_format == FMT_H263){
3509                     for(j=survivor_count-1; j>=0; j--){
3510                         int run= i - survivor[j];
3511                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3512                         score += score_tab[i-run];
3513                         if(score < last_score){
3514                             last_score= score;
3515                             last_run= run;
3516                             last_level= level-64;
3517                             last_i= i+1;
3518                         }
3519                     }
3520                 }
3521             }else{
3522                 distortion += esc_length*lambda;
3523                 for(j=survivor_count-1; j>=0; j--){
3524                     int run= i - survivor[j];
3525                     int score= distortion + score_tab[i-run];
3526
3527                     if(score < best_score){
3528                         best_score= score;
3529                         run_tab[i+1]= run;
3530                         level_tab[i+1]= level-64;
3531                     }
3532                 }
3533
3534                 if(s->out_format == FMT_H263){
3535                   for(j=survivor_count-1; j>=0; j--){
3536                         int run= i - survivor[j];
3537                         int score= distortion + score_tab[i-run];
3538                         if(score < last_score){
3539                             last_score= score;
3540                             last_run= run;
3541                             last_level= level-64;
3542                             last_i= i+1;
3543                         }
3544                     }
3545                 }
3546             }
3547         }
3548
3549         score_tab[i+1]= best_score;
3550
         /* drop survivors whose score is worse than the new best (with a
          * tolerance of lambda when last_non_zero > 27) */
3551         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3552         if(last_non_zero <= 27){
3553             for(; survivor_count; survivor_count--){
3554                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3555                     break;
3556             }
3557         }else{
3558             for(; survivor_count; survivor_count--){
3559                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3560                     break;
3561             }
3562         }
3563
3564         survivor[ survivor_count++ ]= i+1;
3565     }
3566
     /* formats other than FMT_H263: choose the end position after the search,
      * adding lambda*2 to every end position other than 0 */
3567     if(s->out_format != FMT_H263){
3568         last_score= 256*256*256*120;
3569         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3570             int score= score_tab[i];
3571             if(i) score += lambda*2; //FIXME exacter?
3572
3573             if(score < last_score){
3574                 last_score= score;
3575                 last_i= i;
3576                 last_level= level_tab[i];
3577                 last_run= run_tab[i];
3578             }
3579         }
3580     }
3581
3582     s->coded_score[n] = last_score;
3583
     /* keep |block[0]| before the AC part of the block is cleared */
3584     dc= FFABS(block[0]);
3585     last_non_zero= last_i - 1;
3586     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3587
3588     if(last_non_zero < start_i)
3589         return last_non_zero;
3590
     /* special case: inter block where only coefficient 0 remains; re-decide
      * its level, including the option of dropping it completely */
3591     if(last_non_zero == 0 && start_i == 0){
3592         int best_level= 0;
3593         int best_score= dc * dc;
3594
3595         for(i=0; i<coeff_count[0]; i++){
3596             int level= coeff[i][0];
3597             int alevel= FFABS(level);
3598             int unquant_coeff, score, distortion;
3599
3600             if(s->out_format == FMT_H263){
3601                     unquant_coeff= (alevel*qmul + qadd)>>3;
3602             }else{ //MPEG1
3603                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3604                     unquant_coeff =   (unquant_coeff - 1) | 1;
3605             }
3606             unquant_coeff = (unquant_coeff + 4) >> 3;
3607             unquant_coeff<<= 3 + 3;
3608
3609             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3610             level+=64;
3611             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3612             else                    score= distortion + esc_length*lambda;
3613
3614             if(score < best_score){
3615                 best_score= score;
3616                 best_level= level - 64;
3617             }
3618         }
3619         block[0]= best_level;
3620         s->coded_score[n] = best_score - dc*dc;
3621         if(best_level == 0) return -1;
3622         else                return last_non_zero;
3623     }
3624
     /* backtrack: write the final coefficient, then follow run_tab towards the start */
3625     i= last_i;
3626     assert(last_level);
3627
3628     block[ perm_scantable[last_non_zero] ]= last_level;
3629     i -= last_run + 1;
3630
3631     for(; i>start_i; i -= run_tab[i] + 1){
3632         block[ perm_scantable[i-1] ]= level_tab[i];
3633     }
3634
3635     return last_non_zero;
3636 }
3637
3638 //#define REFINE_STATS 1
3639 static int16_t basis[64][64];
3640
3641 static void build_basis(uint8_t *perm){
3642     int i, j, x, y;
3643     emms_c();
3644     for(i=0; i<8; i++){
3645         for(j=0; j<8; j++){
3646             for(y=0; y<8; y++){
3647                 for(x=0; x<8; x++){
3648                     double s= 0.25*(1<<BASIS_SHIFT);
3649                     int index= 8*i + j;
3650                     int perm_index= perm[index];
3651                     if(i==0) s*= sqrt(0.5);
3652                     if(j==0) s*= sqrt(0.5);
3653                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3654                 }
3655             }
3656         }
3657     }
3658 }
3659
3660 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3661                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
3662                         int n, int qscale){
3663     int16_t rem[64];
3664     LOCAL_ALIGNED_16(DCTELEM, d1, [64]);
3665     const uint8_t *scantable= s->intra_scantable.scantable;
3666     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3667 //    unsigned int threshold1, threshold2;
3668 //    int bias=0;
3669     int run_tab[65];
3670     int prev_run=0;
3671     int prev_level=0;
3672     int qmul, qadd, start_i, last_non_zero, i, dc;
3673     uint8_t * length;
3674     uint8_t * last_length;
3675     int lambda;
3676     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3677 #ifdef REFINE_STATS
3678 static int count=0;
3679 static int after_last=0;
3680 static int to_zero=0;
3681 static int from_zero=0;
3682 static int raise=0;
3683 static int lower=0;
3684 static int messed_sign=0;
3685 #endif
3686
3687     if(basis[0][0] == 0)
3688         build_basis(s->dsp.idct_permutation);
3689
3690     qmul= qscale*2;
3691     qadd= (qscale-1)|1;
3692     if (s->mb_intra) {
3693         if (!s->h263_aic) {
3694             if (n < 4)
3695                 q = s->y_dc_scale;
3696             else
3697                 q = s->c_dc_scale;
3698         } else{
3699             /* For AIC we skip quant/dequant of INTRADC */
3700             q = 1;
3701             qadd=0;
3702         }
3703         q <<= RECON_SHIFT-3;
3704         /* note: block[0] is assumed to be positive */
3705         dc= block[0]*q;
3706 //        block[0] = (block[0] + (q >> 1)) / q;
3707         start_i = 1;
3708 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3709 //            bias= 1<<(QMAT_SHIFT-1);
3710         length     = s->intra_ac_vlc_length;
3711         last_length= s->intra_ac_vlc_last_length;
3712     } else {
3713         dc= 0;
3714         start_i = 0;
3715         length     = s->inter_ac_vlc_length;
3716         last_length= s->inter_ac_vlc_last_length;
3717     }
3718     last_non_zero = s->block_last_index[n];
3719
3720 #ifdef REFINE_STATS
3721 {START_TIMER
3722 #endif
3723     dc += (1<<(RECON_SHIFT-1));
3724     for(i=0; i<64; i++){
3725         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3726     }
3727 #ifdef REFINE_STATS
3728 STOP_TIMER("memset rem[]")}
3729 #endif
3730     sum=0;
3731     for(i=0; i<64; i++){
3732         int one= 36;
3733         int qns=4;
3734         int w;
3735
3736         w= FFABS(weight[i]) + qns*one;
3737         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3738
3739         weight[i] = w;
3740 //        w=weight[i] = (63*qns + (w/2)) / w;
3741
3742         assert(w>0);
3743         assert(w<(1<<6));
3744         sum += w*w;
3745     }
3746     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3747 #ifdef REFINE_STATS
3748 {START_TIMER
3749 #endif
3750     run=0;
3751     rle_index=0;
3752     for(i=start_i; i<=last_non_zero; i++){
3753         int j= perm_scantable[i];
3754         const int level= block[j];
3755         int coeff;
3756
3757         if(level){
3758             if(level<0) coeff= qmul*level - qadd;
3759             else        coeff= qmul*level + qadd;
3760             run_tab[rle_index++]=run;
3761             run=0;
3762
3763             s->dsp.add_8x8basis(rem, basis[j], coeff);
3764         }else{
3765             run++;
3766         }
3767     }
3768 #ifdef REFINE_STATS
3769 if(last_non_zero>0){
3770 STOP_TIMER("init rem[]")
3771 }
3772 }
3773
3774 {START_TIMER
3775 #endif
3776     for(;;){
3777         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3778         int best_coeff=0;
3779         int best_change=0;
3780         int run2, best_unquant_change=0, analyze_gradient;
3781 #ifdef REFINE_STATS
3782 {START_TIMER
3783 #endif
3784         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3785
3786         if(analyze_gradient){
3787 #ifdef REFINE_STATS
3788 {START_TIMER
3789 #endif
3790             for(i=0; i<64; i++){
3791                 int w= weight[i];
3792
3793                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3794             }
3795 #ifdef REFINE_STATS
3796 STOP_TIMER("rem*w*w")}
3797 {START_TIMER
3798 #endif
3799             s->dsp.fdct(d1);
3800 #ifdef REFINE_STATS
3801 STOP_TIMER("dct")}
3802 #endif
3803         }
3804
3805         if(start_i){
3806             const int level= block[0];
3807             int change, old_coeff;
3808
3809             assert(s->mb_intra);
3810
3811             old_coeff= q*level;
3812
3813             for(change=-1; change<=1; change+=2){
3814                 int new_level= level + change;
3815                 int score, new_coeff;
3816
3817                 new_coeff= q*new_level;
3818                 if(new_coeff >= 2048 || new_coeff < 0)
3819                     continue;
3820
3821                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3822                 if(score<best_score){
3823                     best_score= score;
3824                     best_coeff= 0;
3825                     best_change= change;
3826                     best_unquant_change= new_coeff - old_coeff;
3827                 }
3828             }
3829         }
3830
3831         run=0;
3832         rle_index=0;
3833         run2= run_tab[rle_index++];
3834         prev_level=0;
3835         prev_run=0;
3836
3837         for(i=start_i; i<64; i++){
3838             int j= perm_scantable[i];
3839             const int level= block[j];
3840             int change, old_coeff;
3841
3842             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3843                 break;
3844
3845             if(level){
3846                 if(level<0) old_coeff= qmul*level - qadd;
3847                 else        old_coeff= qmul*level + qadd;
3848                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3849             }else{
3850                 old_coeff=0;
3851                 run2--;
3852                 assert(run2>=0 || i >= last_non_zero );
3853             }
3854
3855             for(change=-1; change<=1; change+=2){
3856                 int new_level= level + change;
3857                 int score, new_coeff, unquant_change;
3858
3859                 score=0;
3860                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3861                    continue;
3862
3863                 if(new_level){
3864                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3865                     else            new_coeff= qmul*new_level + qadd;
3866                     if(new_coeff >= 2048 || new_coeff <= -2048)
3867                         continue;
3868                     //FIXME check for overflow
3869
3870                     if(level){
3871                         if(level < 63 && level > -63){
3872                             if(i < last_non_zero)
3873                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3874                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3875                             else
3876                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3877                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3878                         }
3879                     }else{
3880                         assert(FFABS(new_level)==1);
3881
3882                         if(analyze_gradient){
3883                             int g= d1[ scantable[i] ];
3884                             if(g && (g^new_level) >= 0)
3885                                 continue;
3886                         }
3887
3888                         if(i < last_non_zero){
3889                             int next_i= i + run2 + 1;
3890                             int next_level= block[ perm_scantable[next_i] ] + 64;
3891
3892                             if(next_level&(~127))
3893                                 next_level= 0;
3894
3895                             if(next_i < last_non_zero)
3896                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3897                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3898                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3899                             else
3900                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3901                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3902                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3903                         }else{
3904                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3905                             if(prev_level){
3906                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3907                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3908                             }
3909                         }
3910                     }
3911                 }else{
3912                     new_coeff=0;
3913                     assert(FFABS(level)==1);
3914
3915                     if(i < last_non_zero){
3916                         int next_i= i + run2 + 1;
3917                         int next_level= block[ perm_scantable[next_i] ] + 64;
3918
3919                         if(next_level&(~127))
3920                             next_level= 0;
3921
3922                         if(next_i < last_non_zero)
3923                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3924                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
3925                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3926                         else
3927                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3928                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3929                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3930                     }else{
3931                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
3932                         if(prev_level){
3933                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3934                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3935                         }
3936                     }
3937                 }
3938
3939                 score *= lambda;
3940
3941                 unquant_change= new_coeff - old_coeff;
3942                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
3943
3944                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
3945                 if(score<best_score){
3946                     best_score= score;
3947                     best_coeff= i;
3948                     best_change= change;
3949                     best_unquant_change= unquant_change;
3950                 }
3951             }
3952             if(level){
3953                 prev_level= level + 64;
3954                 if(prev_level&(~127))
3955                     prev_level= 0;
3956                 prev_run= run;
3957                 run=0;
3958             }else{
3959                 run++;
3960             }
3961         }
3962 #ifdef REFINE_STATS
3963 STOP_TIMER("iterative step")}
3964 #endif
3965
3966         if(best_change){
3967             int j= perm_scantable[ best_coeff ];
3968
3969             block[j] += best_change;
3970
3971             if(best_coeff > last_non_zero){
3972                 last_non_zero= best_coeff;
3973                 assert(block[j]);
3974 #ifdef REFINE_STATS
3975 after_last++;
3976 #endif
3977             }else{
3978 #ifdef REFINE_STATS
3979 if(block[j]){
3980     if(block[j] - best_change){
3981         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
3982             raise++;
3983         }else{
3984             lower++;
3985         }
3986     }else{
3987         from_zero++;
3988     }
3989 }else{
3990     to_zero++;
3991 }
3992 #endif
3993                 for(; last_non_zero>=start_i; last_non_zero--){
3994                     if(block[perm_scantable[last_non_zero]])
3995                         break;
3996                 }
3997             }
3998 #ifdef REFINE_STATS
3999 count++;
4000 if(256*256*256*64 % count == 0){
4001     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4002 }
4003 #endif
4004             run=0;
4005             rle_index=0;
4006             for(i=start_i; i<=last_non_zero; i++){
4007                 int j= perm_scantable[i];
4008                 const int level= block[j];
4009
4010                  if(level){
4011                      run_tab[rle_index++]=run;
4012                      run=0;
4013                  }else{
4014                      run++;
4015                  }
4016             }
4017
4018             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4019         }else{
4020             break;
4021         }
4022     }
4023 #ifdef REFINE_STATS
4024 if(last_non_zero>0){
4025 STOP_TIMER("iterative search")
4026 }
4027 }
4028 #endif
4029
4030     return last_non_zero;
4031 }
4032
/**
 * Forward-transform and quantize one block of 64 coefficients in place
 * (plain C version).
 *
 * The block is passed through s->dsp.fdct(), then through s->denoise_dct()
 * when s->dct_error_sum is set, and finally quantized with the matrix selected
 * by qscale: s->q_intra_matrix for intra macroblocks, s->q_inter_matrix
 * otherwise. For intra blocks the DC coefficient is handled on its own: it is
 * divided, with rounding, by 8 times the DC scale (by 8 alone when
 * s->h263_aic is set) and the coefficient scan starts at index 1.
 *
 * @param s        encoder context supplying the DCT, scan table, quantization
 *                 matrices, biases and max_qcoeff
 * @param block    64 coefficients, overwritten with the quantized levels
 * @param n        block index; values below 4 use s->y_dc_scale, all others
 *                 s->c_dc_scale
 * @param qscale   index into the quantization matrix tables
 * @param overflow receives non-zero when the OR of all quantized magnitudes
 *                 exceeds s->max_qcoeff (a conservative "may have
 *                 overflowed" flag), zero otherwise
 * @return scan index of the last non-zero coefficient; -1 for an inter block
 *         without any, 0 for an intra block without a non-zero AC coefficient
 */
4033 int ff_dct_quantize_c(MpegEncContext *s,
4034                         DCTELEM *block, int n,
4035                         int qscale, int *overflow)
4036 {
4037     int i, j, level, last_non_zero, q, start_i;
4038     const int *qmat;
4039     const uint8_t *scantable= s->intra_scantable.scantable;
4040     int bias;
4041     int max=0;
4042     unsigned int threshold1, threshold2;
4043
4044     s->dsp.fdct (block);
4045
4046     if(s->dct_error_sum)
4047         s->denoise_dct(s, block);
4048
4049     if (s->mb_intra) {
4050         if (!s->h263_aic) {
4051             if (n < 4)
4052                 q = s->y_dc_scale;
4053             else
4054                 q = s->c_dc_scale;
4055             q = q << 3;
4056         } else
4057             /* For AIC we skip quant/dequant of INTRADC */
4058             q = 1 << 3;
4059
4060         /* note: block[0] is assumed to be positive */
4061         block[0] = (block[0] + (q >> 1)) / q;
4062         start_i = 1;
4063         last_non_zero = 0;
4064         qmat = s->q_intra_matrix[qscale];
             /* Scale by multiplication rather than <<: left-shifting a negative
              * value is undefined in C, while the product is well defined and
              * identical for non-negative biases. */
4065         bias= s->intra_quant_bias * (1 << (QMAT_SHIFT - QUANT_BIAS_SHIFT));
4066     } else {
4067         start_i = 0;
4068         last_non_zero = -1;
4069         qmat = s->q_inter_matrix[qscale];
             /* Same reasoning as for the intra bias: multiply, do not shift. */
4070         bias= s->inter_quant_bias * (1 << (QMAT_SHIFT - QUANT_BIAS_SHIFT));
4071     }
         /* A product "level" quantizes to something non-zero exactly when
          * bias + |level| >= 1 << QMAT_SHIFT, i.e. |level| > threshold1.
          * Adding threshold1 and comparing as unsigned against 2 * threshold1
          * checks both signs with a single comparison. */
4072     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4073     threshold2= (threshold1<<1);
         /* Walk the scan backwards, clearing trailing coefficients, until the
          * last one that survives quantization is found. */
4074     for(i=63;i>=start_i;i--) {
4075         j = scantable[i];
4076         level = block[j] * qmat[j];
4077
4078         if(((unsigned)(level+threshold1))>threshold2){
4079             last_non_zero = i;
4080             break;
4081         }else{
4082             block[j]=0;
4083         }
4084     }
         /* Quantize everything up to and including that last coefficient. */
4085     for(i=start_i; i<=last_non_zero; i++) {
4086         j = scantable[i];
4087         level = block[j] * qmat[j];
4088
4089 //        if(   bias+level >= (1<<QMAT_SHIFT)
4090 //           || bias-level >= (1<<QMAT_SHIFT)){
4091         if(((unsigned)(level+threshold1))>threshold2){
4092             if(level>0){
4093                 level= (bias + level)>>QMAT_SHIFT;
4094                 block[j]= level;
4095             }else{
4096                 level= (bias - level)>>QMAT_SHIFT;
4097                 block[j]= -level;
4098             }
                 /* level holds the magnitude here; OR-ing is a cheap upper
                  * bound on the largest magnitude seen. */
4099             max |=level;
4100         }else{
4101             block[j]=0;
4102         }
4103     }
4104     *overflow= s->max_qcoeff < max; //overflow might have happened
4105
4106     /* Reorder the coefficients for IDCTs that use a permuted layout; only the non-zero elements are permuted. */
4107     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4108         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4109
4110     return last_non_zero;
4111 }
4112
/* OFFSET(x): byte offset of member x inside MpegEncContext; passed as the
 * third element of every entry in the option tables that use it. */
4113 #define OFFSET(x) offsetof(MpegEncContext, x)
/* VE: flag combination (video parameter | encoding parameter) placed last in
 * each option entry.
 * NOTE(review): the expansion is not parenthesized. That is harmless while VE
 * is used as a lone initializer element, but it would bind unexpectedly inside
 * a larger expression. */
4114 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
/* Private options of the H.263 encoder. Each entry gives the option name, its
 * help text, the MpegEncContext field it refers to (through OFFSET), the type
 * AV_OPT_TYPE_INT, and then what appear to be default, minimum and maximum
 * values followed by the VE flags (entry layout per AVOption in libavutil,
 * not visible here - confirm). FF_MPV_COMMON_OPTS is a macro defined
 * elsewhere that contributes further entries; the { NULL } entry closes the
 * list. */
4115 static const AVOption h263_options[] = {
4116     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4117     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE},
4118     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { 0 }, 0, INT_MAX, VE },
4119     FF_MPV_COMMON_OPTS
4120     { NULL },
4121 };
4122
/* AVClass for the H.263 encoder: gives the class its display name and attaches
 * h263_options as its option table. Referenced by ff_h263_encoder.priv_class. */
4123 static const AVClass h263_class = {
4124     .class_name = "H.263 encoder",
4125     .item_name  = av_default_item_name,
4126     .option     = h263_options,
4127     .version    = LIBAVUTIL_VERSION_INT,
4128 };
4129
/* Codec descriptor for the "h263" video encoder (AV_CODEC_ID_H263). It uses the
 * shared ff_MPV_encode_init / ff_MPV_encode_picture / ff_MPV_encode_end entry
 * points with an MpegEncContext as private data, lists PIX_FMT_YUV420P as its
 * only pixel format (the list ends with PIX_FMT_NONE) and exposes its options
 * through h263_class. */
4130 AVCodec ff_h263_encoder = {
4131     .name           = "h263",
4132     .type           = AVMEDIA_TYPE_VIDEO,
4133     .id             = AV_CODEC_ID_H263,
4134     .priv_data_size = sizeof(MpegEncContext),
4135     .init           = ff_MPV_encode_init,
4136     .encode2        = ff_MPV_encode_picture,
4137     .close          = ff_MPV_encode_end,
4138     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4139     .long_name= NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4140     .priv_class     = &h263_class,
4141 };
4142
/* Private options of the H.263+ encoder. Entries follow the same pattern as
 * the other option tables in this file: name, help text, OFFSET of the
 * MpegEncContext field, AV_OPT_TYPE_INT, then what appear to be default,
 * minimum and maximum values and the VE flags (layout per AVOption in
 * libavutil, not visible here - confirm). FF_MPV_COMMON_OPTS, defined
 * elsewhere, contributes further entries; { NULL } closes the list. */
4143 static const AVOption h263p_options[] = {
4144     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4145     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4146     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4147     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE},
4148     FF_MPV_COMMON_OPTS
4149     { NULL },
4150 };
/* AVClass for the H.263+ encoder: gives the class its display name and attaches
 * h263p_options as its option table. Referenced by ff_h263p_encoder.priv_class. */
4151 static const AVClass h263p_class = {
4152     .class_name = "H.263p encoder",
4153     .item_name  = av_default_item_name,
4154     .option     = h263p_options,
4155     .version    = LIBAVUTIL_VERSION_INT,
4156 };
4157
/* Codec descriptor for the "h263p" video encoder (AV_CODEC_ID_H263P). It uses
 * the shared ff_MPV_encode_* entry points with an MpegEncContext as private
 * data and additionally sets CODEC_CAP_SLICE_THREADS in its capabilities.
 * PIX_FMT_YUV420P is the only listed pixel format (the list ends with
 * PIX_FMT_NONE); options come from h263p_class. */
4158 AVCodec ff_h263p_encoder = {
4159     .name           = "h263p",
4160     .type           = AVMEDIA_TYPE_VIDEO,
4161     .id             = AV_CODEC_ID_H263P,
4162     .priv_data_size = sizeof(MpegEncContext),
4163     .init           = ff_MPV_encode_init,
4164     .encode2        = ff_MPV_encode_picture,
4165     .close          = ff_MPV_encode_end,
4166     .capabilities   = CODEC_CAP_SLICE_THREADS,
4167     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4168     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4169     .priv_class     = &h263p_class,
4170 };
4171
/* FF_MPV_GENERIC_CLASS is defined elsewhere; presumably it emits the
 * msmpeg4v2_class object that the descriptor below points to - confirm. */
4172 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4173
/* Codec descriptor for the "msmpeg4v2" video encoder (AV_CODEC_ID_MSMPEG4V2),
 * built on the shared ff_MPV_encode_* entry points with an MpegEncContext as
 * private data. PIX_FMT_YUV420P is the only listed pixel format. */
4174 AVCodec ff_msmpeg4v2_encoder = {
4175     .name           = "msmpeg4v2",
4176     .type           = AVMEDIA_TYPE_VIDEO,
4177     .id             = AV_CODEC_ID_MSMPEG4V2,
4178     .priv_data_size = sizeof(MpegEncContext),
4179     .init           = ff_MPV_encode_init,
4180     .encode2        = ff_MPV_encode_picture,
4181     .close          = ff_MPV_encode_end,
4182     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4183     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4184     .priv_class     = &msmpeg4v2_class,
4185 };
4186
/* FF_MPV_GENERIC_CLASS is defined elsewhere; presumably it emits the
 * msmpeg4v3_class object that the descriptor below points to - confirm. */
4187 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4188
/* Codec descriptor for the Microsoft MPEG-4 version 3 video encoder
 * (AV_CODEC_ID_MSMPEG4V3). Note that its registered name is "msmpeg4", with no
 * version suffix. It is built on the shared ff_MPV_encode_* entry points with
 * an MpegEncContext as private data; PIX_FMT_YUV420P is the only listed pixel
 * format. */
4189 AVCodec ff_msmpeg4v3_encoder = {
4190     .name           = "msmpeg4",
4191     .type           = AVMEDIA_TYPE_VIDEO,
4192     .id             = AV_CODEC_ID_MSMPEG4V3,
4193     .priv_data_size = sizeof(MpegEncContext),
4194     .init           = ff_MPV_encode_init,
4195     .encode2        = ff_MPV_encode_picture,
4196     .close          = ff_MPV_encode_end,
4197     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4198     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4199     .priv_class     = &msmpeg4v3_class,
4200 };
4201
/* FF_MPV_GENERIC_CLASS is defined elsewhere; presumably it emits the
 * wmv1_class object that the descriptor below points to - confirm. */
4202 FF_MPV_GENERIC_CLASS(wmv1)
4203
/* Codec descriptor for the "wmv1" video encoder (AV_CODEC_ID_WMV1, long name
 * "Windows Media Video 7"), built on the shared ff_MPV_encode_* entry points
 * with an MpegEncContext as private data. PIX_FMT_YUV420P is the only listed
 * pixel format. */
4204 AVCodec ff_wmv1_encoder = {
4205     .name           = "wmv1",
4206     .type           = AVMEDIA_TYPE_VIDEO,
4207     .id             = AV_CODEC_ID_WMV1,
4208     .priv_data_size = sizeof(MpegEncContext),
4209     .init           = ff_MPV_encode_init,
4210     .encode2        = ff_MPV_encode_picture,
4211     .close          = ff_MPV_encode_end,
4212     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4213     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4214     .priv_class     = &wmv1_class,
4215 };