]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
17a2512554492e2f8c703e68ae9bab0acc507c8e
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/intmath.h"
31 #include "libavutil/mathematics.h"
32 #include "libavutil/opt.h"
33 #include "avcodec.h"
34 #include "dsputil.h"
35 #include "mpegvideo.h"
36 #include "mpegvideo_common.h"
37 #include "h263.h"
38 #include "mjpegenc.h"
39 #include "msmpeg4.h"
40 #include "faandct.h"
41 #include "thread.h"
42 #include "aandcttab.h"
43 #include "flv.h"
44 #include "mpeg4video.h"
45 #include "internal.h"
46 #include "bytestream.h"
47 #include <limits.h>
48 #include "sp5x.h"
49
50 //#undef NDEBUG
51 //#include <assert.h>
52
/*
 * Forward declarations of file-local (static) helpers that are referenced
 * before their definitions appear. Within the visible part of this file,
 * denoise_dct_c is installed as the fallback for s->denoise_dct and
 * dct_quantize_trellis_c replaces s->dct_quantize when avctx->trellis is set
 * (both in ff_MPV_encode_init).
 */
static int encode_picture(MpegEncContext *s, int picture_number);
static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
static int sse_mb(MpegEncContext *s);
static void denoise_dct_c(MpegEncContext *s, DCTELEM *block);
static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
58
59 /* enable all paranoid tests for rounding, overflows, etc... */
60 //#define PARANOID
61
62 //#define DEBUG
63
/* Table handed to s->me.mv_penalty by MPV_encode_defaults(); it is never
 * written in the visible part of this file, so it starts out all zero
 * (static storage). */
static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
/* Table handed to s->fcode_tab by MPV_encode_defaults(), which sets the
 * 32 entries at indices MAX_MV - 16 .. MAX_MV + 15 to 1. */
static uint8_t default_fcode_tab[MAX_MV * 2 + 1];

/* Exported option table: the entries expanded from FF_MPV_COMMON_OPTS,
 * followed by a { NULL } terminator entry. */
const AVOption ff_mpv_generic_options[] = {
    FF_MPV_COMMON_OPTS
    { NULL },
};
71
72 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
73                        uint16_t (*qmat16)[2][64],
74                        const uint16_t *quant_matrix,
75                        int bias, int qmin, int qmax, int intra)
76 {
77     int qscale;
78     int shift = 0;
79
80     for (qscale = qmin; qscale <= qmax; qscale++) {
81         int i;
82         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
83             dsp->fdct == ff_jpeg_fdct_islow_10 ||
84             dsp->fdct == ff_faandct) {
85             for (i = 0; i < 64; i++) {
86                 const int j = dsp->idct_permutation[i];
87                 /* 16 <= qscale * quant_matrix[i] <= 7905
88                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
89                  *             19952 <=              x  <= 249205026
90                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
91                  *           3444240 >= (1 << 36) / (x) >= 275 */
92
93                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
94                                         (qscale * quant_matrix[j]));
95             }
96         } else if (dsp->fdct == ff_fdct_ifast) {
97             for (i = 0; i < 64; i++) {
98                 const int j = dsp->idct_permutation[i];
99                 /* 16 <= qscale * quant_matrix[i] <= 7905
100                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
101                  *             19952 <=              x  <= 249205026
102                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
103                  *           3444240 >= (1 << 36) / (x) >= 275 */
104
105                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
106                                         (ff_aanscales[i] * qscale * quant_matrix[j]));
107             }
108         } else {
109             for (i = 0; i < 64; i++) {
110                 const int j = dsp->idct_permutation[i];
111                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
112                  * Assume x = qscale * quant_matrix[i]
113                  * So             16 <=              x  <= 7905
114                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
115                  * so          32768 >= (1 << 19) / (x) >= 67 */
116                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
117                                         (qscale * quant_matrix[j]));
118                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
119                 //                    (qscale * quant_matrix[i]);
120                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
121                                        (qscale * quant_matrix[j]);
122
123                 if (qmat16[qscale][0][i] == 0 ||
124                     qmat16[qscale][0][i] == 128 * 256)
125                     qmat16[qscale][0][i] = 128 * 256 - 1;
126                 qmat16[qscale][1][i] =
127                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
128                                 qmat16[qscale][0][i]);
129             }
130         }
131
132         for (i = intra; i < 64; i++) {
133             int64_t max = 8191;
134             if (dsp->fdct == ff_fdct_ifast) {
135                 max = (8191LL * ff_aanscales[i]) >> 14;
136             }
137             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
138                 shift++;
139             }
140         }
141     }
142     if (shift) {
143         av_log(NULL, AV_LOG_INFO,
144                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
145                QMAT_SHIFT - shift);
146     }
147 }
148
149 static inline void update_qscale(MpegEncContext *s)
150 {
151     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
152                 (FF_LAMBDA_SHIFT + 7);
153     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
154
155     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
156                  FF_LAMBDA_SHIFT;
157 }
158
159 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
160 {
161     int i;
162
163     if (matrix) {
164         put_bits(pb, 1, 1);
165         for (i = 0; i < 64; i++) {
166             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
167         }
168     } else
169         put_bits(pb, 1, 0);
170 }
171
172 /**
173  * init s->current_picture.qscale_table from s->lambda_table
174  */
175 void ff_init_qscale_tab(MpegEncContext *s)
176 {
177     int8_t * const qscale_table = s->current_picture.f.qscale_table;
178     int i;
179
180     for (i = 0; i < s->mb_num; i++) {
181         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
182         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
183         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
184                                                   s->avctx->qmax);
185     }
186 }
187
188 static void copy_picture_attributes(MpegEncContext *s,
189                                     AVFrame *dst,
190                                     AVFrame *src)
191 {
192     int i;
193
194     dst->pict_type              = src->pict_type;
195     dst->quality                = src->quality;
196     dst->coded_picture_number   = src->coded_picture_number;
197     dst->display_picture_number = src->display_picture_number;
198     //dst->reference              = src->reference;
199     dst->pts                    = src->pts;
200     dst->interlaced_frame       = src->interlaced_frame;
201     dst->top_field_first        = src->top_field_first;
202
203     if (s->avctx->me_threshold) {
204         if (!src->motion_val[0])
205             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
206         if (!src->mb_type)
207             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
208         if (!src->ref_index[0])
209             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
210         if (src->motion_subsample_log2 != dst->motion_subsample_log2)
211             av_log(s->avctx, AV_LOG_ERROR,
212                    "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
213                    src->motion_subsample_log2, dst->motion_subsample_log2);
214
215         memcpy(dst->mb_type, src->mb_type,
216                s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
217
218         for (i = 0; i < 2; i++) {
219             int stride = ((16 * s->mb_width ) >>
220                           src->motion_subsample_log2) + 1;
221             int height = ((16 * s->mb_height) >> src->motion_subsample_log2);
222
223             if (src->motion_val[i] &&
224                 src->motion_val[i] != dst->motion_val[i]) {
225                 memcpy(dst->motion_val[i], src->motion_val[i],
226                        2 * stride * height * sizeof(int16_t));
227             }
228             if (src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]) {
229                 memcpy(dst->ref_index[i], src->ref_index[i],
230                        s->mb_stride * 4 * s->mb_height * sizeof(int8_t));
231             }
232         }
233     }
234 }
235
236 static void update_duplicate_context_after_me(MpegEncContext *dst,
237                                               MpegEncContext *src)
238 {
239 #define COPY(a) dst->a= src->a
240     COPY(pict_type);
241     COPY(current_picture);
242     COPY(f_code);
243     COPY(b_code);
244     COPY(qscale);
245     COPY(lambda);
246     COPY(lambda2);
247     COPY(picture_in_gop_number);
248     COPY(gop_picture_number);
249     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
250     COPY(progressive_frame);    // FIXME don't set in encode_header
251     COPY(partitioned_frame);    // FIXME don't set in encode_header
252 #undef COPY
253 }
254
255 /**
256  * Set the given MpegEncContext to defaults for encoding.
257  * the changed fields will not depend upon the prior state of the MpegEncContext.
258  */
259 static void MPV_encode_defaults(MpegEncContext *s)
260 {
261     int i;
262     ff_MPV_common_defaults(s);
263
264     for (i = -16; i < 16; i++) {
265         default_fcode_tab[i + MAX_MV] = 1;
266     }
267     s->me.mv_penalty = default_mv_penalty;
268     s->fcode_tab     = default_fcode_tab;
269 }
270
271 /* init video encoder */
272 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
273 {
274     MpegEncContext *s = avctx->priv_data;
275     int i;
276     int chroma_h_shift, chroma_v_shift;
277
278     MPV_encode_defaults(s);
279
280     switch (avctx->codec_id) {
281     case CODEC_ID_MPEG2VIDEO:
282         if (avctx->pix_fmt != PIX_FMT_YUV420P &&
283             avctx->pix_fmt != PIX_FMT_YUV422P) {
284             av_log(avctx, AV_LOG_ERROR,
285                    "only YUV420 and YUV422 are supported\n");
286             return -1;
287         }
288         break;
289     case CODEC_ID_LJPEG:
290         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
291             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
292             avctx->pix_fmt != PIX_FMT_YUVJ444P &&
293             avctx->pix_fmt != PIX_FMT_BGR0     &&
294             avctx->pix_fmt != PIX_FMT_BGRA     &&
295             avctx->pix_fmt != PIX_FMT_BGR24    &&
296             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
297               avctx->pix_fmt != PIX_FMT_YUV422P &&
298               avctx->pix_fmt != PIX_FMT_YUV444P) ||
299              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
300             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
301             return -1;
302         }
303         break;
304     case CODEC_ID_MJPEG:
305     case CODEC_ID_AMV:
306         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
307             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
308             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
309               avctx->pix_fmt != PIX_FMT_YUV422P) ||
310              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
311             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
312             return -1;
313         }
314         break;
315     default:
316         if (avctx->pix_fmt != PIX_FMT_YUV420P) {
317             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
318             return -1;
319         }
320     }
321
322     switch (avctx->pix_fmt) {
323     case PIX_FMT_YUVJ422P:
324     case PIX_FMT_YUV422P:
325         s->chroma_format = CHROMA_422;
326         break;
327     case PIX_FMT_YUVJ420P:
328     case PIX_FMT_YUV420P:
329     default:
330         s->chroma_format = CHROMA_420;
331         break;
332     }
333
334     s->bit_rate = avctx->bit_rate;
335     s->width    = avctx->width;
336     s->height   = avctx->height;
337     if (avctx->gop_size > 600 &&
338         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
339         av_log(avctx, AV_LOG_WARNING,
340                "keyframe interval too large!, reducing it from %d to %d\n",
341                avctx->gop_size, 600);
342         avctx->gop_size = 600;
343     }
344     s->gop_size     = avctx->gop_size;
345     s->avctx        = avctx;
346     s->flags        = avctx->flags;
347     s->flags2       = avctx->flags2;
348     s->max_b_frames = avctx->max_b_frames;
349     s->codec_id     = avctx->codec->id;
350 #if FF_API_MPV_GLOBAL_OPTS
351     if (avctx->luma_elim_threshold)
352         s->luma_elim_threshold   = avctx->luma_elim_threshold;
353     if (avctx->chroma_elim_threshold)
354         s->chroma_elim_threshold = avctx->chroma_elim_threshold;
355 #endif
356     s->strict_std_compliance = avctx->strict_std_compliance;
357     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
358     s->mpeg_quant         = avctx->mpeg_quant;
359     s->rtp_mode           = !!avctx->rtp_payload_size;
360     s->intra_dc_precision = avctx->intra_dc_precision;
361     s->user_specified_pts = AV_NOPTS_VALUE;
362
363     if (s->gop_size <= 1) {
364         s->intra_only = 1;
365         s->gop_size   = 12;
366     } else {
367         s->intra_only = 0;
368     }
369
370     s->me_method = avctx->me_method;
371
372     /* Fixed QSCALE */
373     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
374
375 #if FF_API_MPV_GLOBAL_OPTS
376     if (s->flags & CODEC_FLAG_QP_RD)
377         s->mpv_flags |= FF_MPV_FLAG_QP_RD;
378 #endif
379
380     s->adaptive_quant = (s->avctx->lumi_masking ||
381                          s->avctx->dark_masking ||
382                          s->avctx->temporal_cplx_masking ||
383                          s->avctx->spatial_cplx_masking  ||
384                          s->avctx->p_masking      ||
385                          s->avctx->border_masking ||
386                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
387                         !s->fixed_qscale;
388
389     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
390
391     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
392         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
393         return -1;
394     }
395
396     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
397         av_log(avctx, AV_LOG_INFO,
398                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
399     }
400
401     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
402         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
403         return -1;
404     }
405
406     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
407         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
408         return -1;
409     }
410
411     if (avctx->rc_max_rate &&
412         avctx->rc_max_rate == avctx->bit_rate &&
413         avctx->rc_max_rate != avctx->rc_min_rate) {
414         av_log(avctx, AV_LOG_INFO,
415                "impossible bitrate constraints, this will fail\n");
416     }
417
418     if (avctx->rc_buffer_size &&
419         avctx->bit_rate * (int64_t)avctx->time_base.num >
420             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
421         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
422         return -1;
423     }
424
425     if (!s->fixed_qscale &&
426         avctx->bit_rate * av_q2d(avctx->time_base) >
427             avctx->bit_rate_tolerance) {
428         av_log(avctx, AV_LOG_ERROR,
429                "bitrate tolerance too small for bitrate\n");
430         return -1;
431     }
432
433     if (s->avctx->rc_max_rate &&
434         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
435         (s->codec_id == CODEC_ID_MPEG1VIDEO ||
436          s->codec_id == CODEC_ID_MPEG2VIDEO) &&
437         90000LL * (avctx->rc_buffer_size - 1) >
438             s->avctx->rc_max_rate * 0xFFFFLL) {
439         av_log(avctx, AV_LOG_INFO,
440                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
441                "specified vbv buffer is too large for the given bitrate!\n");
442     }
443
444     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != CODEC_ID_MPEG4 &&
445         s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P &&
446         s->codec_id != CODEC_ID_FLV1) {
447         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
448         return -1;
449     }
450
451     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
452         av_log(avctx, AV_LOG_ERROR,
453                "OBMC is only supported with simple mb decision\n");
454         return -1;
455     }
456
457     if (s->quarter_sample && s->codec_id != CODEC_ID_MPEG4) {
458         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
459         return -1;
460     }
461
462     if (s->max_b_frames                    &&
463         s->codec_id != CODEC_ID_MPEG4      &&
464         s->codec_id != CODEC_ID_MPEG1VIDEO &&
465         s->codec_id != CODEC_ID_MPEG2VIDEO) {
466         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
467         return -1;
468     }
469
470     if ((s->codec_id == CODEC_ID_MPEG4 ||
471          s->codec_id == CODEC_ID_H263  ||
472          s->codec_id == CODEC_ID_H263P) &&
473         (avctx->sample_aspect_ratio.num > 255 ||
474          avctx->sample_aspect_ratio.den > 255)) {
475         av_log(avctx, AV_LOG_WARNING,
476                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
477                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
478         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
479                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
480     }
481
482     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
483         s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO) {
484         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
485         return -1;
486     }
487
488     // FIXME mpeg2 uses that too
489     if (s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4) {
490         av_log(avctx, AV_LOG_ERROR,
491                "mpeg2 style quantization not supported by codec\n");
492         return -1;
493     }
494
495 #if FF_API_MPV_GLOBAL_OPTS
496     if (s->flags & CODEC_FLAG_CBP_RD)
497         s->mpv_flags |= FF_MPV_FLAG_CBP_RD;
498 #endif
499
500     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
501         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
502         return -1;
503     }
504
505     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
506         s->avctx->mb_decision != FF_MB_DECISION_RD) {
507         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
508         return -1;
509     }
510
511     if (s->avctx->scenechange_threshold < 1000000000 &&
512         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
513         av_log(avctx, AV_LOG_ERROR,
514                "closed gop with scene change detection are not supported yet, "
515                "set threshold to 1000000000\n");
516         return -1;
517     }
518
519     if (s->flags & CODEC_FLAG_LOW_DELAY) {
520         if (s->codec_id != CODEC_ID_MPEG2VIDEO) {
521             av_log(avctx, AV_LOG_ERROR,
522                   "low delay forcing is only available for mpeg2\n");
523             return -1;
524         }
525         if (s->max_b_frames != 0) {
526             av_log(avctx, AV_LOG_ERROR,
527                    "b frames cannot be used with low delay\n");
528             return -1;
529         }
530     }
531
532     if (s->q_scale_type == 1) {
533         if (avctx->qmax > 12) {
534             av_log(avctx, AV_LOG_ERROR,
535                    "non linear quant only supports qmax <= 12 currently\n");
536             return -1;
537         }
538     }
539
540     if (s->avctx->thread_count > 1         &&
541         s->codec_id != CODEC_ID_MPEG4      &&
542         s->codec_id != CODEC_ID_MPEG1VIDEO &&
543         s->codec_id != CODEC_ID_MPEG2VIDEO &&
544         (s->codec_id != CODEC_ID_H263P)) {
545         av_log(avctx, AV_LOG_ERROR,
546                "multi threaded encoding not supported by codec\n");
547         return -1;
548     }
549
550     if (s->avctx->thread_count < 1) {
551         av_log(avctx, AV_LOG_ERROR,
552                "automatic thread number detection not supported by codec, "
553                "patch welcome\n");
554         return -1;
555     }
556
557     if (s->avctx->thread_count > 1)
558         s->rtp_mode = 1;
559
560     if (!avctx->time_base.den || !avctx->time_base.num) {
561         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
562         return -1;
563     }
564
565     i = (INT_MAX / 2 + 128) >> 8;
566     if (avctx->me_threshold >= i) {
567         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n",
568                i - 1);
569         return -1;
570     }
571     if (avctx->mb_threshold >= i) {
572         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
573                i - 1);
574         return -1;
575     }
576
577     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
578         av_log(avctx, AV_LOG_INFO,
579                "notice: b_frame_strategy only affects the first pass\n");
580         avctx->b_frame_strategy = 0;
581     }
582
583     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
584     if (i > 1) {
585         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
586         avctx->time_base.den /= i;
587         avctx->time_base.num /= i;
588         //return -1;
589     }
590
591     if (s->mpeg_quant || s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || s->codec_id == CODEC_ID_MJPEG || s->codec_id==CODEC_ID_AMV) {
592         // (a + x * 3 / 8) / x
593         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
594         s->inter_quant_bias = 0;
595     } else {
596         s->intra_quant_bias = 0;
597         // (a - x / 4) / x
598         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
599     }
600
601     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
602         s->intra_quant_bias = avctx->intra_quant_bias;
603     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
604         s->inter_quant_bias = avctx->inter_quant_bias;
605
606     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
607
608     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
609                                   &chroma_v_shift);
610
611     if (avctx->codec_id == CODEC_ID_MPEG4 &&
612         s->avctx->time_base.den > (1 << 16) - 1) {
613         av_log(avctx, AV_LOG_ERROR,
614                "timebase %d/%d not supported by MPEG 4 standard, "
615                "the maximum admitted value for the timebase denominator "
616                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
617                (1 << 16) - 1);
618         return -1;
619     }
620     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
621
622 #if FF_API_MPV_GLOBAL_OPTS
623     if (avctx->flags2 & CODEC_FLAG2_SKIP_RD)
624         s->mpv_flags |= FF_MPV_FLAG_SKIP_RD;
625     if (avctx->flags2 & CODEC_FLAG2_STRICT_GOP)
626         s->mpv_flags |= FF_MPV_FLAG_STRICT_GOP;
627     if (avctx->quantizer_noise_shaping)
628         s->quantizer_noise_shaping = avctx->quantizer_noise_shaping;
629 #endif
630
631     switch (avctx->codec->id) {
632     case CODEC_ID_MPEG1VIDEO:
633         s->out_format = FMT_MPEG1;
634         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
635         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
636         break;
637     case CODEC_ID_MPEG2VIDEO:
638         s->out_format = FMT_MPEG1;
639         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
640         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
641         s->rtp_mode   = 1;
642         break;
643     case CODEC_ID_LJPEG:
644     case CODEC_ID_MJPEG:
645     case CODEC_ID_AMV:
646         s->out_format = FMT_MJPEG;
647         s->intra_only = 1; /* force intra only for jpeg */
648         if (avctx->codec->id == CODEC_ID_LJPEG &&
649             (avctx->pix_fmt == PIX_FMT_BGR0
650              || s->avctx->pix_fmt == PIX_FMT_BGRA
651              || s->avctx->pix_fmt == PIX_FMT_BGR24)) {
652             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
653             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
654             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
655         } else {
656             s->mjpeg_vsample[0] = 2;
657             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
658             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
659             s->mjpeg_hsample[0] = 2;
660             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
661             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
662         }
663         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
664             ff_mjpeg_encode_init(s) < 0)
665             return -1;
666         avctx->delay = 0;
667         s->low_delay = 1;
668         break;
669     case CODEC_ID_H261:
670         if (!CONFIG_H261_ENCODER)
671             return -1;
672         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
673             av_log(avctx, AV_LOG_ERROR,
674                    "The specified picture size of %dx%d is not valid for the "
675                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
676                     s->width, s->height);
677             return -1;
678         }
679         s->out_format = FMT_H261;
680         avctx->delay  = 0;
681         s->low_delay  = 1;
682         break;
683     case CODEC_ID_H263:
684         if (!CONFIG_H263_ENCODER)
685             return -1;
686         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
687                              s->width, s->height) == 8) {
688             av_log(avctx, AV_LOG_ERROR,
689                    "The specified picture size of %dx%d is not valid for "
690                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
691                    "352x288, 704x576, and 1408x1152. "
692                    "Try H.263+.\n", s->width, s->height);
693             return -1;
694         }
695         s->out_format = FMT_H263;
696         avctx->delay  = 0;
697         s->low_delay  = 1;
698         break;
699     case CODEC_ID_H263P:
700         s->out_format = FMT_H263;
701         s->h263_plus  = 1;
702         /* Fx */
703         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
704         s->modified_quant  = s->h263_aic;
705         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
706         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
707
708         /* /Fx */
709         /* These are just to be sure */
710         avctx->delay = 0;
711         s->low_delay = 1;
712         break;
713     case CODEC_ID_FLV1:
714         s->out_format      = FMT_H263;
715         s->h263_flv        = 2; /* format = 1; 11-bit codes */
716         s->unrestricted_mv = 1;
717         s->rtp_mode  = 0; /* don't allow GOB */
718         avctx->delay = 0;
719         s->low_delay = 1;
720         break;
721     case CODEC_ID_RV10:
722         s->out_format = FMT_H263;
723         avctx->delay  = 0;
724         s->low_delay  = 1;
725         break;
726     case CODEC_ID_RV20:
727         s->out_format      = FMT_H263;
728         avctx->delay       = 0;
729         s->low_delay       = 1;
730         s->modified_quant  = 1;
731         s->h263_aic        = 1;
732         s->h263_plus       = 1;
733         s->loop_filter     = 1;
734         s->unrestricted_mv = 0;
735         break;
736     case CODEC_ID_MPEG4:
737         s->out_format      = FMT_H263;
738         s->h263_pred       = 1;
739         s->unrestricted_mv = 1;
740         s->low_delay       = s->max_b_frames ? 0 : 1;
741         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
742         break;
743     case CODEC_ID_MSMPEG4V2:
744         s->out_format      = FMT_H263;
745         s->h263_pred       = 1;
746         s->unrestricted_mv = 1;
747         s->msmpeg4_version = 2;
748         avctx->delay       = 0;
749         s->low_delay       = 1;
750         break;
751     case CODEC_ID_MSMPEG4V3:
752         s->out_format        = FMT_H263;
753         s->h263_pred         = 1;
754         s->unrestricted_mv   = 1;
755         s->msmpeg4_version   = 3;
756         s->flipflop_rounding = 1;
757         avctx->delay         = 0;
758         s->low_delay         = 1;
759         break;
760     case CODEC_ID_WMV1:
761         s->out_format        = FMT_H263;
762         s->h263_pred         = 1;
763         s->unrestricted_mv   = 1;
764         s->msmpeg4_version   = 4;
765         s->flipflop_rounding = 1;
766         avctx->delay         = 0;
767         s->low_delay         = 1;
768         break;
769     case CODEC_ID_WMV2:
770         s->out_format        = FMT_H263;
771         s->h263_pred         = 1;
772         s->unrestricted_mv   = 1;
773         s->msmpeg4_version   = 5;
774         s->flipflop_rounding = 1;
775         avctx->delay         = 0;
776         s->low_delay         = 1;
777         break;
778     default:
779         return -1;
780     }
781
782     avctx->has_b_frames = !s->low_delay;
783
784     s->encoding = 1;
785
786     s->progressive_frame    =
787     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
788                                                 CODEC_FLAG_INTERLACED_ME) ||
789                                 s->alternate_scan);
790
791     /* init */
792     if (ff_MPV_common_init(s) < 0)
793         return -1;
794
795     if (!s->dct_quantize)
796         s->dct_quantize = ff_dct_quantize_c;
797     if (!s->denoise_dct)
798         s->denoise_dct  = denoise_dct_c;
799     s->fast_dct_quantize = s->dct_quantize;
800     if (avctx->trellis)
801         s->dct_quantize  = dct_quantize_trellis_c;
802
803     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
804         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
805
806     s->quant_precision = 5;
807
808     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
809     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
810
811     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
812         ff_h261_encode_init(s);
813     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
814         ff_h263_encode_init(s);
815     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
816         ff_msmpeg4_encode_init(s);
817     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
818         && s->out_format == FMT_MPEG1)
819         ff_mpeg1_encode_init(s);
820
821     /* init q matrix */
822     for (i = 0; i < 64; i++) {
823         int j = s->dsp.idct_permutation[i];
824         if (CONFIG_MPEG4_ENCODER && s->codec_id == CODEC_ID_MPEG4 &&
825             s->mpeg_quant) {
826             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
827             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
828         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
829             s->intra_matrix[j] =
830             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
831         } else {
832             /* mpeg1/2 */
833             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
834             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
835         }
836         if (s->avctx->intra_matrix)
837             s->intra_matrix[j] = s->avctx->intra_matrix[i];
838         if (s->avctx->inter_matrix)
839             s->inter_matrix[j] = s->avctx->inter_matrix[i];
840     }
841
842     /* precompute matrix */
843     /* for mjpeg, we do include qscale in the matrix */
844     if (s->out_format != FMT_MJPEG) {
845         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
846                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
847                           31, 1);
848         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
849                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
850                           31, 0);
851     }
852
853     if (ff_rate_control_init(s) < 0)
854         return -1;
855
856     return 0;
857 }
858
859 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
860 {
861     MpegEncContext *s = avctx->priv_data;
862
863     ff_rate_control_uninit(s);
864
865     ff_MPV_common_end(s);
866     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
867         s->out_format == FMT_MJPEG)
868         ff_mjpeg_encode_close(s);
869
870     av_freep(&avctx->extradata);
871
872     return 0;
873 }
874
/**
 * Sum of absolute errors of a 16x16 block against a constant value.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared with
 * @param stride distance in bytes between two rows of src
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++)
            total += FFABS(line[col] - ref);
    }

    return total;
}
888
889 static int get_intra_count(MpegEncContext *s, uint8_t *src,
890                            uint8_t *ref, int stride)
891 {
892     int x, y, w, h;
893     int acc = 0;
894
895     w = s->width  & ~15;
896     h = s->height & ~15;
897
898     for (y = 0; y < h; y += 16) {
899         for (x = 0; x < w; x += 16) {
900             int offset = x + y * stride;
901             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
902                                      16);
903             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
904             int sae  = get_sae(src + offset, mean, stride);
905
906             acc += sae + 500 < sad;
907         }
908     }
909     return acc;
910 }
911
912
913 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg)
914 {
915     AVFrame *pic = NULL;
916     int64_t pts;
917     int i;
918     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
919                                                  (s->low_delay ? 0 : 1);
920     int direct = 1;
921
922     if (pic_arg) {
923         pts = pic_arg->pts;
924         pic_arg->display_picture_number = s->input_picture_number++;
925
926         if (pts != AV_NOPTS_VALUE) {
927             if (s->user_specified_pts != AV_NOPTS_VALUE) {
928                 int64_t time = pts;
929                 int64_t last = s->user_specified_pts;
930
931                 if (time <= last) {
932                     av_log(s->avctx, AV_LOG_ERROR,
933                            "Error, Invalid timestamp=%"PRId64", "
934                            "last=%"PRId64"\n", pts, s->user_specified_pts);
935                     return -1;
936                 }
937
938                 if (!s->low_delay && pic_arg->display_picture_number == 1)
939                     s->dts_delta = time - last;
940             }
941             s->user_specified_pts = pts;
942         } else {
943             if (s->user_specified_pts != AV_NOPTS_VALUE) {
944                 s->user_specified_pts =
945                 pts = s->user_specified_pts + 1;
946                 av_log(s->avctx, AV_LOG_INFO,
947                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
948                        pts);
949             } else {
950                 pts = pic_arg->display_picture_number;
951             }
952         }
953     }
954
955   if (pic_arg) {
956     if (encoding_delay && !(s->flags & CODEC_FLAG_INPUT_PRESERVED))
957         direct = 0;
958     if (pic_arg->linesize[0] != s->linesize)
959         direct = 0;
960     if (pic_arg->linesize[1] != s->uvlinesize)
961         direct = 0;
962     if (pic_arg->linesize[2] != s->uvlinesize)
963         direct = 0;
964
965     //av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0],
966     //       pic_arg->linesize[1], s->linesize, s->uvlinesize);
967
968     if (direct) {
969         i = ff_find_unused_picture(s, 1);
970         if (i < 0)
971             return i;
972
973         pic = &s->picture[i].f;
974         pic->reference = 3;
975
976         for (i = 0; i < 4; i++) {
977             pic->data[i]     = pic_arg->data[i];
978             pic->linesize[i] = pic_arg->linesize[i];
979         }
980         if (ff_alloc_picture(s, (Picture *) pic, 1) < 0) {
981             return -1;
982         }
983     } else {
984         i = ff_find_unused_picture(s, 0);
985         if (i < 0)
986             return i;
987
988         pic = &s->picture[i].f;
989         pic->reference = 3;
990
991         if (ff_alloc_picture(s, (Picture *) pic, 0) < 0) {
992             return -1;
993         }
994
995         if (pic->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
996             pic->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
997             pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
998             // empty
999         } else {
1000             int h_chroma_shift, v_chroma_shift;
1001             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift,
1002                                           &v_chroma_shift);
1003
1004             for (i = 0; i < 3; i++) {
1005                 int src_stride = pic_arg->linesize[i];
1006                 int dst_stride = i ? s->uvlinesize : s->linesize;
1007                 int h_shift = i ? h_chroma_shift : 0;
1008                 int v_shift = i ? v_chroma_shift : 0;
1009                 int w = s->width  >> h_shift;
1010                 int h = s->height >> v_shift;
1011                 uint8_t *src = pic_arg->data[i];
1012                 uint8_t *dst = pic->data[i];
1013
1014                 if(s->codec_id == CODEC_ID_AMV && !(s->avctx->flags & CODEC_FLAG_EMU_EDGE)){
1015                     h= ((s->height+15)/16*16)>>v_shift;
1016                 }
1017
1018                 if (!s->avctx->rc_buffer_size)
1019                     dst += INPLACE_OFFSET;
1020
1021                 if (src_stride == dst_stride)
1022                     memcpy(dst, src, src_stride * h);
1023                 else {
1024                     while (h--) {
1025                         memcpy(dst, src, w);
1026                         dst += dst_stride;
1027                         src += src_stride;
1028                     }
1029                 }
1030             }
1031         }
1032     }
1033     copy_picture_attributes(s, pic, pic_arg);
1034     pic->pts = pts; // we set this here to avoid modifiying pic_arg
1035   }
1036
1037     /* shift buffer entries */
1038     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1039         s->input_picture[i - 1] = s->input_picture[i];
1040
1041     s->input_picture[encoding_delay] = (Picture*) pic;
1042
1043     return 0;
1044 }
1045
1046 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1047 {
1048     int x, y, plane;
1049     int score = 0;
1050     int64_t score64 = 0;
1051
1052     for (plane = 0; plane < 3; plane++) {
1053         const int stride = p->f.linesize[plane];
1054         const int bw = plane ? 1 : 2;
1055         for (y = 0; y < s->mb_height * bw; y++) {
1056             for (x = 0; x < s->mb_width * bw; x++) {
1057                 int off = p->f.type == FF_BUFFER_TYPE_SHARED ? 0 : 16;
1058                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1059                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1060                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1061
1062                 switch (s->avctx->frame_skip_exp) {
1063                 case 0: score    =  FFMAX(score, v);          break;
1064                 case 1: score   += FFABS(v);                  break;
1065                 case 2: score   += v * v;                     break;
1066                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1067                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1068                 }
1069             }
1070         }
1071     }
1072
1073     if (score)
1074         score64 = score;
1075
1076     if (score64 < s->avctx->frame_skip_threshold)
1077         return 1;
1078     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1079         return 1;
1080     return 0;
1081 }
1082
1083 static int estimate_best_b_count(MpegEncContext *s)
1084 {
1085     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1086     AVCodecContext *c = avcodec_alloc_context3(NULL);
1087     AVFrame input[FF_MAX_B_FRAMES + 2];
1088     const int scale = s->avctx->brd_scale;
1089     int i, j, out_size, p_lambda, b_lambda, lambda2;
1090     int outbuf_size  = s->width * s->height; // FIXME
1091     uint8_t *outbuf  = av_malloc(outbuf_size);
1092     int64_t best_rd  = INT64_MAX;
1093     int best_b_count = -1;
1094
1095     assert(scale >= 0 && scale <= 3);
1096
1097     //emms_c();
1098     //s->next_picture_ptr->quality;
1099     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1100     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1101     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1102     if (!b_lambda) // FIXME we should do this somewhere else
1103         b_lambda = p_lambda;
1104     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1105                FF_LAMBDA_SHIFT;
1106
1107     c->width        = s->width  >> scale;
1108     c->height       = s->height >> scale;
1109     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1110                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1111     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1112     c->mb_decision  = s->avctx->mb_decision;
1113     c->me_cmp       = s->avctx->me_cmp;
1114     c->mb_cmp       = s->avctx->mb_cmp;
1115     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1116     c->pix_fmt      = PIX_FMT_YUV420P;
1117     c->time_base    = s->avctx->time_base;
1118     c->max_b_frames = s->max_b_frames;
1119
1120     if (avcodec_open2(c, codec, NULL) < 0)
1121         return -1;
1122
1123     for (i = 0; i < s->max_b_frames + 2; i++) {
1124         int ysize = c->width * c->height;
1125         int csize = (c->width / 2) * (c->height / 2);
1126         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1127                                                 s->next_picture_ptr;
1128
1129         avcodec_get_frame_defaults(&input[i]);
1130         input[i].data[0]     = av_malloc(ysize + 2 * csize);
1131         input[i].data[1]     = input[i].data[0] + ysize;
1132         input[i].data[2]     = input[i].data[1] + csize;
1133         input[i].linesize[0] = c->width;
1134         input[i].linesize[1] =
1135         input[i].linesize[2] = c->width / 2;
1136
1137         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1138             pre_input = *pre_input_ptr;
1139
1140             if (pre_input.f.type != FF_BUFFER_TYPE_SHARED && i) {
1141                 pre_input.f.data[0] += INPLACE_OFFSET;
1142                 pre_input.f.data[1] += INPLACE_OFFSET;
1143                 pre_input.f.data[2] += INPLACE_OFFSET;
1144             }
1145
1146             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
1147                                  pre_input.f.data[0], pre_input.f.linesize[0],
1148                                  c->width,      c->height);
1149             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
1150                                  pre_input.f.data[1], pre_input.f.linesize[1],
1151                                  c->width >> 1, c->height >> 1);
1152             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
1153                                  pre_input.f.data[2], pre_input.f.linesize[2],
1154                                  c->width >> 1, c->height >> 1);
1155         }
1156     }
1157
1158     for (j = 0; j < s->max_b_frames + 1; j++) {
1159         int64_t rd = 0;
1160
1161         if (!s->input_picture[j])
1162             break;
1163
1164         c->error[0] = c->error[1] = c->error[2] = 0;
1165
1166         input[0].pict_type = AV_PICTURE_TYPE_I;
1167         input[0].quality   = 1 * FF_QP2LAMBDA;
1168         out_size           = avcodec_encode_video(c, outbuf,
1169                                                   outbuf_size, &input[0]);
1170         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1171
1172         for (i = 0; i < s->max_b_frames + 1; i++) {
1173             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1174
1175             input[i + 1].pict_type = is_p ?
1176                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1177             input[i + 1].quality   = is_p ? p_lambda : b_lambda;
1178             out_size = avcodec_encode_video(c, outbuf, outbuf_size,
1179                                             &input[i + 1]);
1180             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1181         }
1182
1183         /* get the delayed frames */
1184         while (out_size) {
1185             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
1186             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1187         }
1188
1189         rd += c->error[0] + c->error[1] + c->error[2];
1190
1191         if (rd < best_rd) {
1192             best_rd = rd;
1193             best_b_count = j;
1194         }
1195     }
1196
1197     av_freep(&outbuf);
1198     avcodec_close(c);
1199     av_freep(&c);
1200
1201     for (i = 0; i < s->max_b_frames + 2; i++) {
1202         av_freep(&input[i].data[0]);
1203     }
1204
1205     return best_b_count;
1206 }
1207
1208 static int select_input_picture(MpegEncContext *s)
1209 {
1210     int i;
1211
1212     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1213         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1214     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1215
1216     /* set next picture type & ordering */
1217     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1218         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1219             s->next_picture_ptr == NULL || s->intra_only) {
1220             s->reordered_input_picture[0] = s->input_picture[0];
1221             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1222             s->reordered_input_picture[0]->f.coded_picture_number =
1223                 s->coded_picture_number++;
1224         } else {
1225             int b_frames;
1226
1227             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1228                 if (s->picture_in_gop_number < s->gop_size &&
1229                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1230                     // FIXME check that te gop check above is +-1 correct
1231                     //av_log(NULL, AV_LOG_DEBUG, "skip %p %"PRId64"\n",
1232                     //       s->input_picture[0]->f.data[0],
1233                     //       s->input_picture[0]->pts);
1234
1235                     if (s->input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED) {
1236                         for (i = 0; i < 4; i++)
1237                             s->input_picture[0]->f.data[i] = NULL;
1238                         s->input_picture[0]->f.type = 0;
1239                     } else {
1240                         assert(s->input_picture[0]->f.type == FF_BUFFER_TYPE_USER ||
1241                                s->input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL);
1242
1243                         s->avctx->release_buffer(s->avctx,
1244                                                  &s->input_picture[0]->f);
1245                     }
1246
1247                     emms_c();
1248                     ff_vbv_update(s, 0);
1249
1250                     goto no_output_pic;
1251                 }
1252             }
1253
1254             if (s->flags & CODEC_FLAG_PASS2) {
1255                 for (i = 0; i < s->max_b_frames + 1; i++) {
1256                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1257
1258                     if (pict_num >= s->rc_context.num_entries)
1259                         break;
1260                     if (!s->input_picture[i]) {
1261                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1262                         break;
1263                     }
1264
1265                     s->input_picture[i]->f.pict_type =
1266                         s->rc_context.entry[pict_num].new_pict_type;
1267                 }
1268             }
1269
1270             if (s->avctx->b_frame_strategy == 0) {
1271                 b_frames = s->max_b_frames;
1272                 while (b_frames && !s->input_picture[b_frames])
1273                     b_frames--;
1274             } else if (s->avctx->b_frame_strategy == 1) {
1275                 for (i = 1; i < s->max_b_frames + 1; i++) {
1276                     if (s->input_picture[i] &&
1277                         s->input_picture[i]->b_frame_score == 0) {
1278                         s->input_picture[i]->b_frame_score =
1279                             get_intra_count(s,
1280                                             s->input_picture[i    ]->f.data[0],
1281                                             s->input_picture[i - 1]->f.data[0],
1282                                             s->linesize) + 1;
1283                     }
1284                 }
1285                 for (i = 0; i < s->max_b_frames + 1; i++) {
1286                     if (s->input_picture[i] == NULL ||
1287                         s->input_picture[i]->b_frame_score - 1 >
1288                             s->mb_num / s->avctx->b_sensitivity)
1289                         break;
1290                 }
1291
1292                 b_frames = FFMAX(0, i - 1);
1293
1294                 /* reset scores */
1295                 for (i = 0; i < b_frames + 1; i++) {
1296                     s->input_picture[i]->b_frame_score = 0;
1297                 }
1298             } else if (s->avctx->b_frame_strategy == 2) {
1299                 b_frames = estimate_best_b_count(s);
1300             } else {
1301                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1302                 b_frames = 0;
1303             }
1304
1305             emms_c();
1306             //static int b_count = 0;
1307             //b_count += b_frames;
1308             //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
1309
1310             for (i = b_frames - 1; i >= 0; i--) {
1311                 int type = s->input_picture[i]->f.pict_type;
1312                 if (type && type != AV_PICTURE_TYPE_B)
1313                     b_frames = i;
1314             }
1315             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1316                 b_frames == s->max_b_frames) {
1317                 av_log(s->avctx, AV_LOG_ERROR,
1318                        "warning, too many b frames in a row\n");
1319             }
1320
1321             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1322                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1323                     s->gop_size > s->picture_in_gop_number) {
1324                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1325                 } else {
1326                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1327                         b_frames = 0;
1328                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1329                 }
1330             }
1331
1332             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1333                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1334                 b_frames--;
1335
1336             s->reordered_input_picture[0] = s->input_picture[b_frames];
1337             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1338                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1339             s->reordered_input_picture[0]->f.coded_picture_number =
1340                 s->coded_picture_number++;
1341             for (i = 0; i < b_frames; i++) {
1342                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1343                 s->reordered_input_picture[i + 1]->f.pict_type =
1344                     AV_PICTURE_TYPE_B;
1345                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1346                     s->coded_picture_number++;
1347             }
1348         }
1349     }
1350 no_output_pic:
1351     if (s->reordered_input_picture[0]) {
1352         s->reordered_input_picture[0]->f.reference =
1353            s->reordered_input_picture[0]->f.pict_type !=
1354                AV_PICTURE_TYPE_B ? 3 : 0;
1355
1356         ff_copy_picture(&s->new_picture, s->reordered_input_picture[0]);
1357
1358         if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED ||
1359             s->avctx->rc_buffer_size) {
1360             // input is a shared pix, so we can't modifiy it -> alloc a new
1361             // one & ensure that the shared one is reuseable
1362
1363             Picture *pic;
1364             int i = ff_find_unused_picture(s, 0);
1365             if (i < 0)
1366                 return i;
1367             pic = &s->picture[i];
1368
1369             pic->f.reference = s->reordered_input_picture[0]->f.reference;
1370             if (ff_alloc_picture(s, pic, 0) < 0) {
1371                 return -1;
1372             }
1373
1374             /* mark us unused / free shared pic */
1375             if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL)
1376                 s->avctx->release_buffer(s->avctx,
1377                                          &s->reordered_input_picture[0]->f);
1378             for (i = 0; i < 4; i++)
1379                 s->reordered_input_picture[0]->f.data[i] = NULL;
1380             s->reordered_input_picture[0]->f.type = 0;
1381
1382             copy_picture_attributes(s, &pic->f,
1383                                     &s->reordered_input_picture[0]->f);
1384
1385             s->current_picture_ptr = pic;
1386         } else {
1387             // input is not a shared pix -> reuse buffer for current_pix
1388
1389             assert(s->reordered_input_picture[0]->f.type ==
1390                        FF_BUFFER_TYPE_USER ||
1391                    s->reordered_input_picture[0]->f.type ==
1392                        FF_BUFFER_TYPE_INTERNAL);
1393
1394             s->current_picture_ptr = s->reordered_input_picture[0];
1395             for (i = 0; i < 4; i++) {
1396                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1397             }
1398         }
1399         ff_copy_picture(&s->current_picture, s->current_picture_ptr);
1400
1401         s->picture_number = s->new_picture.f.display_picture_number;
1402         //printf("dpn:%d\n", s->picture_number);
1403     } else {
1404         memset(&s->new_picture, 0, sizeof(Picture));
1405     }
1406     return 0;
1407 }
1408
1409 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1410                           AVFrame *pic_arg, int *got_packet)
1411 {
1412     MpegEncContext *s = avctx->priv_data;
1413     int i, stuffing_count, ret;
1414     int context_count = s->slice_context_count;
1415
1416     s->picture_in_gop_number++;
1417
1418     if (load_input_picture(s, pic_arg) < 0)
1419         return -1;
1420
1421     if (select_input_picture(s) < 0) {
1422         return -1;
1423     }
1424
1425     /* output? */
1426     if (s->new_picture.f.data[0]) {
1427         if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1428             return ret;
1429         if (s->mb_info) {
1430             s->mb_info_ptr = av_packet_new_side_data(pkt,
1431                                  AV_PKT_DATA_H263_MB_INFO,
1432                                  s->mb_width*s->mb_height*12);
1433             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1434         }
1435
1436         for (i = 0; i < context_count; i++) {
1437             int start_y = s->thread_context[i]->start_mb_y;
1438             int   end_y = s->thread_context[i]->  end_mb_y;
1439             int h       = s->mb_height;
1440             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1441             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1442
1443             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1444         }
1445
1446         s->pict_type = s->new_picture.f.pict_type;
1447         //emms_c();
1448         //printf("qs:%f %f %d\n", s->new_picture.quality,
1449         //       s->current_picture.quality, s->qscale);
1450         ff_MPV_frame_start(s, avctx);
1451 vbv_retry:
1452         if (encode_picture(s, s->picture_number) < 0)
1453             return -1;
1454
1455         avctx->header_bits = s->header_bits;
1456         avctx->mv_bits     = s->mv_bits;
1457         avctx->misc_bits   = s->misc_bits;
1458         avctx->i_tex_bits  = s->i_tex_bits;
1459         avctx->p_tex_bits  = s->p_tex_bits;
1460         avctx->i_count     = s->i_count;
1461         // FIXME f/b_count in avctx
1462         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1463         avctx->skip_count  = s->skip_count;
1464
1465         ff_MPV_frame_end(s);
1466
1467         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1468             ff_mjpeg_encode_picture_trailer(s);
1469
1470         if (avctx->rc_buffer_size) {
1471             RateControlContext *rcc = &s->rc_context;
1472             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1473
1474             if (put_bits_count(&s->pb) > max_size &&
1475                 s->lambda < s->avctx->lmax) {
1476                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1477                                        (s->qscale + 1) / s->qscale);
1478                 if (s->adaptive_quant) {
1479                     int i;
1480                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1481                         s->lambda_table[i] =
1482                             FFMAX(s->lambda_table[i] + 1,
1483                                   s->lambda_table[i] * (s->qscale + 1) /
1484                                   s->qscale);
1485                 }
1486                 s->mb_skipped = 0;        // done in MPV_frame_start()
1487                 // done in encode_picture() so we must undo it
1488                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1489                     if (s->flipflop_rounding          ||
1490                         s->codec_id == CODEC_ID_H263P ||
1491                         s->codec_id == CODEC_ID_MPEG4)
1492                         s->no_rounding ^= 1;
1493                 }
1494                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1495                     s->time_base       = s->last_time_base;
1496                     s->last_non_b_time = s->time - s->pp_time;
1497                 }
1498                 //av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
1499                 for (i = 0; i < context_count; i++) {
1500                     PutBitContext *pb = &s->thread_context[i]->pb;
1501                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1502                 }
1503                 goto vbv_retry;
1504             }
1505
1506             assert(s->avctx->rc_max_rate);
1507         }
1508
1509         if (s->flags & CODEC_FLAG_PASS1)
1510             ff_write_pass1_stats(s);
1511
1512         for (i = 0; i < 4; i++) {
1513             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1514             avctx->error[i] += s->current_picture_ptr->f.error[i];
1515         }
1516
1517         if (s->flags & CODEC_FLAG_PASS1)
1518             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1519                    avctx->i_tex_bits + avctx->p_tex_bits ==
1520                        put_bits_count(&s->pb));
1521         flush_put_bits(&s->pb);
1522         s->frame_bits  = put_bits_count(&s->pb);
1523
1524         stuffing_count = ff_vbv_update(s, s->frame_bits);
1525         if (stuffing_count) {
1526             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1527                     stuffing_count + 50) {
1528                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1529                 return -1;
1530             }
1531
1532             switch (s->codec_id) {
1533             case CODEC_ID_MPEG1VIDEO:
1534             case CODEC_ID_MPEG2VIDEO:
1535                 while (stuffing_count--) {
1536                     put_bits(&s->pb, 8, 0);
1537                 }
1538             break;
1539             case CODEC_ID_MPEG4:
1540                 put_bits(&s->pb, 16, 0);
1541                 put_bits(&s->pb, 16, 0x1C3);
1542                 stuffing_count -= 4;
1543                 while (stuffing_count--) {
1544                     put_bits(&s->pb, 8, 0xFF);
1545                 }
1546             break;
1547             default:
1548                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1549             }
1550             flush_put_bits(&s->pb);
1551             s->frame_bits  = put_bits_count(&s->pb);
1552         }
1553
1554         /* update mpeg1/2 vbv_delay for CBR */
1555         if (s->avctx->rc_max_rate                          &&
1556             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1557             s->out_format == FMT_MPEG1                     &&
1558             90000LL * (avctx->rc_buffer_size - 1) <=
1559                 s->avctx->rc_max_rate * 0xFFFFLL) {
1560             int vbv_delay, min_delay;
1561             double inbits  = s->avctx->rc_max_rate *
1562                              av_q2d(s->avctx->time_base);
1563             int    minbits = s->frame_bits - 8 *
1564                              (s->vbv_delay_ptr - s->pb.buf - 1);
1565             double bits    = s->rc_context.buffer_index + minbits - inbits;
1566
1567             if (bits < 0)
1568                 av_log(s->avctx, AV_LOG_ERROR,
1569                        "Internal error, negative bits\n");
1570
1571             assert(s->repeat_first_field == 0);
1572
1573             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1574             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1575                         s->avctx->rc_max_rate;
1576
1577             vbv_delay = FFMAX(vbv_delay, min_delay);
1578
1579             assert(vbv_delay < 0xFFFF);
1580
1581             s->vbv_delay_ptr[0] &= 0xF8;
1582             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1583             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1584             s->vbv_delay_ptr[2] &= 0x07;
1585             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1586             avctx->vbv_delay     = vbv_delay * 300;
1587         }
1588         s->total_bits     += s->frame_bits;
1589         avctx->frame_bits  = s->frame_bits;
1590
1591         pkt->pts = s->current_picture.f.pts;
1592         if (!s->low_delay) {
1593             if (!s->current_picture.f.coded_picture_number)
1594                 pkt->dts = pkt->pts - s->dts_delta;
1595             else
1596                 pkt->dts = s->reordered_pts;
1597             s->reordered_pts = s->input_picture[0]->f.pts;
1598         } else
1599             pkt->dts = pkt->pts;
1600         if (s->current_picture.f.key_frame)
1601             pkt->flags |= AV_PKT_FLAG_KEY;
1602         if (s->mb_info)
1603             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1604     } else {
1605         assert((put_bits_ptr(&s->pb) == s->pb.buf));
1606         s->frame_bits = 0;
1607     }
1608     assert((s->frame_bits & 7) == 0);
1609
1610     pkt->size = s->frame_bits / 8;
1611     *got_packet = !!pkt->size;
1612     return 0;
1613 }
1614
1615 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1616                                                 int n, int threshold)
1617 {
1618     static const char tab[64] = {
1619         3, 2, 2, 1, 1, 1, 1, 1,
1620         1, 1, 1, 1, 1, 1, 1, 1,
1621         1, 1, 1, 1, 1, 1, 1, 1,
1622         0, 0, 0, 0, 0, 0, 0, 0,
1623         0, 0, 0, 0, 0, 0, 0, 0,
1624         0, 0, 0, 0, 0, 0, 0, 0,
1625         0, 0, 0, 0, 0, 0, 0, 0,
1626         0, 0, 0, 0, 0, 0, 0, 0
1627     };
1628     int score = 0;
1629     int run = 0;
1630     int i;
1631     DCTELEM *block = s->block[n];
1632     const int last_index = s->block_last_index[n];
1633     int skip_dc;
1634
1635     if (threshold < 0) {
1636         skip_dc = 0;
1637         threshold = -threshold;
1638     } else
1639         skip_dc = 1;
1640
1641     /* Are all we could set to zero already zero? */
1642     if (last_index <= skip_dc - 1)
1643         return;
1644
1645     for (i = 0; i <= last_index; i++) {
1646         const int j = s->intra_scantable.permutated[i];
1647         const int level = FFABS(block[j]);
1648         if (level == 1) {
1649             if (skip_dc && i == 0)
1650                 continue;
1651             score += tab[run];
1652             run = 0;
1653         } else if (level > 1) {
1654             return;
1655         } else {
1656             run++;
1657         }
1658     }
1659     if (score >= threshold)
1660         return;
1661     for (i = skip_dc; i <= last_index; i++) {
1662         const int j = s->intra_scantable.permutated[i];
1663         block[j] = 0;
1664     }
1665     if (block[0])
1666         s->block_last_index[n] = 0;
1667     else
1668         s->block_last_index[n] = -1;
1669 }
1670
1671 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block,
1672                                int last_index)
1673 {
1674     int i;
1675     const int maxlevel = s->max_qcoeff;
1676     const int minlevel = s->min_qcoeff;
1677     int overflow = 0;
1678
1679     if (s->mb_intra) {
1680         i = 1; // skip clipping of intra dc
1681     } else
1682         i = 0;
1683
1684     for (; i <= last_index; i++) {
1685         const int j = s->intra_scantable.permutated[i];
1686         int level = block[j];
1687
1688         if (level > maxlevel) {
1689             level = maxlevel;
1690             overflow++;
1691         } else if (level < minlevel) {
1692             level = minlevel;
1693             overflow++;
1694         }
1695
1696         block[j] = level;
1697     }
1698
1699     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1700         av_log(s->avctx, AV_LOG_INFO,
1701                "warning, clipping %d dct coefficients to %d..%d\n",
1702                overflow, minlevel, maxlevel);
1703 }
1704
/**
 * Fill an 8x8 weight table from the local activity of an 8x8 pixel block.
 *
 * For every pixel the neighbourhood of up to 3x3 samples (cropped at the
 * block border) is examined and the weight is set to
 * 36 * sqrt(count * sum_of_squares - sum^2) / count.
 *
 * @param weight output, 64 entries in row-major order
 * @param ptr    top-left sample of the 8x8 block
 * @param stride distance in bytes between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left    = FFMAX(col - 1, 0);
            const int right   = FFMIN(8, col + 2);
            /* number of samples inside the cropped window */
            const int samples = (bottom - top) * (right - left);
            int total    = 0;
            int total_sq = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                const int base = ny * stride;

                for (nx = left; nx < right; nx++) {
                    const int pix = ptr[nx + base];

                    total    += pix;
                    total_sq += pix * pix;
                }
            }
            weight[col + 8 * row] =
                (36 * ff_sqrt(samples * total_sq - total * total)) / samples;
        }
    }
}
1728
1729 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1730                                                 int motion_x, int motion_y,
1731                                                 int mb_block_height,
1732                                                 int mb_block_count)
1733 {
1734     int16_t weight[8][64];
1735     DCTELEM orig[8][64];
1736     const int mb_x = s->mb_x;
1737     const int mb_y = s->mb_y;
1738     int i;
1739     int skip_dct[8];
1740     int dct_offset = s->linesize * 8; // default for progressive frames
1741     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1742     int wrap_y, wrap_c;
1743
1744     for (i = 0; i < mb_block_count; i++)
1745         skip_dct[i] = s->skipdct;
1746
1747     if (s->adaptive_quant) {
1748         const int last_qp = s->qscale;
1749         const int mb_xy = mb_x + mb_y * s->mb_stride;
1750
1751         s->lambda = s->lambda_table[mb_xy];
1752         update_qscale(s);
1753
1754         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1755             s->qscale = s->current_picture_ptr->f.qscale_table[mb_xy];
1756             s->dquant = s->qscale - last_qp;
1757
1758             if (s->out_format == FMT_H263) {
1759                 s->dquant = av_clip(s->dquant, -2, 2);
1760
1761                 if (s->codec_id == CODEC_ID_MPEG4) {
1762                     if (!s->mb_intra) {
1763                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1764                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1765                                 s->dquant = 0;
1766                         }
1767                         if (s->mv_type == MV_TYPE_8X8)
1768                             s->dquant = 0;
1769                     }
1770                 }
1771             }
1772         }
1773         ff_set_qscale(s, last_qp + s->dquant);
1774     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1775         ff_set_qscale(s, s->qscale + s->dquant);
1776
1777     wrap_y = s->linesize;
1778     wrap_c = s->uvlinesize;
1779     ptr_y  = s->new_picture.f.data[0] +
1780              (mb_y * 16 * wrap_y)              + mb_x * 16;
1781     ptr_cb = s->new_picture.f.data[1] +
1782              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1783     ptr_cr = s->new_picture.f.data[2] +
1784              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1785
1786     if((mb_x*16+16 > s->width || mb_y*16+16 > s->height) && s->codec_id != CODEC_ID_AMV){
1787         uint8_t *ebuf = s->edge_emu_buffer + 32;
1788         s->dsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
1789                                 mb_y * 16, s->width, s->height);
1790         ptr_y = ebuf;
1791         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, 8,
1792                                 mb_block_height, mb_x * 8, mb_y * 8,
1793                                 s->width >> 1, s->height >> 1);
1794         ptr_cb = ebuf + 18 * wrap_y;
1795         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr, wrap_c, 8,
1796                                 mb_block_height, mb_x * 8, mb_y * 8,
1797                                 s->width >> 1, s->height >> 1);
1798         ptr_cr = ebuf + 18 * wrap_y + 8;
1799     }
1800
1801     if (s->mb_intra) {
1802         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1803             int progressive_score, interlaced_score;
1804
1805             s->interlaced_dct = 0;
1806             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1807                                                     NULL, wrap_y, 8) +
1808                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1809                                                     NULL, wrap_y, 8) - 400;
1810
1811             if (progressive_score > 0) {
1812                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1813                                                        NULL, wrap_y * 2, 8) +
1814                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1815                                                        NULL, wrap_y * 2, 8);
1816                 if (progressive_score > interlaced_score) {
1817                     s->interlaced_dct = 1;
1818
1819                     dct_offset = wrap_y;
1820                     wrap_y <<= 1;
1821                     if (s->chroma_format == CHROMA_422)
1822                         wrap_c <<= 1;
1823                 }
1824             }
1825         }
1826
1827         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1828         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1829         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1830         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1831
1832         if (s->flags & CODEC_FLAG_GRAY) {
1833             skip_dct[4] = 1;
1834             skip_dct[5] = 1;
1835         } else {
1836             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1837             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1838             if (!s->chroma_y_shift) { /* 422 */
1839                 s->dsp.get_pixels(s->block[6],
1840                                   ptr_cb + (dct_offset >> 1), wrap_c);
1841                 s->dsp.get_pixels(s->block[7],
1842                                   ptr_cr + (dct_offset >> 1), wrap_c);
1843             }
1844         }
1845     } else {
1846         op_pixels_func (*op_pix)[4];
1847         qpel_mc_func (*op_qpix)[16];
1848         uint8_t *dest_y, *dest_cb, *dest_cr;
1849
1850         dest_y  = s->dest[0];
1851         dest_cb = s->dest[1];
1852         dest_cr = s->dest[2];
1853
1854         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1855             op_pix  = s->dsp.put_pixels_tab;
1856             op_qpix = s->dsp.put_qpel_pixels_tab;
1857         } else {
1858             op_pix  = s->dsp.put_no_rnd_pixels_tab;
1859             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1860         }
1861
1862         if (s->mv_dir & MV_DIR_FORWARD) {
1863             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.f.data,
1864                        op_pix, op_qpix);
1865             op_pix  = s->dsp.avg_pixels_tab;
1866             op_qpix = s->dsp.avg_qpel_pixels_tab;
1867         }
1868         if (s->mv_dir & MV_DIR_BACKWARD) {
1869             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.f.data,
1870                        op_pix, op_qpix);
1871         }
1872
1873         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1874             int progressive_score, interlaced_score;
1875
1876             s->interlaced_dct = 0;
1877             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1878                                                     ptr_y,              wrap_y,
1879                                                     8) +
1880                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1881                                                     ptr_y + wrap_y * 8, wrap_y,
1882                                                     8) - 400;
1883
1884             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1885                 progressive_score -= 400;
1886
1887             if (progressive_score > 0) {
1888                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1889                                                        ptr_y,
1890                                                        wrap_y * 2, 8) +
1891                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1892                                                        ptr_y + wrap_y,
1893                                                        wrap_y * 2, 8);
1894
1895                 if (progressive_score > interlaced_score) {
1896                     s->interlaced_dct = 1;
1897
1898                     dct_offset = wrap_y;
1899                     wrap_y <<= 1;
1900                     if (s->chroma_format == CHROMA_422)
1901                         wrap_c <<= 1;
1902                 }
1903             }
1904         }
1905
1906         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1907         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1908         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1909                            dest_y + dct_offset, wrap_y);
1910         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1911                            dest_y + dct_offset + 8, wrap_y);
1912
1913         if (s->flags & CODEC_FLAG_GRAY) {
1914             skip_dct[4] = 1;
1915             skip_dct[5] = 1;
1916         } else {
1917             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1918             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1919             if (!s->chroma_y_shift) { /* 422 */
1920                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1921                                    dest_cb + (dct_offset >> 1), wrap_c);
1922                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
1923                                    dest_cr + (dct_offset >> 1), wrap_c);
1924             }
1925         }
1926         /* pre quantization */
1927         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1928                 2 * s->qscale * s->qscale) {
1929             // FIXME optimize
1930             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1931                               wrap_y, 8) < 20 * s->qscale)
1932                 skip_dct[0] = 1;
1933             if (s->dsp.sad[1](NULL, ptr_y + 8,
1934                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1935                 skip_dct[1] = 1;
1936             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1937                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1938                 skip_dct[2] = 1;
1939             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1940                               dest_y + dct_offset + 8,
1941                               wrap_y, 8) < 20 * s->qscale)
1942                 skip_dct[3] = 1;
1943             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
1944                               wrap_c, 8) < 20 * s->qscale)
1945                 skip_dct[4] = 1;
1946             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
1947                               wrap_c, 8) < 20 * s->qscale)
1948                 skip_dct[5] = 1;
1949             if (!s->chroma_y_shift) { /* 422 */
1950                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
1951                                   dest_cb + (dct_offset >> 1),
1952                                   wrap_c, 8) < 20 * s->qscale)
1953                     skip_dct[6] = 1;
1954                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
1955                                   dest_cr + (dct_offset >> 1),
1956                                   wrap_c, 8) < 20 * s->qscale)
1957                     skip_dct[7] = 1;
1958             }
1959         }
1960     }
1961
1962     if (s->quantizer_noise_shaping) {
1963         if (!skip_dct[0])
1964             get_visual_weight(weight[0], ptr_y                 , wrap_y);
1965         if (!skip_dct[1])
1966             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
1967         if (!skip_dct[2])
1968             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
1969         if (!skip_dct[3])
1970             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
1971         if (!skip_dct[4])
1972             get_visual_weight(weight[4], ptr_cb                , wrap_c);
1973         if (!skip_dct[5])
1974             get_visual_weight(weight[5], ptr_cr                , wrap_c);
1975         if (!s->chroma_y_shift) { /* 422 */
1976             if (!skip_dct[6])
1977                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
1978                                   wrap_c);
1979             if (!skip_dct[7])
1980                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
1981                                   wrap_c);
1982         }
1983         memcpy(orig[0], s->block[0], sizeof(DCTELEM) * 64 * mb_block_count);
1984     }
1985
1986     /* DCT & quantize */
1987     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
1988     {
1989         for (i = 0; i < mb_block_count; i++) {
1990             if (!skip_dct[i]) {
1991                 int overflow;
1992                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
1993                 // FIXME we could decide to change to quantizer instead of
1994                 // clipping
1995                 // JS: I don't think that would be a good idea it could lower
1996                 //     quality instead of improve it. Just INTRADC clipping
1997                 //     deserves changes in quantizer
1998                 if (overflow)
1999                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2000             } else
2001                 s->block_last_index[i] = -1;
2002         }
2003         if (s->quantizer_noise_shaping) {
2004             for (i = 0; i < mb_block_count; i++) {
2005                 if (!skip_dct[i]) {
2006                     s->block_last_index[i] =
2007                         dct_quantize_refine(s, s->block[i], weight[i],
2008                                             orig[i], i, s->qscale);
2009                 }
2010             }
2011         }
2012
2013         if (s->luma_elim_threshold && !s->mb_intra)
2014             for (i = 0; i < 4; i++)
2015                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2016         if (s->chroma_elim_threshold && !s->mb_intra)
2017             for (i = 4; i < mb_block_count; i++)
2018                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2019
2020         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2021             for (i = 0; i < mb_block_count; i++) {
2022                 if (s->block_last_index[i] == -1)
2023                     s->coded_score[i] = INT_MAX / 256;
2024             }
2025         }
2026     }
2027
2028     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2029         s->block_last_index[4] =
2030         s->block_last_index[5] = 0;
2031         s->block[4][0] =
2032         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2033     }
2034
2035     // non c quantize code returns incorrect block_last_index FIXME
2036     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2037         for (i = 0; i < mb_block_count; i++) {
2038             int j;
2039             if (s->block_last_index[i] > 0) {
2040                 for (j = 63; j > 0; j--) {
2041                     if (s->block[i][s->intra_scantable.permutated[j]])
2042                         break;
2043                 }
2044                 s->block_last_index[i] = j;
2045             }
2046         }
2047     }
2048
2049     /* huffman encode */
2050     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2051     case CODEC_ID_MPEG1VIDEO:
2052     case CODEC_ID_MPEG2VIDEO:
2053         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2054             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2055         break;
2056     case CODEC_ID_MPEG4:
2057         if (CONFIG_MPEG4_ENCODER)
2058             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2059         break;
2060     case CODEC_ID_MSMPEG4V2:
2061     case CODEC_ID_MSMPEG4V3:
2062     case CODEC_ID_WMV1:
2063         if (CONFIG_MSMPEG4_ENCODER)
2064             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2065         break;
2066     case CODEC_ID_WMV2:
2067         if (CONFIG_WMV2_ENCODER)
2068             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2069         break;
2070     case CODEC_ID_H261:
2071         if (CONFIG_H261_ENCODER)
2072             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2073         break;
2074     case CODEC_ID_H263:
2075     case CODEC_ID_H263P:
2076     case CODEC_ID_FLV1:
2077     case CODEC_ID_RV10:
2078     case CODEC_ID_RV20:
2079         if (CONFIG_H263_ENCODER)
2080             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2081         break;
2082     case CODEC_ID_MJPEG:
2083     case CODEC_ID_AMV:
2084         if (CONFIG_MJPEG_ENCODER)
2085             ff_mjpeg_encode_mb(s, s->block);
2086         break;
2087     default:
2088         assert(0);
2089     }
2090 }
2091
2092 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2093 {
2094     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2095     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2096 }
2097
2098 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2099     int i;
2100
2101     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2102
2103     /* mpeg1 */
2104     d->mb_skip_run= s->mb_skip_run;
2105     for(i=0; i<3; i++)
2106         d->last_dc[i] = s->last_dc[i];
2107
2108     /* statistics */
2109     d->mv_bits= s->mv_bits;
2110     d->i_tex_bits= s->i_tex_bits;
2111     d->p_tex_bits= s->p_tex_bits;
2112     d->i_count= s->i_count;
2113     d->f_count= s->f_count;
2114     d->b_count= s->b_count;
2115     d->skip_count= s->skip_count;
2116     d->misc_bits= s->misc_bits;
2117     d->last_bits= 0;
2118
2119     d->mb_skipped= 0;
2120     d->qscale= s->qscale;
2121     d->dquant= s->dquant;
2122
2123     d->esc3_level_length= s->esc3_level_length;
2124 }
2125
2126 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2127     int i;
2128
2129     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2130     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2131
2132     /* mpeg1 */
2133     d->mb_skip_run= s->mb_skip_run;
2134     for(i=0; i<3; i++)
2135         d->last_dc[i] = s->last_dc[i];
2136
2137     /* statistics */
2138     d->mv_bits= s->mv_bits;
2139     d->i_tex_bits= s->i_tex_bits;
2140     d->p_tex_bits= s->p_tex_bits;
2141     d->i_count= s->i_count;
2142     d->f_count= s->f_count;
2143     d->b_count= s->b_count;
2144     d->skip_count= s->skip_count;
2145     d->misc_bits= s->misc_bits;
2146
2147     d->mb_intra= s->mb_intra;
2148     d->mb_skipped= s->mb_skipped;
2149     d->mv_type= s->mv_type;
2150     d->mv_dir= s->mv_dir;
2151     d->pb= s->pb;
2152     if(s->data_partitioning){
2153         d->pb2= s->pb2;
2154         d->tex_pb= s->tex_pb;
2155     }
2156     d->block= s->block;
2157     for(i=0; i<8; i++)
2158         d->block_last_index[i]= s->block_last_index[i];
2159     d->interlaced_dct= s->interlaced_dct;
2160     d->qscale= s->qscale;
2161
2162     d->esc3_level_length= s->esc3_level_length;
2163 }
2164
2165 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2166                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2167                            int *dmin, int *next_block, int motion_x, int motion_y)
2168 {
2169     int score;
2170     uint8_t *dest_backup[3];
2171
2172     copy_context_before_encode(s, backup, type);
2173
2174     s->block= s->blocks[*next_block];
2175     s->pb= pb[*next_block];
2176     if(s->data_partitioning){
2177         s->pb2   = pb2   [*next_block];
2178         s->tex_pb= tex_pb[*next_block];
2179     }
2180
2181     if(*next_block){
2182         memcpy(dest_backup, s->dest, sizeof(s->dest));
2183         s->dest[0] = s->rd_scratchpad;
2184         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2185         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2186         assert(s->linesize >= 32); //FIXME
2187     }
2188
2189     encode_mb(s, motion_x, motion_y);
2190
2191     score= put_bits_count(&s->pb);
2192     if(s->data_partitioning){
2193         score+= put_bits_count(&s->pb2);
2194         score+= put_bits_count(&s->tex_pb);
2195     }
2196
2197     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2198         ff_MPV_decode_mb(s, s->block);
2199
2200         score *= s->lambda2;
2201         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2202     }
2203
2204     if(*next_block){
2205         memcpy(s->dest, dest_backup, sizeof(s->dest));
2206     }
2207
2208     if(score<*dmin){
2209         *dmin= score;
2210         *next_block^=1;
2211
2212         copy_context_after_encode(best, s, type);
2213     }
2214 }
2215
2216 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2217     uint32_t *sq = ff_squareTbl + 256;
2218     int acc=0;
2219     int x,y;
2220
2221     if(w==16 && h==16)
2222         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2223     else if(w==8 && h==8)
2224         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2225
2226     for(y=0; y<h; y++){
2227         for(x=0; x<w; x++){
2228             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2229         }
2230     }
2231
2232     assert(acc>=0);
2233
2234     return acc;
2235 }
2236
2237 static int sse_mb(MpegEncContext *s){
2238     int w= 16;
2239     int h= 16;
2240
2241     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2242     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2243
2244     if(w==16 && h==16)
2245       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2246         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2247                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2248                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2249       }else{
2250         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2251                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2252                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2253       }
2254     else
2255         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2256                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2257                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2258 }
2259
2260 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2261     MpegEncContext *s= *(void**)arg;
2262
2263
2264     s->me.pre_pass=1;
2265     s->me.dia_size= s->avctx->pre_dia_size;
2266     s->first_slice_line=1;
2267     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2268         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2269             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2270         }
2271         s->first_slice_line=0;
2272     }
2273
2274     s->me.pre_pass=0;
2275
2276     return 0;
2277 }
2278
2279 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2280     MpegEncContext *s= *(void**)arg;
2281
2282     ff_check_alignment();
2283
2284     s->me.dia_size= s->avctx->dia_size;
2285     s->first_slice_line=1;
2286     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2287         s->mb_x=0; //for block init below
2288         ff_init_block_index(s);
2289         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2290             s->block_index[0]+=2;
2291             s->block_index[1]+=2;
2292             s->block_index[2]+=2;
2293             s->block_index[3]+=2;
2294
2295             /* compute motion vector & mb_type and store in context */
2296             if(s->pict_type==AV_PICTURE_TYPE_B)
2297                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2298             else
2299                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2300         }
2301         s->first_slice_line=0;
2302     }
2303     return 0;
2304 }
2305
2306 static int mb_var_thread(AVCodecContext *c, void *arg){
2307     MpegEncContext *s= *(void**)arg;
2308     int mb_x, mb_y;
2309
2310     ff_check_alignment();
2311
2312     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2313         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2314             int xx = mb_x * 16;
2315             int yy = mb_y * 16;
2316             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2317             int varc;
2318             int sum = s->dsp.pix_sum(pix, s->linesize);
2319
2320             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2321
2322             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2323             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2324             s->me.mb_var_sum_temp    += varc;
2325         }
2326     }
2327     return 0;
2328 }
2329
2330 static void write_slice_end(MpegEncContext *s){
2331     if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4){
2332         if(s->partitioned_frame){
2333             ff_mpeg4_merge_partitions(s);
2334         }
2335
2336         ff_mpeg4_stuffing(&s->pb);
2337     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2338         ff_mjpeg_encode_stuffing(&s->pb);
2339     }
2340
2341     avpriv_align_put_bits(&s->pb);
2342     flush_put_bits(&s->pb);
2343
2344     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2345         s->misc_bits+= get_bits_diff(s);
2346 }
2347
2348 static void write_mb_info(MpegEncContext *s)
2349 {
2350     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2351     int offset = put_bits_count(&s->pb);
2352     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2353     int gobn = s->mb_y / s->gob_index;
2354     int pred_x, pred_y;
2355     if (CONFIG_H263_ENCODER)
2356         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2357     bytestream_put_le32(&ptr, offset);
2358     bytestream_put_byte(&ptr, s->qscale);
2359     bytestream_put_byte(&ptr, gobn);
2360     bytestream_put_le16(&ptr, mba);
2361     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2362     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2363     /* 4MV not implemented */
2364     bytestream_put_byte(&ptr, 0); /* hmv2 */
2365     bytestream_put_byte(&ptr, 0); /* vmv2 */
2366 }
2367
2368 static void update_mb_info(MpegEncContext *s, int startcode)
2369 {
2370     if (!s->mb_info)
2371         return;
2372     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2373         s->mb_info_size += 12;
2374         s->prev_mb_info = s->last_mb_info;
2375     }
2376     if (startcode) {
2377         s->prev_mb_info = put_bits_count(&s->pb)/8;
2378         /* This might have incremented mb_info_size above, and we return without
2379          * actually writing any info into that slot yet. But in that case,
2380          * this will be called again at the start of the after writing the
2381          * start code, actually writing the mb info. */
2382         return;
2383     }
2384
2385     s->last_mb_info = put_bits_count(&s->pb)/8;
2386     if (!s->mb_info_size)
2387         s->mb_info_size += 12;
2388     write_mb_info(s);
2389 }
2390
/**
 * Encode the macroblock rows [s->start_mb_y, s->end_mb_y) of the current
 * picture into s->pb.
 *
 * @param c   codec context (not referenced in the body; s->avctx is used)
 * @param arg pointer to a MpegEncContext pointer identifying the context
 *            whose rows are encoded
 * @return 0 on success, -1 if fewer than MAX_MB_BYTES remain in one of the
 *         output bit buffers
 */
2391 static int encode_thread(AVCodecContext *c, void *arg){
2392     MpegEncContext *s= *(void**)arg;
2393     int mb_x, mb_y, pdif = 0;
2394     int chr_h= 16>>s->chroma_y_shift;
2395     int i, j;
2396     MpegEncContext best_s, backup_s;
2397     uint8_t bit_buf[2][MAX_MB_BYTES];
2398     uint8_t bit_buf2[2][MAX_MB_BYTES];
2399     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2400     PutBitContext pb[2], pb2[2], tex_pb[2];
2401 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
2402
2403     ff_check_alignment();
2404
    /* Scratch bit writers over the local buffers; they are handed to
     * encode_mb_hq() when several candidate codings of a macroblock are
     * tried. */
2405     for(i=0; i<2; i++){
2406         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2407         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2408         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2409     }
2410
    /* Reset the bit and macroblock-type statistics for this call. */
2411     s->last_bits= put_bits_count(&s->pb);
2412     s->mv_bits=0;
2413     s->misc_bits=0;
2414     s->i_tex_bits=0;
2415     s->p_tex_bits=0;
2416     s->i_count=0;
2417     s->f_count=0;
2418     s->b_count=0;
2419     s->skip_count=0;
2420
2421     for(i=0; i<3; i++){
2422         /* init last dc values */
2423         /* note: quant matrix value (8) is implied here */
2424         s->last_dc[i] = 128 << s->intra_dc_precision;
2425
2426         s->current_picture.f.error[i] = 0;
2427     }
    /* AMV overrides the initial DC predictors set by the loop above. */
2428     if(s->codec_id==CODEC_ID_AMV){
2429         s->last_dc[0] = 128*8/13;
2430         s->last_dc[1] = 128*8/14;
2431         s->last_dc[2] = 128*8/14;
2432     }
2433     s->mb_skip_run = 0;
2434     memset(s->last_mv, 0, sizeof(s->last_mv));
2435
2436     s->last_mv_dir = 0;
2437
2438     switch(s->codec_id){
2439     case CODEC_ID_H263:
2440     case CODEC_ID_H263P:
2441     case CODEC_ID_FLV1:
2442         if (CONFIG_H263_ENCODER)
2443             s->gob_index = ff_h263_get_gob_height(s);
2444         break;
2445     case CODEC_ID_MPEG4:
2446         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2447             ff_mpeg4_init_partitions(s);
2448         break;
2449     }
2450
2451     s->resync_mb_x=0;
2452     s->resync_mb_y=0;
2453     s->first_slice_line = 1;
2454     s->ptr_lastgob = s->pb.buf;
2455     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2456 //    printf("row %d at %X\n", s->mb_y, (int)s);
2457         s->mb_x=0;
2458         s->mb_y= mb_y;
2459
2460         ff_set_qscale(s, s->qscale);
2461         ff_init_block_index(s);
2462
2463         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2464             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2465             int mb_type= s->mb_type[xy];
2466 //            int d;
2467             int dmin= INT_MAX;
2468             int dir;
2469
            /* Refuse to start a macroblock unless at least MAX_MB_BYTES are
             * left in the output buffer. */
2470             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2471                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2472                 return -1;
2473             }
2474             if(s->data_partitioning){
2475                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2476                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2477                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2478                     return -1;
2479                 }
2480             }
2481
2482             s->mb_x = mb_x;
2483             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2484             ff_update_block_index(s);
2485
2486             if(CONFIG_H261_ENCODER && s->codec_id == CODEC_ID_H261){
2487                 ff_h261_reorder_mb_index(s);
2488                 xy= s->mb_y*s->mb_stride + s->mb_x;
2489                 mb_type= s->mb_type[xy];
2490             }
2491
2492             /* write gob / video packet header  */
2493             if(s->rtp_mode){
2494                 int current_packet_size, is_gob_start;
2495
2496                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2497
                /* A new packet is due once the bytes written since the last
                 * GOB reach rtp_payload_size (never at the very first
                 * macroblock of the picture). */
2498                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2499
                /* The first macroblock of this call's row range also starts
                 * a packet, unless that range begins at row 0. */
2500                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2501
2502                 switch(s->codec_id){
2503                 case CODEC_ID_H263:
2504                 case CODEC_ID_H263P:
2505                     if(!s->h263_slice_structured)
2506                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2507                     break;
2508                 case CODEC_ID_MPEG2VIDEO:
2509                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
                    /* no break: the mb_skip_run check below applies to
                     * MPEG-2 as well */
2510                 case CODEC_ID_MPEG1VIDEO:
2511                     if(s->mb_skip_run) is_gob_start=0;
2512                     break;
2513                 }
2514
2515                 if(is_gob_start){
2516                     if(s->start_mb_y != mb_y || mb_x!=0){
2517                         write_slice_end(s);
2518
2519                         if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
2520                             ff_mpeg4_init_partitions(s);
2521                         }
2522                     }
2523
2524                     assert((put_bits_count(&s->pb)&7) == 0);
2525                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2526
                    /* When error_rate is set, some packets are discarded by
                     * rewinding the write pointer to the start of the packet
                     * just finished.
                     * NOTE(review): d is 0 when error_rate exceeds 100,
                     * which would make r % d divide by zero - confirm that
                     * error_rate is range-limited elsewhere. */
2527                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2528                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2529                         int d= 100 / s->avctx->error_rate;
2530                         if(r % d == 0){
2531                             current_packet_size=0;
2532                             s->pb.buf_ptr= s->ptr_lastgob;
2533                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2534                         }
2535                     }
2536
                    /* Hand the finished packet to the user callback together
                     * with the number of macroblocks since the last resync
                     * point. */
2537                     if (s->avctx->rtp_callback){
2538                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2539                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2540                     }
2541                     update_mb_info(s, 1);
2542
2543                     switch(s->codec_id){
2544                     case CODEC_ID_MPEG4:
2545                         if (CONFIG_MPEG4_ENCODER) {
2546                             ff_mpeg4_encode_video_packet_header(s);
2547                             ff_mpeg4_clean_buffers(s);
2548                         }
2549                     break;
2550                     case CODEC_ID_MPEG1VIDEO:
2551                     case CODEC_ID_MPEG2VIDEO:
2552                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2553                             ff_mpeg1_encode_slice_header(s);
2554                             ff_mpeg1_clean_buffers(s);
2555                         }
2556                     break;
2557                     case CODEC_ID_H263:
2558                     case CODEC_ID_H263P:
2559                         if (CONFIG_H263_ENCODER)
2560                             ff_h263_encode_gob_header(s, mb_y);
2561                     break;
2562                     }
2563
                    /* In pass 1 the header bits just written are counted as
                     * misc bits. */
2564                     if(s->flags&CODEC_FLAG_PASS1){
2565                         int bits= put_bits_count(&s->pb);
2566                         s->misc_bits+= bits - s->last_bits;
2567                         s->last_bits= bits;
2568                     }
2569
2570                     s->ptr_lastgob += current_packet_size;
2571                     s->first_slice_line=1;
2572                     s->resync_mb_x=mb_x;
2573                     s->resync_mb_y=mb_y;
2574                 }
2575             }
2576
            /* first_slice_line is cleared once the macroblock directly below
             * the resync point is reached. */
2577             if(  (s->resync_mb_x   == s->mb_x)
2578                && s->resync_mb_y+1 == s->mb_y){
2579                 s->first_slice_line=0;
2580             }
2581
2582             s->mb_skipped=0;
2583             s->dquant=0; //only for QP_RD
2584
2585             update_mb_info(s, 0);
2586
2587             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2588                 int next_block=0;
2589                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2590
                /* Snapshot the context and the real bit writers; every
                 * candidate below is passed to encode_mb_hq() together with
                 * this snapshot, the scratch writers and dmin. */
2591                 copy_context_before_encode(&backup_s, s, -1);
2592                 backup_s.pb= s->pb;
2593                 best_s.data_partitioning= s->data_partitioning;
2594                 best_s.partitioned_frame= s->partitioned_frame;
2595                 if(s->data_partitioning){
2596                     backup_s.pb2= s->pb2;
2597                     backup_s.tex_pb= s->tex_pb;
2598                 }
2599
2600                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2601                     s->mv_dir = MV_DIR_FORWARD;
2602                     s->mv_type = MV_TYPE_16X16;
2603                     s->mb_intra= 0;
2604                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2605                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2606                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2607                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2608                 }
2609                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2610                     s->mv_dir = MV_DIR_FORWARD;
2611                     s->mv_type = MV_TYPE_FIELD;
2612                     s->mb_intra= 0;
2613                     for(i=0; i<2; i++){
2614                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2615                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2616                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2617                     }
2618                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2619                                  &dmin, &next_block, 0, 0);
2620                 }
2621                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2622                     s->mv_dir = MV_DIR_FORWARD;
2623                     s->mv_type = MV_TYPE_16X16;
2624                     s->mb_intra= 0;
2625                     s->mv[0][0][0] = 0;
2626                     s->mv[0][0][1] = 0;
2627                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2628                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2629                 }
2630                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2631                     s->mv_dir = MV_DIR_FORWARD;
2632                     s->mv_type = MV_TYPE_8X8;
2633                     s->mb_intra= 0;
2634                     for(i=0; i<4; i++){
2635                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2636                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2637                     }
2638                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2639                                  &dmin, &next_block, 0, 0);
2640                 }
2641                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2642                     s->mv_dir = MV_DIR_FORWARD;
2643                     s->mv_type = MV_TYPE_16X16;
2644                     s->mb_intra= 0;
2645                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2646                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2647                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2648                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2649                 }
2650                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2651                     s->mv_dir = MV_DIR_BACKWARD;
2652                     s->mv_type = MV_TYPE_16X16;
2653                     s->mb_intra= 0;
2654                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2655                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2656                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2657                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2658                 }
2659                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2660                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2661                     s->mv_type = MV_TYPE_16X16;
2662                     s->mb_intra= 0;
2663                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2664                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2665                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2666                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2667                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2668                                  &dmin, &next_block, 0, 0);
2669                 }
2670                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2671                     s->mv_dir = MV_DIR_FORWARD;
2672                     s->mv_type = MV_TYPE_FIELD;
2673                     s->mb_intra= 0;
2674                     for(i=0; i<2; i++){
2675                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2676                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2677                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2678                     }
2679                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2680                                  &dmin, &next_block, 0, 0);
2681                 }
2682                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2683                     s->mv_dir = MV_DIR_BACKWARD;
2684                     s->mv_type = MV_TYPE_FIELD;
2685                     s->mb_intra= 0;
2686                     for(i=0; i<2; i++){
2687                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2688                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2689                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2690                     }
2691                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2692                                  &dmin, &next_block, 0, 0);
2693                 }
2694                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2695                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2696                     s->mv_type = MV_TYPE_FIELD;
2697                     s->mb_intra= 0;
2698                     for(dir=0; dir<2; dir++){
2699                         for(i=0; i<2; i++){
2700                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2701                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2702                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2703                         }
2704                     }
2705                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2706                                  &dmin, &next_block, 0, 0);
2707                 }
2708                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2709                     s->mv_dir = 0;
2710                     s->mv_type = MV_TYPE_16X16;
2711                     s->mb_intra= 1;
2712                     s->mv[0][0][0] = 0;
2713                     s->mv[0][0][1] = 0;
2714                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2715                                  &dmin, &next_block, 0, 0);
2716                     if(s->h263_pred || s->h263_aic){
2717                         if(best_s.mb_intra)
2718                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2719                         else
2720                             ff_clean_intra_table_entries(s); //old mode?
2721                     }
2722                 }
2723
                /* With QP_RD, additionally try the best 16x16 candidate with
                 * the qscale offsets from dquant_tab; B-pictures start at
                 * index 2 and so only try -2 and +2. */
2724                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2725                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2726                         const int last_qp= backup_s.qscale;
2727                         int qpi, qp, dc[6];
2728                         DCTELEM ac[6][16];
2729                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2730                         static const int dquant_tab[4]={-1,1,-2,2};
2731
2732                         assert(backup_s.dquant == 0);
2733
2734                         //FIXME intra
2735                         s->mv_dir= best_s.mv_dir;
2736                         s->mv_type = MV_TYPE_16X16;
2737                         s->mb_intra= best_s.mb_intra;
2738                         s->mv[0][0][0] = best_s.mv[0][0][0];
2739                         s->mv[0][0][1] = best_s.mv[0][0][1];
2740                         s->mv[1][0][0] = best_s.mv[1][0][0];
2741                         s->mv[1][0][1] = best_s.mv[1][0][1];
2742
2743                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2744                         for(; qpi<4; qpi++){
2745                             int dquant= dquant_tab[qpi];
2746                             qp= last_qp + dquant;
2747                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2748                                 continue;
2749                             backup_s.dquant= dquant;
                            /* Save the DC/AC prediction values so they can
                             * be put back if this qscale is not selected. */
2750                             if(s->mb_intra && s->dc_val[0]){
2751                                 for(i=0; i<6; i++){
2752                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2753                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
2754                                 }
2755                             }
2756
2757                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2758                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2759                             if(best_s.qscale != qp){
2760                                 if(s->mb_intra && s->dc_val[0]){
2761                                     for(i=0; i<6; i++){
2762                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2763                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
2764                                     }
2765                                 }
2766                             }
2767                         }
2768                     }
2769                 }
2770                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2771                     int mx= s->b_direct_mv_table[xy][0];
2772                     int my= s->b_direct_mv_table[xy][1];
2773
2774                     backup_s.dquant = 0;
2775                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2776                     s->mb_intra= 0;
2777                     ff_mpeg4_set_direct_mv(s, mx, my);
2778                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2779                                  &dmin, &next_block, mx, my);
2780                 }
2781                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2782                     backup_s.dquant = 0;
2783                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2784                     s->mb_intra= 0;
2785                     ff_mpeg4_set_direct_mv(s, 0, 0);
2786                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2787                                  &dmin, &next_block, 0, 0);
2788                 }
                /* With SKIP_RD, if any block_last_index[] entry is non-zero,
                 * retry the best non-intra candidate with s->skipdct set. */
2789                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2790                     int coded=0;
2791                     for(i=0; i<6; i++)
2792                         coded |= s->block_last_index[i];
2793                     if(coded){
2794                         int mx,my;
2795                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2796                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2797                             mx=my=0; //FIXME find the one we actually used
2798                             ff_mpeg4_set_direct_mv(s, mx, my);
2799                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2800                             mx= s->mv[1][0][0];
2801                             my= s->mv[1][0][1];
2802                         }else{
2803                             mx= s->mv[0][0][0];
2804                             my= s->mv[0][0][1];
2805                         }
2806
2807                         s->mv_dir= best_s.mv_dir;
2808                         s->mv_type = best_s.mv_type;
2809                         s->mb_intra= 0;
2810 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2811                         s->mv[0][0][1] = best_s.mv[0][0][1];
2812                         s->mv[1][0][0] = best_s.mv[1][0][0];
2813                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2814                         backup_s.dquant= 0;
2815                         s->skipdct=1;
2816                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2817                                         &dmin, &next_block, mx, my);
2818                         s->skipdct=0;
2819                     }
2820                 }
2821
2822                 s->current_picture.f.qscale_table[xy] = best_s.qscale;
2823
                /* Adopt the state of the best candidate and append its bits
                 * from the scratch buffer to the real bit writer(s). */
2824                 copy_context_after_encode(s, &best_s, -1);
2825
2826                 pb_bits_count= put_bits_count(&s->pb);
2827                 flush_put_bits(&s->pb);
2828                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2829                 s->pb= backup_s.pb;
2830
2831                 if(s->data_partitioning){
2832                     pb2_bits_count= put_bits_count(&s->pb2);
2833                     flush_put_bits(&s->pb2);
2834                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2835                     s->pb2= backup_s.pb2;
2836
2837                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2838                     flush_put_bits(&s->tex_pb);
2839                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2840                     s->tex_pb= backup_s.tex_pb;
2841                 }
2842                 s->last_bits= put_bits_count(&s->pb);
2843
2844                 if (CONFIG_H263_ENCODER &&
2845                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2846                     ff_h263_update_motion_val(s);
2847
                /* Copy the macroblock pixels held in rd_scratchpad to
                 * s->dest[]. */
2848                 if(next_block==0){ //FIXME 16 vs linesize16
2849                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2850                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2851                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2852                 }
2853
2854                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2855                     ff_MPV_decode_mb(s, s->block);
2856             } else {
2857                 int motion_x = 0, motion_y = 0;
2858                 s->mv_type=MV_TYPE_16X16;
2859                 // only one MB-Type possible
2860
2861                 switch(mb_type){
2862                 case CANDIDATE_MB_TYPE_INTRA:
2863                     s->mv_dir = 0;
2864                     s->mb_intra= 1;
2865                     motion_x= s->mv[0][0][0] = 0;
2866                     motion_y= s->mv[0][0][1] = 0;
2867                     break;
2868                 case CANDIDATE_MB_TYPE_INTER:
2869                     s->mv_dir = MV_DIR_FORWARD;
2870                     s->mb_intra= 0;
2871                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2872                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2873                     break;
2874                 case CANDIDATE_MB_TYPE_INTER_I:
2875                     s->mv_dir = MV_DIR_FORWARD;
2876                     s->mv_type = MV_TYPE_FIELD;
2877                     s->mb_intra= 0;
2878                     for(i=0; i<2; i++){
2879                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2880                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2881                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2882                     }
2883                     break;
2884                 case CANDIDATE_MB_TYPE_INTER4V:
2885                     s->mv_dir = MV_DIR_FORWARD;
2886                     s->mv_type = MV_TYPE_8X8;
2887                     s->mb_intra= 0;
2888                     for(i=0; i<4; i++){
2889                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2890                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2891                     }
2892                     break;
2893                 case CANDIDATE_MB_TYPE_DIRECT:
2894                     if (CONFIG_MPEG4_ENCODER) {
2895                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2896                         s->mb_intra= 0;
2897                         motion_x=s->b_direct_mv_table[xy][0];
2898                         motion_y=s->b_direct_mv_table[xy][1];
2899                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2900                     }
2901                     break;
2902                 case CANDIDATE_MB_TYPE_DIRECT0:
2903                     if (CONFIG_MPEG4_ENCODER) {
2904                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2905                         s->mb_intra= 0;
2906                         ff_mpeg4_set_direct_mv(s, 0, 0);
2907                     }
2908                     break;
2909                 case CANDIDATE_MB_TYPE_BIDIR:
2910                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2911                     s->mb_intra= 0;
2912                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2913                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2914                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2915                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2916                     break;
2917                 case CANDIDATE_MB_TYPE_BACKWARD:
2918                     s->mv_dir = MV_DIR_BACKWARD;
2919                     s->mb_intra= 0;
2920                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2921                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2922                     break;
2923                 case CANDIDATE_MB_TYPE_FORWARD:
2924                     s->mv_dir = MV_DIR_FORWARD;
2925                     s->mb_intra= 0;
2926                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2927                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2928 //                    printf(" %d %d ", motion_x, motion_y);
2929                     break;
2930                 case CANDIDATE_MB_TYPE_FORWARD_I:
2931                     s->mv_dir = MV_DIR_FORWARD;
2932                     s->mv_type = MV_TYPE_FIELD;
2933                     s->mb_intra= 0;
2934                     for(i=0; i<2; i++){
2935                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2936                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2937                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2938                     }
2939                     break;
2940                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2941                     s->mv_dir = MV_DIR_BACKWARD;
2942                     s->mv_type = MV_TYPE_FIELD;
2943                     s->mb_intra= 0;
2944                     for(i=0; i<2; i++){
2945                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2946                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2947                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2948                     }
2949                     break;
2950                 case CANDIDATE_MB_TYPE_BIDIR_I:
2951                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2952                     s->mv_type = MV_TYPE_FIELD;
2953                     s->mb_intra= 0;
2954                     for(dir=0; dir<2; dir++){
2955                         for(i=0; i<2; i++){
2956                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2957                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2958                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2959                         }
2960                     }
2961                     break;
                /* An unknown type is only logged; encode_mb() below is still
                 * called with whatever motion state is currently set. */
2962                 default:
2963                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
2964                 }
2965
2966                 encode_mb(s, motion_x, motion_y);
2967
2968                 // RAL: Update last macroblock type
2969                 s->last_mv_dir = s->mv_dir;
2970
2971                 if (CONFIG_H263_ENCODER &&
2972                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2973                     ff_h263_update_motion_val(s);
2974
2975                 ff_MPV_decode_mb(s, s->block);
2976             }
2977
2978             /* clean the MV table in IPS frames for direct mode in B frames */
2979             if(s->mb_intra /* && I,P,S_TYPE */){
2980                 s->p_mv_table[xy][0]=0;
2981                 s->p_mv_table[xy][1]=0;
2982             }
2983
            /* With CODEC_FLAG_PSNR, add sse() of the source picture against
             * s->dest[] to current_picture.f.error[] for each plane; w and h
             * are clipped at the right and bottom picture edges. */
2984             if(s->flags&CODEC_FLAG_PSNR){
2985                 int w= 16;
2986                 int h= 16;
2987
2988                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2989                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2990
2991                 s->current_picture.f.error[0] += sse(
2992                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
2993                     s->dest[0], w, h, s->linesize);
2994                 s->current_picture.f.error[1] += sse(
2995                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2996                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2997                 s->current_picture.f.error[2] += sse(
2998                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2999                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3000             }
3001             if(s->loop_filter){
3002                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3003                     ff_h263_loop_filter(s);
3004             }
3005 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
3006         }
3007     }
3008
3009     //not beautiful here but we must write it before flushing so it has to be here
3010     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3011         ff_msmpeg4_encode_ext_header(s);
3012
3013     write_slice_end(s);
3014
3015     /* Send the last GOB if RTP */
3016     if (s->avctx->rtp_callback) {
3017         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3018         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3019         /* Call the RTP callback to send the last GOB */
3020         emms_c();
3021         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3022     }
3023
3024     return 0;
3025 }
3026
3027 #define MERGE(field) dst->field += src->field; src->field=0
3028 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3029     MERGE(me.scene_change_score);
3030     MERGE(me.mc_mb_var_sum_temp);
3031     MERGE(me.mb_var_sum_temp);
3032 }
3033
3034 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3035     int i;
3036
3037     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3038     MERGE(dct_count[1]);
3039     MERGE(mv_bits);
3040     MERGE(i_tex_bits);
3041     MERGE(p_tex_bits);
3042     MERGE(i_count);
3043     MERGE(f_count);
3044     MERGE(b_count);
3045     MERGE(skip_count);
3046     MERGE(misc_bits);
3047     MERGE(error_count);
3048     MERGE(padding_bug_score);
3049     MERGE(current_picture.f.error[0]);
3050     MERGE(current_picture.f.error[1]);
3051     MERGE(current_picture.f.error[2]);
3052
3053     if(dst->avctx->noise_reduction){
3054         for(i=0; i<64; i++){
3055             MERGE(dct_error_sum[0][i]);
3056             MERGE(dct_error_sum[1][i]);
3057         }
3058     }
3059
3060     assert(put_bits_count(&src->pb) % 8 ==0);
3061     assert(put_bits_count(&dst->pb) % 8 ==0);
3062     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3063     flush_put_bits(&dst->pb);
3064 }
3065
3066 static int estimate_qp(MpegEncContext *s, int dry_run){
3067     if (s->next_lambda){
3068         s->current_picture_ptr->f.quality =
3069         s->current_picture.f.quality = s->next_lambda;
3070         if(!dry_run) s->next_lambda= 0;
3071     } else if (!s->fixed_qscale) {
3072         s->current_picture_ptr->f.quality =
3073         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3074         if (s->current_picture.f.quality < 0)
3075             return -1;
3076     }
3077
3078     if(s->adaptive_quant){
3079         switch(s->codec_id){
3080         case CODEC_ID_MPEG4:
3081             if (CONFIG_MPEG4_ENCODER)
3082                 ff_clean_mpeg4_qscales(s);
3083             break;
3084         case CODEC_ID_H263:
3085         case CODEC_ID_H263P:
3086         case CODEC_ID_FLV1:
3087             if (CONFIG_H263_ENCODER)
3088                 ff_clean_h263_qscales(s);
3089             break;
3090         default:
3091             ff_init_qscale_tab(s);
3092         }
3093
3094         s->lambda= s->lambda_table[0];
3095         //FIXME broken
3096     }else
3097         s->lambda = s->current_picture.f.quality;
3098 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
3099     update_qscale(s);
3100     return 0;
3101 }
3102
3103 /* must be called before writing the header */
3104 static void set_frame_distances(MpegEncContext * s){
3105     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3106     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3107
3108     if(s->pict_type==AV_PICTURE_TYPE_B){
3109         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3110         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3111     }else{
3112         s->pp_time= s->time - s->last_non_b_time;
3113         s->last_non_b_time= s->time;
3114         assert(s->picture_number==0 || s->pp_time > 0);
3115     }
3116 }
3117
/**
 * Encode the current picture into s->pb.
 *
 * Steps visible in this function, in order:
 *  - set up timing, rounding mode and a provisional lambda,
 *  - run motion estimation on the slice contexts (for I-pictures only the
 *    variance pass, and only when the qscale is not fixed),
 *  - turn a P-picture into an I-picture when the scene change score exceeds
 *    the threshold,
 *  - choose f_code/b_code and fix overlong motion vectors (unless umvplus),
 *  - estimate the final quantiser,
 *  - build the MJPEG / AMV quantisation matrices where applicable,
 *  - write the format specific picture header,
 *  - run encode_thread on the slice contexts and merge their results.
 *
 * @param s              encoder context
 * @param picture_number stored in s->picture_number and handed to the
 *                       picture header writers
 * @return 0 on success, -1 if estimate_qp() or ff_init_me() fails
 */
3118 static int encode_picture(MpegEncContext *s, int picture_number)
3119 {
3120     int i;
3121     int bits;
3122     int context_count = s->slice_context_count;
3123
3124     s->picture_number = picture_number;
3125
3126     /* Reset the average MB variance */
3127     s->me.mb_var_sum_temp    =
3128     s->me.mc_mb_var_sum_temp = 0;
3129
3130     /* we need to initialize some time vars before we can encode b-frames */
3131     // RAL: Condition added for MPEG1VIDEO
3132     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3133         set_frame_distances(s);
3134     if(CONFIG_MPEG4_ENCODER && s->codec_id == CODEC_ID_MPEG4)
3135         ff_set_mpeg4_time(s);
3136
3137     s->me.scene_change_score=0;
3138
3139 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3140
     /* I-pictures reset no_rounding (1 for msmpeg4_version >= 3, else 0);
      * other non-B pictures toggle it for the codecs listed below. */
3141     if(s->pict_type==AV_PICTURE_TYPE_I){
3142         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3143         else                        s->no_rounding=0;
3144     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3145         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
3146             s->no_rounding ^= 1;
3147     }
3148
     /* Provisional lambda for motion estimation: a dry-run estimate in pass 2,
      * otherwise (unless CODEC_FLAG_QSCALE is set) the last lambda stored for
      * this kind of picture. */
3149     if(s->flags & CODEC_FLAG_PASS2){
3150         if (estimate_qp(s,1) < 0)
3151             return -1;
3152         ff_get_2pass_fcode(s);
3153     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3154         if(s->pict_type==AV_PICTURE_TYPE_B)
3155             s->lambda= s->last_lambda_for[s->pict_type];
3156         else
3157             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3158         update_qscale(s);
3159     }
3160
     /* For every codec except AMV the chroma intra tables alias the luma ones;
      * a chroma table that does not already alias them is freed first. */
3161     if(s->codec_id != CODEC_ID_AMV){
3162         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3163         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3164         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3165         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3166     }
3167
3168     s->mb_intra=0; //for the rate distortion & bit compare functions
     /* context 0 is skipped here; only the additional slice contexts are updated */
3169     for(i=1; i<context_count; i++){
3170         ff_update_duplicate_context(s->thread_context[i], s);
3171     }
3172
3173     if(ff_init_me(s)<0)
3174         return -1;
3175
3176     /* Estimate motion for every MB */
3177     if(s->pict_type != AV_PICTURE_TYPE_I){
         /* scale lambda/lambda2 by me_penalty_compensation/256, rounded */
3178         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3179         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
         /* optional pre-pass: non-B pictures with me_threshold == 0, when
          * pre_me is 2, or pre_me is set and the last non-B picture was an I */
3180         if(s->pict_type != AV_PICTURE_TYPE_B && s->avctx->me_threshold==0){
3181             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3182                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3183             }
3184         }
3185
3186         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3187     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3188         /* I-Frame */
3189         for(i=0; i<s->mb_stride*s->mb_height; i++)
3190             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3191
3192         if(!s->fixed_qscale){
3193             /* finding spatial complexity for I-frame rate control */
3194             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3195         }
3196     }
     /* gather scene change score and variance sums from the other slice contexts */
3197     for(i=1; i<context_count; i++){
3198         merge_context_after_me(s, s->thread_context[i]);
3199     }
3200     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3201     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3202     emms_c();
3203
     /* scene change: re-type a P-picture as I and mark every MB as an intra candidate */
3204     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3205         s->pict_type= AV_PICTURE_TYPE_I;
3206         for(i=0; i<s->mb_stride*s->mb_height; i++)
3207             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3208 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3209     }
3210
     /* f_code/b_code selection and fixing of overlong vectors; skipped entirely with umvplus */
3211     if(!s->umvplus){
3212         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3213             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3214
3215             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3216                 int a,b;
3217                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3218                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3219                 s->f_code= FFMAX3(s->f_code, a, b);
3220             }
3221
3222             ff_fix_long_p_mvs(s);
3223             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3224             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3225                 int j;
3226                 for(i=0; i<2; i++){
3227                     for(j=0; j<2; j++)
3228                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3229                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3230                 }
3231             }
3232         }
3233
3234         if(s->pict_type==AV_PICTURE_TYPE_B){
3235             int a, b;
3236
             /* f_code covers the forward tables, b_code the backward tables */
3237             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3238             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3239             s->f_code = FFMAX(a, b);
3240
3241             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3242             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3243             s->b_code = FFMAX(a, b);
3244
3245             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3246             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3247             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3248             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3249             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3250                 int dir, j;
3251                 for(dir=0; dir<2; dir++){
3252                     for(i=0; i<2; i++){
3253                         for(j=0; j<2; j++){
3254                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3255                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3256                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3257                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3258                         }
3259                     }
3260                 }
3261             }
3262         }
3263     }
3264
     /* final quantiser estimate (dry_run = 0) */
3265     if (estimate_qp(s, 0) < 0)
3266         return -1;
3267
3268     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3269         s->qscale= 3; //reduce clipping problems
3270
3271     if (s->out_format == FMT_MJPEG) {
3272         /* for mjpeg, we do include qscale in the matrix */
3273         for(i=1;i<64;i++){
3274             int j= s->dsp.idct_permutation[i];
3275
3276             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3277         }
3278         s->y_dc_scale_table=
3279         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3280         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3281         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3282                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3283         s->qscale= 8;
3284     }
     /* AMV: luma and chroma matrices come from sp5x_quant_table rows 5*2+0 and
      * 5*2+1, with constant DC scales of 13 (luma) and 14 (chroma) */
3285     if(s->codec_id == CODEC_ID_AMV){
3286         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3287         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3288         for(i=1;i<64;i++){
3289             int j= s->dsp.idct_permutation[ff_zigzag_direct[i]];
3290
3291             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3292             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3293         }
3294         s->y_dc_scale_table= y;
3295         s->c_dc_scale_table= c;
3296         s->intra_matrix[0] = 13;
3297         s->chroma_intra_matrix[0] = 14;
3298         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3299                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3300         ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3301                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3302         s->qscale= 8;
3303     }
3304
3305     //FIXME var duplication
3306     s->current_picture_ptr->f.key_frame =
3307     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3308     s->current_picture_ptr->f.pict_type =
3309     s->current_picture.f.pict_type = s->pict_type;
3310
3311     if (s->current_picture.f.key_frame)
3312         s->picture_in_gop_number=0;
3313
     /* write the picture header; its size in bits ends up in s->header_bits */
3314     s->last_bits= put_bits_count(&s->pb);
3315     switch(s->out_format) {
3316     case FMT_MJPEG:
3317         if (CONFIG_MJPEG_ENCODER)
3318             ff_mjpeg_encode_picture_header(s);
3319         break;
3320     case FMT_H261:
3321         if (CONFIG_H261_ENCODER)
3322             ff_h261_encode_picture_header(s, picture_number);
3323         break;
3324     case FMT_H263:
3325         if (CONFIG_WMV2_ENCODER && s->codec_id == CODEC_ID_WMV2)
3326             ff_wmv2_encode_picture_header(s, picture_number);
3327         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3328             ff_msmpeg4_encode_picture_header(s, picture_number);
3329         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3330             ff_mpeg4_encode_picture_header(s, picture_number);
3331         else if (CONFIG_RV10_ENCODER && s->codec_id == CODEC_ID_RV10)
3332             ff_rv10_encode_picture_header(s, picture_number);
3333         else if (CONFIG_RV20_ENCODER && s->codec_id == CODEC_ID_RV20)
3334             ff_rv20_encode_picture_header(s, picture_number);
3335         else if (CONFIG_FLV_ENCODER && s->codec_id == CODEC_ID_FLV1)
3336             ff_flv_encode_picture_header(s, picture_number);
3337         else if (CONFIG_H263_ENCODER)
3338             ff_h263_encode_picture_header(s, picture_number);
3339         break;
3340     case FMT_MPEG1:
3341         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3342             ff_mpeg1_encode_picture_header(s, picture_number);
3343         break;
3344     case FMT_H264:
3345         break;
3346     default:
3347         assert(0);
3348     }
3349     bits= put_bits_count(&s->pb);
3350     s->header_bits= bits - s->last_bits;
3351
     /* update the additional slice contexts, encode all slices, then merge
      * the results of contexts 1..context_count-1 back into s */
3352     for(i=1; i<context_count; i++){
3353         update_duplicate_context_after_me(s->thread_context[i], s);
3354     }
3355     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3356     for(i=1; i<context_count; i++){
3357         merge_context_after_encode(s, s->thread_context[i]);
3358     }
3359     emms_c();
3360     return 0;
3361 }
3362
3363 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block){
3364     const int intra= s->mb_intra;
3365     int i;
3366
3367     s->dct_count[intra]++;
3368
3369     for(i=0; i<64; i++){
3370         int level= block[i];
3371
3372         if(level){
3373             if(level>0){
3374                 s->dct_error_sum[intra][i] += level;
3375                 level -= s->dct_offset[intra][i];
3376                 if(level<0) level=0;
3377             }else{
3378                 s->dct_error_sum[intra][i] -= level;
3379                 level += s->dct_offset[intra][i];
3380                 if(level>0) level=0;
3381             }
3382             block[i]= level;
3383         }
3384     }
3385 }
3386
/**
 * Forward DCT plus rate-distortion optimised ("trellis") quantisation.
 *
 * The block is transformed, optionally denoised, and every coefficient up to
 * the last one that quantises to non-zero gets one or two candidate levels.
 * A dynamic-programming pass over the scan positions then picks, for each
 * position, the candidate and preceding run with the lowest
 * distortion + lambda * bits score. The chosen levels are written back into
 * block.
 *
 * @param s        encoder context
 * @param block    64 samples on input, quantised coefficients on output
 * @param n        block index; n < 4 selects y_dc_scale / q_intra_matrix,
 *                 otherwise c_dc_scale / q_chroma_intra_matrix (intra only)
 * @param qscale   quantiser scale
 * @param overflow set to non-zero if a quantised level may exceed s->max_qcoeff
 * @return scan index of the last non-zero coefficient; a value below the
 *         first coded index (0 for intra, -1 for inter) if none is left
 */
3387 static int dct_quantize_trellis_c(MpegEncContext *s,
3388                                   DCTELEM *block, int n,
3389                                   int qscale, int *overflow){
3390     const int *qmat;
3391     const uint8_t *scantable= s->intra_scantable.scantable;
3392     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3393     int max=0;
3394     unsigned int threshold1, threshold2;
3395     int bias=0;
3396     int run_tab[65];
3397     int level_tab[65];
3398     int score_tab[65];
3399     int survivor[65];
3400     int survivor_count;
3401     int last_run=0;
3402     int last_level=0;
3403     int last_score= 0;
3404     int last_i;
3405     int coeff[2][64];
3406     int coeff_count[64];
3407     int qmul, qadd, start_i, last_non_zero, i, dc;
3408     const int esc_length= s->ac_esc_length;
3409     uint8_t * length;
3410     uint8_t * last_length;
3411     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3412
3413     s->dsp.fdct (block);
3414
3415     if(s->dct_error_sum)
3416         s->denoise_dct(s, block);
     /* reconstruction constants for the FMT_H263 branch below, scaled by 8
      * (the other branch shifts its reconstructed value left by 3 to match) */
3417     qmul= qscale*16;
3418     qadd= ((qscale-1)|1)*8;
3419
3420     if (s->mb_intra) {
3421         int q;
3422         if (!s->h263_aic) {
3423             if (n < 4)
3424                 q = s->y_dc_scale;
3425             else
3426                 q = s->c_dc_scale;
3427             q = q << 3;
3428         } else{
3429             /* For AIC we skip quant/dequant of INTRADC */
3430             q = 1 << 3;
3431             qadd=0;
3432         }
3433
3434         /* note: block[0] is assumed to be positive */
3435         block[0] = (block[0] + (q >> 1)) / q;
3436         start_i = 1;
3437         last_non_zero = 0;
3438         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3439         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3440             bias= 1<<(QMAT_SHIFT-1);
3441         length     = s->intra_ac_vlc_length;
3442         last_length= s->intra_ac_vlc_last_length;
3443     } else {
3444         start_i = 0;
3445         last_non_zero = -1;
3446         qmat = s->q_inter_matrix[qscale];
3447         length     = s->inter_ac_vlc_length;
3448         last_length= s->inter_ac_vlc_last_length;
3449     }
3450     last_i= start_i;
3451
     /* (unsigned)(level + threshold1) > threshold2 is a single-compare test
      * for level > threshold1 || level < -threshold1 */
3452     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3453     threshold2= (threshold1<<1);
3454
     /* scan backwards for the last coefficient that exceeds the threshold */
3455     for(i=63; i>=start_i; i--) {
3456         const int j = scantable[i];
3457         int level = block[j] * qmat[j];
3458
3459         if(((unsigned)(level+threshold1))>threshold2){
3460             last_non_zero = i;
3461             break;
3462         }
3463     }
3464
     /* build the candidate levels: above the threshold the rounded level and
      * the level with magnitude one less (only one candidate if that would be
      * zero); below it a single candidate of magnitude 1 */
3465     for(i=start_i; i<=last_non_zero; i++) {
3466         const int j = scantable[i];
3467         int level = block[j] * qmat[j];
3468
3469 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3470 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3471         if(((unsigned)(level+threshold1))>threshold2){
3472             if(level>0){
3473                 level= (bias + level)>>QMAT_SHIFT;
3474                 coeff[0][i]= level;
3475                 coeff[1][i]= level-1;
3476 //                coeff[2][k]= level-2;
3477             }else{
3478                 level= (bias - level)>>QMAT_SHIFT;
3479                 coeff[0][i]= -level;
3480                 coeff[1][i]= -level+1;
3481 //                coeff[2][k]= -level+2;
3482             }
3483             coeff_count[i]= FFMIN(level, 2);
3484             assert(coeff_count[i]);
3485             max |=level;
3486         }else{
                 /* presumably -1 for a negative product and +1 otherwise
                  * (relies on 32-bit int and arithmetic right shift) */
3487             coeff[0][i]= (level>>31)|1;
3488             coeff_count[i]= 1;
3489         }
3490     }
3491
3492     *overflow= s->max_qcoeff < max; //overflow might have happened
3493
     /* nothing survived quantisation: clear the coded coefficients and stop */
3494     if(last_non_zero < start_i){
3495         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3496         return last_non_zero;
3497     }
3498
     /* score_tab[i] is the best score for coding everything before position i;
      * survivor[] lists the positions still worth starting a run from */
3499     score_tab[start_i]= 0;
3500     survivor[0]= start_i;
3501     survivor_count= 1;
3502
3503     for(i=start_i; i<=last_non_zero; i++){
3504         int level_index, j, zero_distortion;
3505         int dct_coeff= FFABS(block[ scantable[i] ]);
3506         int best_score=256*256*256*120;
3507
3508         if (s->dsp.fdct == ff_fdct_ifast)
3509             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3510         zero_distortion= dct_coeff*dct_coeff;
3511
3512         for(level_index=0; level_index < coeff_count[i]; level_index++){
3513             int distortion;
3514             int level= coeff[level_index][i];
3515             const int alevel= FFABS(level);
3516             int unquant_coeff;
3517
3518             assert(level);
3519
             /* reconstruct the value a decoder would see for this candidate */
3520             if(s->out_format == FMT_H263){
3521                 unquant_coeff= alevel*qmul + qadd;
3522             }else{ //MPEG1
3523                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3524                 if(s->mb_intra){
3525                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3526                         unquant_coeff =   (unquant_coeff - 1) | 1;
3527                 }else{
3528                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3529                         unquant_coeff =   (unquant_coeff - 1) | 1;
3530                 }
3531                 unquant_coeff<<= 3;
3532             }
3533
             /* distortion is measured relative to coding this coefficient as zero */
3534             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
             /* after the +64 offset, levels -64..63 pass the mask test and use
              * the length tables; everything else is costed with esc_length */
3535             level+=64;
3536             if((level&(~127)) == 0){
3537                 for(j=survivor_count-1; j>=0; j--){
3538                     int run= i - survivor[j];
3539                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3540                     score += score_tab[i-run];
3541
3542                     if(score < best_score){
3543                         best_score= score;
3544                         run_tab[i+1]= run;
3545                         level_tab[i+1]= level-64;
3546                     }
3547                 }
3548
                 /* FMT_H263 only: also score this coefficient as the last one,
                  * using the last_length table */
3549                 if(s->out_format == FMT_H263){
3550                     for(j=survivor_count-1; j>=0; j--){
3551                         int run= i - survivor[j];
3552                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3553                         score += score_tab[i-run];
3554                         if(score < last_score){
3555                             last_score= score;
3556                             last_run= run;
3557                             last_level= level-64;
3558                             last_i= i+1;
3559                         }
3560                     }
3561                 }
3562             }else{
3563                 distortion += esc_length*lambda;
3564                 for(j=survivor_count-1; j>=0; j--){
3565                     int run= i - survivor[j];
3566                     int score= distortion + score_tab[i-run];
3567
3568                     if(score < best_score){
3569                         best_score= score;
3570                         run_tab[i+1]= run;
3571                         level_tab[i+1]= level-64;
3572                     }
3573                 }
3574
3575                 if(s->out_format == FMT_H263){
3576                   for(j=survivor_count-1; j>=0; j--){
3577                         int run= i - survivor[j];
3578                         int score= distortion + score_tab[i-run];
3579                         if(score < last_score){
3580                             last_score= score;
3581                             last_run= run;
3582                             last_level= level-64;
3583                             last_i= i+1;
3584                         }
3585                     }
3586                 }
3587             }
3588         }
3589
3590         score_tab[i+1]= best_score;
3591
         /* drop trailing survivors whose score is worse than the new best
          * (with a slack of lambda when last_non_zero > 27) */
3592         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3593         if(last_non_zero <= 27){
3594             for(; survivor_count; survivor_count--){
3595                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3596                     break;
3597             }
3598         }else{
3599             for(; survivor_count; survivor_count--){
3600                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3601                     break;
3602             }
3603         }
3604
3605         survivor[ survivor_count++ ]= i+1;
3606     }
3607
     /* formats other than FMT_H263: pick the end position afterwards, adding
      * lambda*2 to every candidate end position except position 0 */
3608     if(s->out_format != FMT_H263){
3609         last_score= 256*256*256*120;
3610         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3611             int score= score_tab[i];
3612             if(i) score += lambda*2; //FIXME exacter?
3613
3614             if(score < last_score){
3615                 last_score= score;
3616                 last_i= i;
3617                 last_level= level_tab[i];
3618                 last_run= run_tab[i];
3619             }
3620         }
3621     }
3622
3623     s->coded_score[n] = last_score;
3624
3625     dc= FFABS(block[0]);
3626     last_non_zero= last_i - 1;
3627     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3628
3629     if(last_non_zero < start_i)
3630         return last_non_zero;
3631
     /* special case: only coefficient 0 of a block with start_i == 0 is left;
      * re-decide between its candidate levels and dropping it entirely */
3632     if(last_non_zero == 0 && start_i == 0){
3633         int best_level= 0;
3634         int best_score= dc * dc;
3635
3636         for(i=0; i<coeff_count[0]; i++){
3637             int level= coeff[i][0];
3638             int alevel= FFABS(level);
3639             int unquant_coeff, score, distortion;
3640
3641             if(s->out_format == FMT_H263){
3642                     unquant_coeff= (alevel*qmul + qadd)>>3;
3643             }else{ //MPEG1
3644                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3645                     unquant_coeff =   (unquant_coeff - 1) | 1;
3646             }
3647             unquant_coeff = (unquant_coeff + 4) >> 3;
3648             unquant_coeff<<= 3 + 3;
3649
3650             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3651             level+=64;
3652             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3653             else                    score= distortion + esc_length*lambda;
3654
3655             if(score < best_score){
3656                 best_score= score;
3657                 best_level= level - 64;
3658             }
3659         }
3660         block[0]= best_level;
3661         s->coded_score[n] = best_score - dc*dc;
3662         if(best_level == 0) return -1;
3663         else                return last_non_zero;
3664     }
3665
     /* write the last level, then walk run_tab/level_tab backwards to place
      * the remaining chosen levels */
3666     i= last_i;
3667     assert(last_level);
3668
3669     block[ perm_scantable[last_non_zero] ]= last_level;
3670     i -= last_run + 1;
3671
3672     for(; i>start_i; i -= run_tab[i] + 1){
3673         block[ perm_scantable[i-1] ]= level_tab[i];
3674     }
3675
3676     return last_non_zero;
3677 }
3678
3679 //#define REFINE_STATS 1
3680 static int16_t basis[64][64];
3681
3682 static void build_basis(uint8_t *perm){
3683     int i, j, x, y;
3684     emms_c();
3685     for(i=0; i<8; i++){
3686         for(j=0; j<8; j++){
3687             for(y=0; y<8; y++){
3688                 for(x=0; x<8; x++){
3689                     double s= 0.25*(1<<BASIS_SHIFT);
3690                     int index= 8*i + j;
3691                     int perm_index= perm[index];
3692                     if(i==0) s*= sqrt(0.5);
3693                     if(j==0) s*= sqrt(0.5);
3694                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3695                 }
3696             }
3697         }
3698     }
3699 }
3700
3701 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3702                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
3703                         int n, int qscale){
3704     int16_t rem[64];
3705     LOCAL_ALIGNED_16(DCTELEM, d1, [64]);
3706     const uint8_t *scantable= s->intra_scantable.scantable;
3707     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3708 //    unsigned int threshold1, threshold2;
3709 //    int bias=0;
3710     int run_tab[65];
3711     int prev_run=0;
3712     int prev_level=0;
3713     int qmul, qadd, start_i, last_non_zero, i, dc;
3714     uint8_t * length;
3715     uint8_t * last_length;
3716     int lambda;
3717     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3718 #ifdef REFINE_STATS
3719 static int count=0;
3720 static int after_last=0;
3721 static int to_zero=0;
3722 static int from_zero=0;
3723 static int raise=0;
3724 static int lower=0;
3725 static int messed_sign=0;
3726 #endif
3727
3728     if(basis[0][0] == 0)
3729         build_basis(s->dsp.idct_permutation);
3730
3731     qmul= qscale*2;
3732     qadd= (qscale-1)|1;
3733     if (s->mb_intra) {
3734         if (!s->h263_aic) {
3735             if (n < 4)
3736                 q = s->y_dc_scale;
3737             else
3738                 q = s->c_dc_scale;
3739         } else{
3740             /* For AIC we skip quant/dequant of INTRADC */
3741             q = 1;
3742             qadd=0;
3743         }
3744         q <<= RECON_SHIFT-3;
3745         /* note: block[0] is assumed to be positive */
3746         dc= block[0]*q;
3747 //        block[0] = (block[0] + (q >> 1)) / q;
3748         start_i = 1;
3749 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3750 //            bias= 1<<(QMAT_SHIFT-1);
3751         length     = s->intra_ac_vlc_length;
3752         last_length= s->intra_ac_vlc_last_length;
3753     } else {
3754         dc= 0;
3755         start_i = 0;
3756         length     = s->inter_ac_vlc_length;
3757         last_length= s->inter_ac_vlc_last_length;
3758     }
3759     last_non_zero = s->block_last_index[n];
3760
3761 #ifdef REFINE_STATS
3762 {START_TIMER
3763 #endif
3764     dc += (1<<(RECON_SHIFT-1));
3765     for(i=0; i<64; i++){
3766         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3767     }
3768 #ifdef REFINE_STATS
3769 STOP_TIMER("memset rem[]")}
3770 #endif
3771     sum=0;
3772     for(i=0; i<64; i++){
3773         int one= 36;
3774         int qns=4;
3775         int w;
3776
3777         w= FFABS(weight[i]) + qns*one;
3778         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3779
3780         weight[i] = w;
3781 //        w=weight[i] = (63*qns + (w/2)) / w;
3782
3783         assert(w>0);
3784         assert(w<(1<<6));
3785         sum += w*w;
3786     }
3787     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3788 #ifdef REFINE_STATS
3789 {START_TIMER
3790 #endif
3791     run=0;
3792     rle_index=0;
3793     for(i=start_i; i<=last_non_zero; i++){
3794         int j= perm_scantable[i];
3795         const int level= block[j];
3796         int coeff;
3797
3798         if(level){
3799             if(level<0) coeff= qmul*level - qadd;
3800             else        coeff= qmul*level + qadd;
3801             run_tab[rle_index++]=run;
3802             run=0;
3803
3804             s->dsp.add_8x8basis(rem, basis[j], coeff);
3805         }else{
3806             run++;
3807         }
3808     }
3809 #ifdef REFINE_STATS
3810 if(last_non_zero>0){
3811 STOP_TIMER("init rem[]")
3812 }
3813 }
3814
3815 {START_TIMER
3816 #endif
3817     for(;;){
3818         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3819         int best_coeff=0;
3820         int best_change=0;
3821         int run2, best_unquant_change=0, analyze_gradient;
3822 #ifdef REFINE_STATS
3823 {START_TIMER
3824 #endif
3825         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3826
3827         if(analyze_gradient){
3828 #ifdef REFINE_STATS
3829 {START_TIMER
3830 #endif
3831             for(i=0; i<64; i++){
3832                 int w= weight[i];
3833
3834                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3835             }
3836 #ifdef REFINE_STATS
3837 STOP_TIMER("rem*w*w")}
3838 {START_TIMER
3839 #endif
3840             s->dsp.fdct(d1);
3841 #ifdef REFINE_STATS
3842 STOP_TIMER("dct")}
3843 #endif
3844         }
3845
3846         if(start_i){
3847             const int level= block[0];
3848             int change, old_coeff;
3849
3850             assert(s->mb_intra);
3851
3852             old_coeff= q*level;
3853
3854             for(change=-1; change<=1; change+=2){
3855                 int new_level= level + change;
3856                 int score, new_coeff;
3857
3858                 new_coeff= q*new_level;
3859                 if(new_coeff >= 2048 || new_coeff < 0)
3860                     continue;
3861
3862                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3863                 if(score<best_score){
3864                     best_score= score;
3865                     best_coeff= 0;
3866                     best_change= change;
3867                     best_unquant_change= new_coeff - old_coeff;
3868                 }
3869             }
3870         }
3871
3872         run=0;
3873         rle_index=0;
3874         run2= run_tab[rle_index++];
3875         prev_level=0;
3876         prev_run=0;
3877
3878         for(i=start_i; i<64; i++){
3879             int j= perm_scantable[i];
3880             const int level= block[j];
3881             int change, old_coeff;
3882
3883             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3884                 break;
3885
3886             if(level){
3887                 if(level<0) old_coeff= qmul*level - qadd;
3888                 else        old_coeff= qmul*level + qadd;
3889                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3890             }else{
3891                 old_coeff=0;
3892                 run2--;
3893                 assert(run2>=0 || i >= last_non_zero );
3894             }
3895
3896             for(change=-1; change<=1; change+=2){
3897                 int new_level= level + change;
3898                 int score, new_coeff, unquant_change;
3899
3900                 score=0;
3901                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3902                    continue;
3903
3904                 if(new_level){
3905                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3906                     else            new_coeff= qmul*new_level + qadd;
3907                     if(new_coeff >= 2048 || new_coeff <= -2048)
3908                         continue;
3909                     //FIXME check for overflow
3910
3911                     if(level){
3912                         if(level < 63 && level > -63){
3913                             if(i < last_non_zero)
3914                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3915                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3916                             else
3917                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3918                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3919                         }
3920                     }else{
3921                         assert(FFABS(new_level)==1);
3922
3923                         if(analyze_gradient){
3924                             int g= d1[ scantable[i] ];
3925                             if(g && (g^new_level) >= 0)
3926                                 continue;
3927                         }
3928
3929                         if(i < last_non_zero){
3930                             int next_i= i + run2 + 1;
3931                             int next_level= block[ perm_scantable[next_i] ] + 64;
3932
3933                             if(next_level&(~127))
3934                                 next_level= 0;
3935
3936                             if(next_i < last_non_zero)
3937                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3938                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3939                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3940                             else
3941                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3942                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3943                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3944                         }else{
3945                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3946                             if(prev_level){
3947                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3948                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3949                             }
3950                         }
3951                     }
3952                 }else{
3953                     new_coeff=0;
3954                     assert(FFABS(level)==1);
3955
3956                     if(i < last_non_zero){
3957                         int next_i= i + run2 + 1;
3958                         int next_level= block[ perm_scantable[next_i] ] + 64;
3959
3960                         if(next_level&(~127))
3961                             next_level= 0;
3962
3963                         if(next_i < last_non_zero)
3964                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3965                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
3966                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3967                         else
3968                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3969                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3970                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3971                     }else{
3972                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
3973                         if(prev_level){
3974                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3975                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3976                         }
3977                     }
3978                 }
3979
3980                 score *= lambda;
3981
3982                 unquant_change= new_coeff - old_coeff;
3983                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
3984
3985                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
3986                 if(score<best_score){
3987                     best_score= score;
3988                     best_coeff= i;
3989                     best_change= change;
3990                     best_unquant_change= unquant_change;
3991                 }
3992             }
3993             if(level){
3994                 prev_level= level + 64;
3995                 if(prev_level&(~127))
3996                     prev_level= 0;
3997                 prev_run= run;
3998                 run=0;
3999             }else{
4000                 run++;
4001             }
4002         }
4003 #ifdef REFINE_STATS
4004 STOP_TIMER("iterative step")}
4005 #endif
4006
4007         if(best_change){
4008             int j= perm_scantable[ best_coeff ];
4009
4010             block[j] += best_change;
4011
4012             if(best_coeff > last_non_zero){
4013                 last_non_zero= best_coeff;
4014                 assert(block[j]);
4015 #ifdef REFINE_STATS
4016 after_last++;
4017 #endif
4018             }else{
4019 #ifdef REFINE_STATS
4020 if(block[j]){
4021     if(block[j] - best_change){
4022         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4023             raise++;
4024         }else{
4025             lower++;
4026         }
4027     }else{
4028         from_zero++;
4029     }
4030 }else{
4031     to_zero++;
4032 }
4033 #endif
4034                 for(; last_non_zero>=start_i; last_non_zero--){
4035                     if(block[perm_scantable[last_non_zero]])
4036                         break;
4037                 }
4038             }
4039 #ifdef REFINE_STATS
4040 count++;
4041 if(256*256*256*64 % count == 0){
4042     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4043 }
4044 #endif
4045             run=0;
4046             rle_index=0;
4047             for(i=start_i; i<=last_non_zero; i++){
4048                 int j= perm_scantable[i];
4049                 const int level= block[j];
4050
4051                  if(level){
4052                      run_tab[rle_index++]=run;
4053                      run=0;
4054                  }else{
4055                      run++;
4056                  }
4057             }
4058
4059             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4060         }else{
4061             break;
4062         }
4063     }
4064 #ifdef REFINE_STATS
4065 if(last_non_zero>0){
4066 STOP_TIMER("iterative search")
4067 }
4068 }
4069 #endif
4070
4071     return last_non_zero;
4072 }
4073
4074 int ff_dct_quantize_c(MpegEncContext *s,
4075                         DCTELEM *block, int n,
4076                         int qscale, int *overflow)
4077 {
4078     int i, j, level, last_non_zero, q, start_i;
4079     const int *qmat;
4080     const uint8_t *scantable= s->intra_scantable.scantable;
4081     int bias;
4082     int max=0;
4083     unsigned int threshold1, threshold2;
4084
4085     s->dsp.fdct (block);
4086
4087     if(s->dct_error_sum)
4088         s->denoise_dct(s, block);
4089
4090     if (s->mb_intra) {
4091         if (!s->h263_aic) {
4092             if (n < 4)
4093                 q = s->y_dc_scale;
4094             else
4095                 q = s->c_dc_scale;
4096             q = q << 3;
4097         } else
4098             /* For AIC we skip quant/dequant of INTRADC */
4099             q = 1 << 3;
4100
4101         /* note: block[0] is assumed to be positive */
4102         block[0] = (block[0] + (q >> 1)) / q;
4103         start_i = 1;
4104         last_non_zero = 0;
4105         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4106         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4107     } else {
4108         start_i = 0;
4109         last_non_zero = -1;
4110         qmat = s->q_inter_matrix[qscale];
4111         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4112     }
4113     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4114     threshold2= (threshold1<<1);
4115     for(i=63;i>=start_i;i--) {
4116         j = scantable[i];
4117         level = block[j] * qmat[j];
4118
4119         if(((unsigned)(level+threshold1))>threshold2){
4120             last_non_zero = i;
4121             break;
4122         }else{
4123             block[j]=0;
4124         }
4125     }
4126     for(i=start_i; i<=last_non_zero; i++) {
4127         j = scantable[i];
4128         level = block[j] * qmat[j];
4129
4130 //        if(   bias+level >= (1<<QMAT_SHIFT)
4131 //           || bias-level >= (1<<QMAT_SHIFT)){
4132         if(((unsigned)(level+threshold1))>threshold2){
4133             if(level>0){
4134                 level= (bias + level)>>QMAT_SHIFT;
4135                 block[j]= level;
4136             }else{
4137                 level= (bias - level)>>QMAT_SHIFT;
4138                 block[j]= -level;
4139             }
4140             max |=level;
4141         }else{
4142             block[j]=0;
4143         }
4144     }
4145     *overflow= s->max_qcoeff < max; //overflow might have happened
4146
4147     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4148     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4149         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4150
4151     return last_non_zero;
4152 }
4153
/* Byte offset of field x inside MpegEncContext, used by the option tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Flag combination used by the option tables. The replacement list is
 * parenthesized so that the macro stays a single operand wherever it is
 * expanded (e.g. "VE & flag" must not become "A | (B & flag)"). */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4156 static const AVOption h263_options[] = {
4157     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4158     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE},
4159     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { 0 }, 0, INT_MAX, VE },
4160     FF_MPV_COMMON_OPTS
4161     { NULL },
4162 };
4163
4164 static const AVClass h263_class = {
4165     .class_name = "H.263 encoder",
4166     .item_name  = av_default_item_name,
4167     .option     = h263_options,
4168     .version    = LIBAVUTIL_VERSION_INT,
4169 };
4170
4171 AVCodec ff_h263_encoder = {
4172     .name           = "h263",
4173     .type           = AVMEDIA_TYPE_VIDEO,
4174     .id             = CODEC_ID_H263,
4175     .priv_data_size = sizeof(MpegEncContext),
4176     .init           = ff_MPV_encode_init,
4177     .encode2        = ff_MPV_encode_picture,
4178     .close          = ff_MPV_encode_end,
4179     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4180     .long_name= NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4181     .priv_class     = &h263_class,
4182 };
4183
4184 static const AVOption h263p_options[] = {
4185     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4186     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4187     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4188     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE},
4189     FF_MPV_COMMON_OPTS
4190     { NULL },
4191 };
4192 static const AVClass h263p_class = {
4193     .class_name = "H.263p encoder",
4194     .item_name  = av_default_item_name,
4195     .option     = h263p_options,
4196     .version    = LIBAVUTIL_VERSION_INT,
4197 };
4198
4199 AVCodec ff_h263p_encoder = {
4200     .name           = "h263p",
4201     .type           = AVMEDIA_TYPE_VIDEO,
4202     .id             = CODEC_ID_H263P,
4203     .priv_data_size = sizeof(MpegEncContext),
4204     .init           = ff_MPV_encode_init,
4205     .encode2        = ff_MPV_encode_picture,
4206     .close          = ff_MPV_encode_end,
4207     .capabilities = CODEC_CAP_SLICE_THREADS,
4208     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4209     .long_name= NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4210     .priv_class     = &h263p_class,
4211 };
4212
4213 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4214
4215 AVCodec ff_msmpeg4v2_encoder = {
4216     .name           = "msmpeg4v2",
4217     .type           = AVMEDIA_TYPE_VIDEO,
4218     .id             = CODEC_ID_MSMPEG4V2,
4219     .priv_data_size = sizeof(MpegEncContext),
4220     .init           = ff_MPV_encode_init,
4221     .encode2        = ff_MPV_encode_picture,
4222     .close          = ff_MPV_encode_end,
4223     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4224     .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4225     .priv_class     = &msmpeg4v2_class,
4226 };
4227
4228 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4229
4230 AVCodec ff_msmpeg4v3_encoder = {
4231     .name           = "msmpeg4",
4232     .type           = AVMEDIA_TYPE_VIDEO,
4233     .id             = CODEC_ID_MSMPEG4V3,
4234     .priv_data_size = sizeof(MpegEncContext),
4235     .init           = ff_MPV_encode_init,
4236     .encode2        = ff_MPV_encode_picture,
4237     .close          = ff_MPV_encode_end,
4238     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4239     .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4240     .priv_class     = &msmpeg4v3_class,
4241 };
4242
4243 FF_MPV_GENERIC_CLASS(wmv1)
4244
4245 AVCodec ff_wmv1_encoder = {
4246     .name           = "wmv1",
4247     .type           = AVMEDIA_TYPE_VIDEO,
4248     .id             = CODEC_ID_WMV1,
4249     .priv_data_size = sizeof(MpegEncContext),
4250     .init           = ff_MPV_encode_init,
4251     .encode2        = ff_MPV_encode_picture,
4252     .close          = ff_MPV_encode_end,
4253     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4254     .long_name= NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4255     .priv_class     = &wmv1_class,
4256 };