git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
Merge remote-tracking branch 'qatar/master'
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/intmath.h"
31 #include "libavutil/mathematics.h"
32 #include "libavutil/opt.h"
33 #include "avcodec.h"
34 #include "dsputil.h"
35 #include "mpegvideo.h"
36 #include "mpegvideo_common.h"
37 #include "h263.h"
38 #include "mjpegenc.h"
39 #include "msmpeg4.h"
40 #include "faandct.h"
41 #include "thread.h"
42 #include "aandcttab.h"
43 #include "flv.h"
44 #include "mpeg4video.h"
45 #include "internal.h"
46 #include <limits.h>
47 #include "sp5x.h"
48
49 //#undef NDEBUG
50 //#include <assert.h>
51
52 static int encode_picture(MpegEncContext *s, int picture_number);
53 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
54 static int sse_mb(MpegEncContext *s);
55 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block);
56 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
57
58 /* enable all paranoid tests for rounding, overflows, etc... */
59 //#define PARANOID
60
61 //#define DEBUG
62
/* Fallback tables that MPV_encode_defaults() installs as s->me.mv_penalty and
 * s->fcode_tab. Both have static storage and therefore start out zeroed;
 * MPV_encode_defaults() sets the 32 default_fcode_tab entries at offsets
 * -16..15 around index MAX_MV to 1. */
63 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
64 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
65
66 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
67                        uint16_t (*qmat16)[2][64],
68                        const uint16_t *quant_matrix,
69                        int bias, int qmin, int qmax, int intra)
70 {
71     int qscale;
72     int shift = 0;
73
74     for (qscale = qmin; qscale <= qmax; qscale++) {
75         int i;
76         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
77             dsp->fdct == ff_jpeg_fdct_islow_10
78 #ifdef FAAN_POSTSCALE
79             || dsp->fdct == ff_faandct
80 #endif
81             ) {
82             for (i = 0; i < 64; i++) {
83                 const int j = dsp->idct_permutation[i];
84                 /* 16 <= qscale * quant_matrix[i] <= 7905
85                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
86                  *             19952 <=              x  <= 249205026
87                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
88                  *           3444240 >= (1 << 36) / (x) >= 275 */
89
90                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
91                                         (qscale * quant_matrix[j]));
92             }
93         } else if (dsp->fdct == fdct_ifast
94 #ifndef FAAN_POSTSCALE
95                    || dsp->fdct == ff_faandct
96 #endif
97                    ) {
98             for (i = 0; i < 64; i++) {
99                 const int j = dsp->idct_permutation[i];
100                 /* 16 <= qscale * quant_matrix[i] <= 7905
101                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
102                  *             19952 <=              x  <= 249205026
103                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
104                  *           3444240 >= (1 << 36) / (x) >= 275 */
105
106                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
107                                         (ff_aanscales[i] * qscale * quant_matrix[j]));
108             }
109         } else {
110             for (i = 0; i < 64; i++) {
111                 const int j = dsp->idct_permutation[i];
112                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
113                  * Assume x = qscale * quant_matrix[i]
114                  * So             16 <=              x  <= 7905
115                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
116                  * so          32768 >= (1 << 19) / (x) >= 67 */
117                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
118                                         (qscale * quant_matrix[j]));
119                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
120                 //                    (qscale * quant_matrix[i]);
121                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
122                                        (qscale * quant_matrix[j]);
123
124                 if (qmat16[qscale][0][i] == 0 ||
125                     qmat16[qscale][0][i] == 128 * 256)
126                     qmat16[qscale][0][i] = 128 * 256 - 1;
127                 qmat16[qscale][1][i] =
128                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
129                                 qmat16[qscale][0][i]);
130             }
131         }
132
133         for (i = intra; i < 64; i++) {
134             int64_t max = 8191;
135             if (dsp->fdct == fdct_ifast
136 #ifndef FAAN_POSTSCALE
137                 || dsp->fdct == ff_faandct
138 #endif
139                ) {
140                 max = (8191LL * ff_aanscales[i]) >> 14;
141             }
142             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
143                 shift++;
144             }
145         }
146     }
147     if (shift) {
148         av_log(NULL, AV_LOG_INFO,
149                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
150                QMAT_SHIFT - shift);
151     }
152 }
153
154 static inline void update_qscale(MpegEncContext *s)
155 {
156     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
157                 (FF_LAMBDA_SHIFT + 7);
158     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
159
160     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
161                  FF_LAMBDA_SHIFT;
162 }
163
164 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
165 {
166     int i;
167
168     if (matrix) {
169         put_bits(pb, 1, 1);
170         for (i = 0; i < 64; i++) {
171             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
172         }
173     } else
174         put_bits(pb, 1, 0);
175 }
176
177 /**
178  * init s->current_picture.qscale_table from s->lambda_table
179  */
180 void ff_init_qscale_tab(MpegEncContext *s)
181 {
182     int8_t * const qscale_table = s->current_picture.f.qscale_table;
183     int i;
184
185     for (i = 0; i < s->mb_num; i++) {
186         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
187         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
188         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
189                                                   s->avctx->qmax);
190     }
191 }
192
193 static void copy_picture_attributes(MpegEncContext *s,
194                                     AVFrame *dst,
195                                     AVFrame *src)
196 {
197     int i;
198
199     dst->pict_type              = src->pict_type;
200     dst->quality                = src->quality;
201     dst->coded_picture_number   = src->coded_picture_number;
202     dst->display_picture_number = src->display_picture_number;
203     //dst->reference              = src->reference;
204     dst->pts                    = src->pts;
205     dst->interlaced_frame       = src->interlaced_frame;
206     dst->top_field_first        = src->top_field_first;
207
208     if (s->avctx->me_threshold) {
209         if (!src->motion_val[0])
210             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
211         if (!src->mb_type)
212             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
213         if (!src->ref_index[0])
214             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
215         if (src->motion_subsample_log2 != dst->motion_subsample_log2)
216             av_log(s->avctx, AV_LOG_ERROR,
217                    "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
218                    src->motion_subsample_log2, dst->motion_subsample_log2);
219
220         memcpy(dst->mb_type, src->mb_type,
221                s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
222
223         for (i = 0; i < 2; i++) {
224             int stride = ((16 * s->mb_width ) >>
225                           src->motion_subsample_log2) + 1;
226             int height = ((16 * s->mb_height) >> src->motion_subsample_log2);
227
228             if (src->motion_val[i] &&
229                 src->motion_val[i] != dst->motion_val[i]) {
230                 memcpy(dst->motion_val[i], src->motion_val[i],
231                        2 * stride * height * sizeof(int16_t));
232             }
233             if (src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]) {
234                 memcpy(dst->ref_index[i], src->ref_index[i],
235                        s->mb_stride * 4 * s->mb_height * sizeof(int8_t));
236             }
237         }
238     }
239 }
240
241 static void update_duplicate_context_after_me(MpegEncContext *dst,
242                                               MpegEncContext *src)
243 {
244 #define COPY(a) dst->a= src->a
245     COPY(pict_type);
246     COPY(current_picture);
247     COPY(f_code);
248     COPY(b_code);
249     COPY(qscale);
250     COPY(lambda);
251     COPY(lambda2);
252     COPY(picture_in_gop_number);
253     COPY(gop_picture_number);
254     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
255     COPY(progressive_frame);    // FIXME don't set in encode_header
256     COPY(partitioned_frame);    // FIXME don't set in encode_header
257 #undef COPY
258 }
259
260 /**
261  * Set the given MpegEncContext to defaults for encoding.
262  * the changed fields will not depend upon the prior state of the MpegEncContext.
263  */
264 static void MPV_encode_defaults(MpegEncContext *s)
265 {
266     int i;
267     MPV_common_defaults(s);
268
269     for (i = -16; i < 16; i++) {
270         default_fcode_tab[i + MAX_MV] = 1;
271     }
272     s->me.mv_penalty = default_mv_penalty;
273     s->fcode_tab     = default_fcode_tab;
274 }
275
276 /* init video encoder */
277 av_cold int MPV_encode_init(AVCodecContext *avctx)
278 {
279     MpegEncContext *s = avctx->priv_data;
280     int i;
281     int chroma_h_shift, chroma_v_shift;
282
283     MPV_encode_defaults(s);
284
285     switch (avctx->codec_id) {
286     case CODEC_ID_MPEG2VIDEO:
287         if (avctx->pix_fmt != PIX_FMT_YUV420P &&
288             avctx->pix_fmt != PIX_FMT_YUV422P) {
289             av_log(avctx, AV_LOG_ERROR,
290                    "only YUV420 and YUV422 are supported\n");
291             return -1;
292         }
293         break;
294     case CODEC_ID_LJPEG:
295         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
296             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
297             avctx->pix_fmt != PIX_FMT_YUVJ444P &&
298             avctx->pix_fmt != PIX_FMT_BGRA     &&
299             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
300               avctx->pix_fmt != PIX_FMT_YUV422P &&
301               avctx->pix_fmt != PIX_FMT_YUV444P) ||
302              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
303             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
304             return -1;
305         }
306         break;
307     case CODEC_ID_MJPEG:
308     case CODEC_ID_AMV:
309         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
310             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
311             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
312               avctx->pix_fmt != PIX_FMT_YUV422P) ||
313              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
314             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
315             return -1;
316         }
317         break;
318     default:
319         if (avctx->pix_fmt != PIX_FMT_YUV420P) {
320             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
321             return -1;
322         }
323     }
324
325     switch (avctx->pix_fmt) {
326     case PIX_FMT_YUVJ422P:
327     case PIX_FMT_YUV422P:
328         s->chroma_format = CHROMA_422;
329         break;
330     case PIX_FMT_YUVJ420P:
331     case PIX_FMT_YUV420P:
332     default:
333         s->chroma_format = CHROMA_420;
334         break;
335     }
336
337     s->bit_rate = avctx->bit_rate;
338     s->width    = avctx->width;
339     s->height   = avctx->height;
340     if (avctx->gop_size > 600 &&
341         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
342         av_log(avctx, AV_LOG_ERROR,
343                "Warning keyframe interval too large! reducing it ...\n");
344         avctx->gop_size = 600;
345     }
346     s->gop_size     = avctx->gop_size;
347     s->avctx        = avctx;
348     s->flags        = avctx->flags;
349     s->flags2       = avctx->flags2;
350     s->max_b_frames = avctx->max_b_frames;
351     s->codec_id     = avctx->codec->id;
352     s->luma_elim_threshold   = avctx->luma_elim_threshold;
353     s->chroma_elim_threshold = avctx->chroma_elim_threshold;
354     s->strict_std_compliance = avctx->strict_std_compliance;
355 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
356     if (avctx->flags & CODEC_FLAG_PART)
357         s->data_partitioning = 1;
358 #endif
359     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
360     s->mpeg_quant         = avctx->mpeg_quant;
361     s->rtp_mode           = !!avctx->rtp_payload_size;
362     s->intra_dc_precision = avctx->intra_dc_precision;
363     s->user_specified_pts = AV_NOPTS_VALUE;
364
365     if (s->gop_size <= 1) {
366         s->intra_only = 1;
367         s->gop_size   = 12;
368     } else {
369         s->intra_only = 0;
370     }
371
372     s->me_method = avctx->me_method;
373
374     /* Fixed QSCALE */
375     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
376
377     s->adaptive_quant = (s->avctx->lumi_masking ||
378                          s->avctx->dark_masking ||
379                          s->avctx->temporal_cplx_masking ||
380                          s->avctx->spatial_cplx_masking  ||
381                          s->avctx->p_masking      ||
382                          s->avctx->border_masking ||
383                          (s->flags & CODEC_FLAG_QP_RD)) &&
384                         !s->fixed_qscale;
385
386     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
387 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
388     s->alternate_scan   = !!(s->flags  & CODEC_FLAG_ALT_SCAN);
389     s->intra_vlc_format = !!(s->flags2 & CODEC_FLAG2_INTRA_VLC);
390     s->q_scale_type     = !!(s->flags2 & CODEC_FLAG2_NON_LINEAR_QUANT);
391     s->obmc             = !!(s->flags  & CODEC_FLAG_OBMC);
392 #endif
393
394     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
395         av_log(avctx, AV_LOG_ERROR,
396                "a vbv buffer size is needed, "
397                "for encoding with a maximum bitrate\n");
398         return -1;
399     }
400
401     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
402         av_log(avctx, AV_LOG_INFO,
403                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
404     }
405
406     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
407         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
408         return -1;
409     }
410
411     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
412         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
413         return -1;
414     }
415
416     if (avctx->rc_max_rate &&
417         avctx->rc_max_rate == avctx->bit_rate &&
418         avctx->rc_max_rate != avctx->rc_min_rate) {
419         av_log(avctx, AV_LOG_INFO,
420                "impossible bitrate constraints, this will fail\n");
421     }
422
423     if (avctx->rc_buffer_size &&
424         avctx->bit_rate * (int64_t)avctx->time_base.num >
425             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
426         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
427         return -1;
428     }
429
430     if (!s->fixed_qscale &&
431         avctx->bit_rate * av_q2d(avctx->time_base) >
432             avctx->bit_rate_tolerance) {
433         av_log(avctx, AV_LOG_ERROR,
434                "bitrate tolerance too small for bitrate\n");
435         return -1;
436     }
437
438     if (s->avctx->rc_max_rate &&
439         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
440         (s->codec_id == CODEC_ID_MPEG1VIDEO ||
441          s->codec_id == CODEC_ID_MPEG2VIDEO) &&
442         90000LL * (avctx->rc_buffer_size - 1) >
443             s->avctx->rc_max_rate * 0xFFFFLL) {
444         av_log(avctx, AV_LOG_INFO,
445                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
446                "specified vbv buffer is too large for the given bitrate!\n");
447     }
448
449     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != CODEC_ID_MPEG4 &&
450         s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P &&
451         s->codec_id != CODEC_ID_FLV1) {
452         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
453         return -1;
454     }
455
456     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
457         av_log(avctx, AV_LOG_ERROR,
458                "OBMC is only supported with simple mb decision\n");
459         return -1;
460     }
461
462 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
463     if (s->obmc && s->codec_id != CODEC_ID_H263 &&
464         s->codec_id != CODEC_ID_H263P) {
465         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
466         return -1;
467     }
468 #endif
469
470     if (s->quarter_sample && s->codec_id != CODEC_ID_MPEG4) {
471         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
472         return -1;
473     }
474
475 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
476     if (s->data_partitioning && s->codec_id != CODEC_ID_MPEG4) {
477         av_log(avctx, AV_LOG_ERROR,
478                "data partitioning not supported by codec\n");
479         return -1;
480     }
481 #endif
482
483     if (s->max_b_frames                    &&
484         s->codec_id != CODEC_ID_MPEG4      &&
485         s->codec_id != CODEC_ID_MPEG1VIDEO &&
486         s->codec_id != CODEC_ID_MPEG2VIDEO) {
487         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
488         return -1;
489     }
490
491     if ((s->codec_id == CODEC_ID_MPEG4 ||
492          s->codec_id == CODEC_ID_H263  ||
493          s->codec_id == CODEC_ID_H263P) &&
494         (avctx->sample_aspect_ratio.num > 255 ||
495          avctx->sample_aspect_ratio.den > 255)) {
496         av_log(avctx, AV_LOG_WARNING,
497                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
498                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
499         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
500                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
501     }
502
503     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME |
504                      CODEC_FLAG_ALT_SCAN)) &&
505         s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO) {
506         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
507         return -1;
508     }
509
510     // FIXME mpeg2 uses that too
511     if (s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4) {
512         av_log(avctx, AV_LOG_ERROR,
513                "mpeg2 style quantization not supported by codec\n");
514         return -1;
515     }
516
517     if ((s->flags & CODEC_FLAG_CBP_RD) && !avctx->trellis) {
518         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
519         return -1;
520     }
521
522     if ((s->flags & CODEC_FLAG_QP_RD) &&
523         s->avctx->mb_decision != FF_MB_DECISION_RD) {
524         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
525         return -1;
526     }
527
528     if (s->avctx->scenechange_threshold < 1000000000 &&
529         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
530         av_log(avctx, AV_LOG_ERROR,
531                "closed gop with scene change detection are not supported yet, "
532                "set threshold to 1000000000\n");
533         return -1;
534     }
535
536     if ((s->flags2 & CODEC_FLAG2_INTRA_VLC) &&
537         s->codec_id != CODEC_ID_MPEG2VIDEO) {
538         av_log(avctx, AV_LOG_ERROR,
539                "intra vlc table not supported by codec\n");
540         return -1;
541     }
542
543     if (s->flags & CODEC_FLAG_LOW_DELAY) {
544         if (s->codec_id != CODEC_ID_MPEG2VIDEO) {
545             av_log(avctx, AV_LOG_ERROR,
546                   "low delay forcing is only available for mpeg2\n");
547             return -1;
548         }
549         if (s->max_b_frames != 0) {
550             av_log(avctx, AV_LOG_ERROR,
551                    "b frames cannot be used with low delay\n");
552             return -1;
553         }
554     }
555
556     if (s->q_scale_type == 1) {
557 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
558         if (s->codec_id != CODEC_ID_MPEG2VIDEO) {
559             av_log(avctx, AV_LOG_ERROR,
560                    "non linear quant is only available for mpeg2\n");
561             return -1;
562         }
563 #endif
564         if (avctx->qmax > 12) {
565             av_log(avctx, AV_LOG_ERROR,
566                    "non linear quant only supports qmax <= 12 currently\n");
567             return -1;
568         }
569     }
570
571     if (s->avctx->thread_count > 1         &&
572         s->codec_id != CODEC_ID_MPEG4      &&
573         s->codec_id != CODEC_ID_MPEG1VIDEO &&
574         s->codec_id != CODEC_ID_MPEG2VIDEO &&
575         (s->codec_id != CODEC_ID_H263P ||
576          !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))) {
577         av_log(avctx, AV_LOG_ERROR,
578                "multi threaded encoding not supported by codec\n");
579         return -1;
580     }
581
582     if (s->avctx->thread_count < 1) {
583         av_log(avctx, AV_LOG_ERROR,
584                "automatic thread number detection not supported by codec, "
585                "patch welcome\n");
586         return -1;
587     }
588
589     if (s->avctx->thread_count > 1)
590         s->rtp_mode = 1;
591
592     if (!avctx->time_base.den || !avctx->time_base.num) {
593         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
594         return -1;
595     }
596
597     i = (INT_MAX / 2 + 128) >> 8;
598     if (avctx->me_threshold >= i) {
599         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n",
600                i - 1);
601         return -1;
602     }
603     if (avctx->mb_threshold >= i) {
604         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
605                i - 1);
606         return -1;
607     }
608
609     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
610         av_log(avctx, AV_LOG_INFO,
611                "notice: b_frame_strategy only affects the first pass\n");
612         avctx->b_frame_strategy = 0;
613     }
614
615     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
616     if (i > 1) {
617         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
618         avctx->time_base.den /= i;
619         avctx->time_base.num /= i;
620         //return -1;
621     }
622
623     if (s->mpeg_quant || s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || s->codec_id == CODEC_ID_MJPEG || s->codec_id==CODEC_ID_AMV) {
624         // (a + x * 3 / 8) / x
625         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
626         s->inter_quant_bias = 0;
627     } else {
628         s->intra_quant_bias = 0;
629         // (a - x / 4) / x
630         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
631     }
632
633     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
634         s->intra_quant_bias = avctx->intra_quant_bias;
635     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
636         s->inter_quant_bias = avctx->inter_quant_bias;
637
638     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
639
640     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
641                                   &chroma_v_shift);
642
643     if (avctx->codec_id == CODEC_ID_MPEG4 &&
644         s->avctx->time_base.den > (1 << 16) - 1) {
645         av_log(avctx, AV_LOG_ERROR,
646                "timebase %d/%d not supported by MPEG 4 standard, "
647                "the maximum admitted value for the timebase denominator "
648                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
649                (1 << 16) - 1);
650         return -1;
651     }
652     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
653
654     switch (avctx->codec->id) {
655     case CODEC_ID_MPEG1VIDEO:
656         s->out_format = FMT_MPEG1;
657         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
658         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
659         break;
660     case CODEC_ID_MPEG2VIDEO:
661         s->out_format = FMT_MPEG1;
662         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
663         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
664         s->rtp_mode   = 1;
665         break;
666     case CODEC_ID_LJPEG:
667     case CODEC_ID_MJPEG:
668     case CODEC_ID_AMV:
669         s->out_format = FMT_MJPEG;
670         s->intra_only = 1; /* force intra only for jpeg */
671         if (avctx->codec->id == CODEC_ID_LJPEG && avctx->pix_fmt   == PIX_FMT_BGRA) {
672             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
673             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
674             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
675         } else {
676             s->mjpeg_vsample[0] = 2;
677             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
678             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
679             s->mjpeg_hsample[0] = 2;
680             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
681             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
682         }
683         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
684             ff_mjpeg_encode_init(s) < 0)
685             return -1;
686         avctx->delay = 0;
687         s->low_delay = 1;
688         break;
689     case CODEC_ID_H261:
690         if (!CONFIG_H261_ENCODER)
691             return -1;
692         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
693             av_log(avctx, AV_LOG_ERROR,
694                    "The specified picture size of %dx%d is not valid for the "
695                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
696                     s->width, s->height);
697             return -1;
698         }
699         s->out_format = FMT_H261;
700         avctx->delay  = 0;
701         s->low_delay  = 1;
702         break;
703     case CODEC_ID_H263:
704         if (!CONFIG_H263_ENCODER)
705             return -1;
706         if (ff_match_2uint16(h263_format, FF_ARRAY_ELEMS(h263_format),
707                              s->width, s->height) == 8) {
708             av_log(avctx, AV_LOG_ERROR,
709                    "The specified picture size of %dx%d is not valid for "
710                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
711                    "352x288, 704x576, and 1408x1152. "
712                    "Try H.263+.\n", s->width, s->height);
713             return -1;
714         }
715         s->out_format = FMT_H263;
716         avctx->delay  = 0;
717         s->low_delay  = 1;
718         break;
719     case CODEC_ID_H263P:
720         s->out_format = FMT_H263;
721         s->h263_plus  = 1;
722         /* Fx */
723 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
724         if (avctx->flags & CODEC_FLAG_H263P_UMV)
725             s->umvplus = 1;
726         if (avctx->flags & CODEC_FLAG_H263P_AIV)
727             s->alt_inter_vlc = 1;
728         if (avctx->flags & CODEC_FLAG_H263P_SLICE_STRUCT)
729             s->h263_slice_structured = 1;
730 #endif
731         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
732         s->modified_quant  = s->h263_aic;
733         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
734         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
735
736         /* /Fx */
737         /* These are just to be sure */
738         avctx->delay = 0;
739         s->low_delay = 1;
740         break;
741     case CODEC_ID_FLV1:
742         s->out_format      = FMT_H263;
743         s->h263_flv        = 2; /* format = 1; 11-bit codes */
744         s->unrestricted_mv = 1;
745         s->rtp_mode  = 0; /* don't allow GOB */
746         avctx->delay = 0;
747         s->low_delay = 1;
748         break;
749     case CODEC_ID_RV10:
750         s->out_format = FMT_H263;
751         avctx->delay  = 0;
752         s->low_delay  = 1;
753         break;
754     case CODEC_ID_RV20:
755         s->out_format      = FMT_H263;
756         avctx->delay       = 0;
757         s->low_delay       = 1;
758         s->modified_quant  = 1;
759         s->h263_aic        = 1;
760         s->h263_plus       = 1;
761         s->loop_filter     = 1;
762         s->unrestricted_mv = 0;
763         break;
764     case CODEC_ID_MPEG4:
765         s->out_format      = FMT_H263;
766         s->h263_pred       = 1;
767         s->unrestricted_mv = 1;
768         s->low_delay       = s->max_b_frames ? 0 : 1;
769         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
770         break;
771     case CODEC_ID_MSMPEG4V2:
772         s->out_format      = FMT_H263;
773         s->h263_pred       = 1;
774         s->unrestricted_mv = 1;
775         s->msmpeg4_version = 2;
776         avctx->delay       = 0;
777         s->low_delay       = 1;
778         break;
779     case CODEC_ID_MSMPEG4V3:
780         s->out_format        = FMT_H263;
781         s->h263_pred         = 1;
782         s->unrestricted_mv   = 1;
783         s->msmpeg4_version   = 3;
784         s->flipflop_rounding = 1;
785         avctx->delay         = 0;
786         s->low_delay         = 1;
787         break;
788     case CODEC_ID_WMV1:
789         s->out_format        = FMT_H263;
790         s->h263_pred         = 1;
791         s->unrestricted_mv   = 1;
792         s->msmpeg4_version   = 4;
793         s->flipflop_rounding = 1;
794         avctx->delay         = 0;
795         s->low_delay         = 1;
796         break;
797     case CODEC_ID_WMV2:
798         s->out_format        = FMT_H263;
799         s->h263_pred         = 1;
800         s->unrestricted_mv   = 1;
801         s->msmpeg4_version   = 5;
802         s->flipflop_rounding = 1;
803         avctx->delay         = 0;
804         s->low_delay         = 1;
805         break;
806     default:
807         return -1;
808     }
809
810     avctx->has_b_frames = !s->low_delay;
811
812     s->encoding = 1;
813
814     s->progressive_frame    =
815     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
816                                                 CODEC_FLAG_INTERLACED_ME  |
817                                                 CODEC_FLAG_ALT_SCAN));
818
819     /* init */
820     if (MPV_common_init(s) < 0)
821         return -1;
822
823     if (!s->dct_quantize)
824         s->dct_quantize = dct_quantize_c;
825     if (!s->denoise_dct)
826         s->denoise_dct  = denoise_dct_c;
827     s->fast_dct_quantize = s->dct_quantize;
828     if (avctx->trellis)
829         s->dct_quantize  = dct_quantize_trellis_c;
830
831     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
832         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
833
834     s->quant_precision = 5;
835
836     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
837     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
838
839     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
840         ff_h261_encode_init(s);
841     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
842         h263_encode_init(s);
843     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
844         ff_msmpeg4_encode_init(s);
845     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
846         && s->out_format == FMT_MPEG1)
847         ff_mpeg1_encode_init(s);
848
849     /* init q matrix */
850     for (i = 0; i < 64; i++) {
851         int j = s->dsp.idct_permutation[i];
852         if (CONFIG_MPEG4_ENCODER && s->codec_id == CODEC_ID_MPEG4 &&
853             s->mpeg_quant) {
854             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
855             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
856         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
857             s->intra_matrix[j] =
858             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
859         } else {
860             /* mpeg1/2 */
861             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
862             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
863         }
864         if (s->avctx->intra_matrix)
865             s->intra_matrix[j] = s->avctx->intra_matrix[i];
866         if (s->avctx->inter_matrix)
867             s->inter_matrix[j] = s->avctx->inter_matrix[i];
868     }
869
870     /* precompute matrix */
871     /* for mjpeg, we do include qscale in the matrix */
872     if (s->out_format != FMT_MJPEG) {
873         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
874                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
875                           31, 1);
876         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
877                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
878                           31, 0);
879     }
880
881     if (ff_rate_control_init(s) < 0)
882         return -1;
883
884     return 0;
885 }
886
887 av_cold int MPV_encode_end(AVCodecContext *avctx)
888 {
889     MpegEncContext *s = avctx->priv_data;
890
891     ff_rate_control_uninit(s);
892
893     MPV_common_end(s);
894     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
895         s->out_format == FMT_MJPEG)
896         ff_mjpeg_encode_close(s);
897
898     av_freep(&avctx->extradata);
899
900     return 0;
901 }
902
/**
 * Sum of absolute differences between a 16x16 pixel block and a constant.
 *
 * @param src    top-left pixel of the block
 * @param ref    value every pixel is compared against
 * @param stride distance in bytes between two consecutive rows of src
 * @return sum over the 256 pixels of |pixel - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
916
917 static int get_intra_count(MpegEncContext *s, uint8_t *src,
918                            uint8_t *ref, int stride)
919 {
920     int x, y, w, h;
921     int acc = 0;
922
923     w = s->width  & ~15;
924     h = s->height & ~15;
925
926     for (y = 0; y < h; y += 16) {
927         for (x = 0; x < w; x += 16) {
928             int offset = x + y * stride;
929             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
930                                      16);
931             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
932             int sae  = get_sae(src + offset, mean, stride);
933
934             acc += sae + 500 < sad;
935         }
936     }
937     return acc;
938 }
939
940
/**
 * Append one user frame (or an end-of-stream marker) to s->input_picture[].
 *
 * When pic_arg is non-NULL the function
 *  - stamps it with the next display_picture_number,
 *  - validates its pts against the previously seen one, or guesses a pts
 *    when none was supplied,
 *  - either references the caller's planes directly ("direct") or copies
 *    them into a picture taken from s->picture[],
 *  - copies the frame attributes and stores the pts on the internal picture.
 * Afterwards the input queue is shifted down by one entry and the new
 * picture (NULL when pic_arg is NULL) is stored at index s->max_b_frames.
 *
 * @param s       encoder context
 * @param pic_arg frame supplied by the caller, may be NULL
 * @return 0 on success, a negative value if the timestamp is not strictly
 *         increasing, no unused picture is available, or allocation fails
 */
941 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg)
942 {
943     AVFrame *pic = NULL;
944     int64_t pts;
945     int i;
946     const int encoding_delay = s->max_b_frames;
947     int direct = 1;
948
949     if (pic_arg) {
950         pts = pic_arg->pts;
951         pic_arg->display_picture_number = s->input_picture_number++;
952
953         if (pts != AV_NOPTS_VALUE) {
954             if (s->user_specified_pts != AV_NOPTS_VALUE) {
955                 int64_t time = pts;
956                 int64_t last = s->user_specified_pts;
957
                    /* timestamps must be strictly increasing */
958                 if (time <= last) {
959                     av_log(s->avctx, AV_LOG_ERROR,
960                            "Error, Invalid timestamp=%"PRId64", "
961                            "last=%"PRId64"\n", pts, s->user_specified_pts);
962                     return -1;
963                 }
964             }
965             s->user_specified_pts = pts;
966         } else {
                /* no pts given: continue from the last known one, or fall
                 * back to the display picture number */
967             if (s->user_specified_pts != AV_NOPTS_VALUE) {
968                 s->user_specified_pts =
969                 pts = s->user_specified_pts + 1;
970                 av_log(s->avctx, AV_LOG_INFO,
971                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
972                        pts);
973             } else {
974                 pts = pic_arg->display_picture_number;
975             }
976         }
977     }
978
979   if (pic_arg) {
        /* The caller's buffers are only referenced directly when there is
         * no B-frame delay (or the caller promises to preserve the input)
         * and all line sizes match the encoder's. */
980     if (encoding_delay && !(s->flags & CODEC_FLAG_INPUT_PRESERVED))
981         direct = 0;
982     if (pic_arg->linesize[0] != s->linesize)
983         direct = 0;
984     if (pic_arg->linesize[1] != s->uvlinesize)
985         direct = 0;
986     if (pic_arg->linesize[2] != s->uvlinesize)
987         direct = 0;
988
989     //av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0],
990     //       pic_arg->linesize[1], s->linesize, s->uvlinesize);
991
992     if (direct) {
993         i = ff_find_unused_picture(s, 1);
994         if (i < 0)
995             return i;
996
997         pic = (AVFrame *) &s->picture[i];
998         pic->reference = 3;
999
            /* point the internal picture at the caller's planes */
1000         for (i = 0; i < 4; i++) {
1001             pic->data[i]     = pic_arg->data[i];
1002             pic->linesize[i] = pic_arg->linesize[i];
1003         }
1004         if (ff_alloc_picture(s, (Picture *) pic, 1) < 0) {
1005             return -1;
1006         }
1007     } else {
1008         i = ff_find_unused_picture(s, 0);
1009         if (i < 0)
1010             return i;
1011
1012         pic = (AVFrame *) &s->picture[i];
1013         pic->reference = 3;
1014
1015         if (ff_alloc_picture(s, (Picture *) pic, 0) < 0) {
1016             return -1;
1017         }
1018
             /* If the caller's data already sits INPLACE_OFFSET bytes into
              * the allocated planes, no copy is needed. */
1019         if (pic->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1020             pic->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1021             pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1022             // empty
1023         } else {
1024             int h_chroma_shift, v_chroma_shift;
1025             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift,
1026                                           &v_chroma_shift);
1027
                 /* copy the three planes, chroma sized by the subsampling
                  * shifts */
1028             for (i = 0; i < 3; i++) {
1029                 int src_stride = pic_arg->linesize[i];
1030                 int dst_stride = i ? s->uvlinesize : s->linesize;
1031                 int h_shift = i ? h_chroma_shift : 0;
1032                 int v_shift = i ? v_chroma_shift : 0;
1033                 int w = s->width  >> h_shift;
1034                 int h = s->height >> v_shift;
1035                 uint8_t *src = pic_arg->data[i];
1036                 uint8_t *dst = pic->data[i];
1037
                     /* AMV without EMU_EDGE: height rounded up to a
                      * multiple of 16 before the chroma shift */
1038                 if(s->codec_id == CODEC_ID_AMV && !(s->avctx->flags & CODEC_FLAG_EMU_EDGE)){
1039                     h= ((s->height+15)/16*16)>>v_shift;
1040                 }
1041
1042                 if (!s->avctx->rc_buffer_size)
1043                     dst += INPLACE_OFFSET;
1044
                     /* equal strides allow a single bulk copy; otherwise
                      * copy w bytes per row */
1045                 if (src_stride == dst_stride)
1046                     memcpy(dst, src, src_stride * h);
1047                 else {
1048                     while (h--) {
1049                         memcpy(dst, src, w);
1050                         dst += dst_stride;
1051                         src += src_stride;
1052                     }
1053                 }
1054             }
1055         }
1056     }
1057     copy_picture_attributes(s, pic, pic_arg);
1058     pic->pts = pts; // we set this here to avoid modifying pic_arg
1059   }
1060
1061     /* shift buffer entries */
1062     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1063         s->input_picture[i - 1] = s->input_picture[i];
1064
         /* pic is NULL here when no frame was supplied */
1065     s->input_picture[encoding_delay] = (Picture*) pic;
1066
1067     return 0;
1068 }
1069
1070 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1071 {
1072     int x, y, plane;
1073     int score = 0;
1074     int64_t score64 = 0;
1075
1076     for (plane = 0; plane < 3; plane++) {
1077         const int stride = p->f.linesize[plane];
1078         const int bw = plane ? 1 : 2;
1079         for (y = 0; y < s->mb_height * bw; y++) {
1080             for (x = 0; x < s->mb_width * bw; x++) {
1081                 int off = p->f.type == FF_BUFFER_TYPE_SHARED ? 0 : 16;
1082                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1083                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1084                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1085
1086                 switch (s->avctx->frame_skip_exp) {
1087                 case 0: score    =  FFMAX(score, v);          break;
1088                 case 1: score   += FFABS(v);                  break;
1089                 case 2: score   += v * v;                     break;
1090                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1091                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1092                 }
1093             }
1094         }
1095     }
1096
1097     if (score)
1098         score64 = score;
1099
1100     if (score64 < s->avctx->frame_skip_threshold)
1101         return 1;
1102     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1103         return 1;
1104     return 0;
1105 }
1106
1107 static int estimate_best_b_count(MpegEncContext *s)
1108 {
1109     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1110     AVCodecContext *c = avcodec_alloc_context3(NULL);
1111     AVFrame input[FF_MAX_B_FRAMES + 2];
1112     const int scale = s->avctx->brd_scale;
1113     int i, j, out_size, p_lambda, b_lambda, lambda2;
1114     int outbuf_size  = s->width * s->height; // FIXME
1115     uint8_t *outbuf  = av_malloc(outbuf_size);
1116     int64_t best_rd  = INT64_MAX;
1117     int best_b_count = -1;
1118
1119     assert(scale >= 0 && scale <= 3);
1120
1121     //emms_c();
1122     //s->next_picture_ptr->quality;
1123     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1124     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1125     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1126     if (!b_lambda) // FIXME we should do this somewhere else
1127         b_lambda = p_lambda;
1128     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1129                FF_LAMBDA_SHIFT;
1130
1131     c->width        = s->width  >> scale;
1132     c->height       = s->height >> scale;
1133     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1134                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1135     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1136     c->mb_decision  = s->avctx->mb_decision;
1137     c->me_cmp       = s->avctx->me_cmp;
1138     c->mb_cmp       = s->avctx->mb_cmp;
1139     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1140     c->pix_fmt      = PIX_FMT_YUV420P;
1141     c->time_base    = s->avctx->time_base;
1142     c->max_b_frames = s->max_b_frames;
1143
1144     if (avcodec_open2(c, codec, NULL) < 0)
1145         return -1;
1146
1147     for (i = 0; i < s->max_b_frames + 2; i++) {
1148         int ysize = c->width * c->height;
1149         int csize = (c->width / 2) * (c->height / 2);
1150         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1151                                                 s->next_picture_ptr;
1152
1153         avcodec_get_frame_defaults(&input[i]);
1154         input[i].data[0]     = av_malloc(ysize + 2 * csize);
1155         input[i].data[1]     = input[i].data[0] + ysize;
1156         input[i].data[2]     = input[i].data[1] + csize;
1157         input[i].linesize[0] = c->width;
1158         input[i].linesize[1] =
1159         input[i].linesize[2] = c->width / 2;
1160
1161         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1162             pre_input = *pre_input_ptr;
1163
1164             if (pre_input.f.type != FF_BUFFER_TYPE_SHARED && i) {
1165                 pre_input.f.data[0] += INPLACE_OFFSET;
1166                 pre_input.f.data[1] += INPLACE_OFFSET;
1167                 pre_input.f.data[2] += INPLACE_OFFSET;
1168             }
1169
1170             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
1171                                  pre_input.f.data[0], pre_input.f.linesize[0],
1172                                  c->width,      c->height);
1173             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
1174                                  pre_input.f.data[1], pre_input.f.linesize[1],
1175                                  c->width >> 1, c->height >> 1);
1176             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
1177                                  pre_input.f.data[2], pre_input.f.linesize[2],
1178                                  c->width >> 1, c->height >> 1);
1179         }
1180     }
1181
1182     for (j = 0; j < s->max_b_frames + 1; j++) {
1183         int64_t rd = 0;
1184
1185         if (!s->input_picture[j])
1186             break;
1187
1188         c->error[0] = c->error[1] = c->error[2] = 0;
1189
1190         input[0].pict_type = AV_PICTURE_TYPE_I;
1191         input[0].quality   = 1 * FF_QP2LAMBDA;
1192         out_size           = avcodec_encode_video(c, outbuf,
1193                                                   outbuf_size, &input[0]);
1194         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1195
1196         for (i = 0; i < s->max_b_frames + 1; i++) {
1197             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1198
1199             input[i + 1].pict_type = is_p ?
1200                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1201             input[i + 1].quality   = is_p ? p_lambda : b_lambda;
1202             out_size = avcodec_encode_video(c, outbuf, outbuf_size,
1203                                             &input[i + 1]);
1204             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1205         }
1206
1207         /* get the delayed frames */
1208         while (out_size) {
1209             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
1210             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1211         }
1212
1213         rd += c->error[0] + c->error[1] + c->error[2];
1214
1215         if (rd < best_rd) {
1216             best_rd = rd;
1217             best_b_count = j;
1218         }
1219     }
1220
1221     av_freep(&outbuf);
1222     avcodec_close(c);
1223     av_freep(&c);
1224
1225     for (i = 0; i < s->max_b_frames + 2; i++) {
1226         av_freep(&input[i].data[0]);
1227     }
1228
1229     return best_b_count;
1230 }
1231
1232 static int select_input_picture(MpegEncContext *s)
1233 {
1234     int i;
1235
1236     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1237         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1238     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1239
1240     /* set next picture type & ordering */
1241     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1242         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1243             s->next_picture_ptr == NULL || s->intra_only) {
1244             s->reordered_input_picture[0] = s->input_picture[0];
1245             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1246             s->reordered_input_picture[0]->f.coded_picture_number =
1247                 s->coded_picture_number++;
1248         } else {
1249             int b_frames;
1250
1251             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1252                 if (s->picture_in_gop_number < s->gop_size &&
1253                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1254                     // FIXME check that te gop check above is +-1 correct
1255                     //av_log(NULL, AV_LOG_DEBUG, "skip %p %"PRId64"\n",
1256                     //       s->input_picture[0]->f.data[0],
1257                     //       s->input_picture[0]->pts);
1258
1259                     if (s->input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED) {
1260                         for (i = 0; i < 4; i++)
1261                             s->input_picture[0]->f.data[i] = NULL;
1262                         s->input_picture[0]->f.type = 0;
1263                     } else {
1264                         assert(s->input_picture[0]->f.type == FF_BUFFER_TYPE_USER ||
1265                                s->input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL);
1266
1267                         s->avctx->release_buffer(s->avctx,
1268                                                  (AVFrame *) s->input_picture[0]);
1269                     }
1270
1271                     emms_c();
1272                     ff_vbv_update(s, 0);
1273
1274                     goto no_output_pic;
1275                 }
1276             }
1277
1278             if (s->flags & CODEC_FLAG_PASS2) {
1279                 for (i = 0; i < s->max_b_frames + 1; i++) {
1280                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1281
1282                     if (pict_num >= s->rc_context.num_entries)
1283                         break;
1284                     if (!s->input_picture[i]) {
1285                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1286                         break;
1287                     }
1288
1289                     s->input_picture[i]->f.pict_type =
1290                         s->rc_context.entry[pict_num].new_pict_type;
1291                 }
1292             }
1293
1294             if (s->avctx->b_frame_strategy == 0) {
1295                 b_frames = s->max_b_frames;
1296                 while (b_frames && !s->input_picture[b_frames])
1297                     b_frames--;
1298             } else if (s->avctx->b_frame_strategy == 1) {
1299                 for (i = 1; i < s->max_b_frames + 1; i++) {
1300                     if (s->input_picture[i] &&
1301                         s->input_picture[i]->b_frame_score == 0) {
1302                         s->input_picture[i]->b_frame_score =
1303                             get_intra_count(s,
1304                                             s->input_picture[i    ]->f.data[0],
1305                                             s->input_picture[i - 1]->f.data[0],
1306                                             s->linesize) + 1;
1307                     }
1308                 }
1309                 for (i = 0; i < s->max_b_frames + 1; i++) {
1310                     if (s->input_picture[i] == NULL ||
1311                         s->input_picture[i]->b_frame_score - 1 >
1312                             s->mb_num / s->avctx->b_sensitivity)
1313                         break;
1314                 }
1315
1316                 b_frames = FFMAX(0, i - 1);
1317
1318                 /* reset scores */
1319                 for (i = 0; i < b_frames + 1; i++) {
1320                     s->input_picture[i]->b_frame_score = 0;
1321                 }
1322             } else if (s->avctx->b_frame_strategy == 2) {
1323                 b_frames = estimate_best_b_count(s);
1324             } else {
1325                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1326                 b_frames = 0;
1327             }
1328
1329             emms_c();
1330             //static int b_count = 0;
1331             //b_count += b_frames;
1332             //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
1333
1334             for (i = b_frames - 1; i >= 0; i--) {
1335                 int type = s->input_picture[i]->f.pict_type;
1336                 if (type && type != AV_PICTURE_TYPE_B)
1337                     b_frames = i;
1338             }
1339             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1340                 b_frames == s->max_b_frames) {
1341                 av_log(s->avctx, AV_LOG_ERROR,
1342                        "warning, too many b frames in a row\n");
1343             }
1344
1345             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1346                 if ((s->flags2 & CODEC_FLAG2_STRICT_GOP) &&
1347                     s->gop_size > s->picture_in_gop_number) {
1348                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1349                 } else {
1350                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1351                         b_frames = 0;
1352                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1353                 }
1354             }
1355
1356             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1357                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1358                 b_frames--;
1359
1360             s->reordered_input_picture[0] = s->input_picture[b_frames];
1361             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1362                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1363             s->reordered_input_picture[0]->f.coded_picture_number =
1364                 s->coded_picture_number++;
1365             for (i = 0; i < b_frames; i++) {
1366                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1367                 s->reordered_input_picture[i + 1]->f.pict_type =
1368                     AV_PICTURE_TYPE_B;
1369                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1370                     s->coded_picture_number++;
1371             }
1372         }
1373     }
1374 no_output_pic:
1375     if (s->reordered_input_picture[0]) {
1376         s->reordered_input_picture[0]->f.reference =
1377            s->reordered_input_picture[0]->f.pict_type !=
1378                AV_PICTURE_TYPE_B ? 3 : 0;
1379
1380         ff_copy_picture(&s->new_picture, s->reordered_input_picture[0]);
1381
1382         if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED ||
1383             s->avctx->rc_buffer_size) {
1384             // input is a shared pix, so we can't modifiy it -> alloc a new
1385             // one & ensure that the shared one is reuseable
1386
1387             Picture *pic;
1388             int i = ff_find_unused_picture(s, 0);
1389             if (i < 0)
1390                 return i;
1391             pic = &s->picture[i];
1392
1393             pic->f.reference = s->reordered_input_picture[0]->f.reference;
1394             if (ff_alloc_picture(s, pic, 0) < 0) {
1395                 return -1;
1396             }
1397
1398             /* mark us unused / free shared pic */
1399             if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL)
1400                 s->avctx->release_buffer(s->avctx,
1401                                          (AVFrame *) s->reordered_input_picture[0]);
1402             for (i = 0; i < 4; i++)
1403                 s->reordered_input_picture[0]->f.data[i] = NULL;
1404             s->reordered_input_picture[0]->f.type = 0;
1405
1406             copy_picture_attributes(s, (AVFrame *) pic,
1407                                     (AVFrame *) s->reordered_input_picture[0]);
1408
1409             s->current_picture_ptr = pic;
1410         } else {
1411             // input is not a shared pix -> reuse buffer for current_pix
1412
1413             assert(s->reordered_input_picture[0]->f.type ==
1414                        FF_BUFFER_TYPE_USER ||
1415                    s->reordered_input_picture[0]->f.type ==
1416                        FF_BUFFER_TYPE_INTERNAL);
1417
1418             s->current_picture_ptr = s->reordered_input_picture[0];
1419             for (i = 0; i < 4; i++) {
1420                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1421             }
1422         }
1423         ff_copy_picture(&s->current_picture, s->current_picture_ptr);
1424
1425         s->picture_number = s->new_picture.f.display_picture_number;
1426         //printf("dpn:%d\n", s->picture_number);
1427     } else {
1428         memset(&s->new_picture, 0, sizeof(Picture));
1429     }
1430     return 0;
1431 }
1432
/**
 * Encode one picture into buf.
 *
 * The output buffer is first divided among the slice thread contexts. The
 * supplied frame (data, may be NULL) is queued with load_input_picture()
 * and the next picture to code is chosen with select_input_picture(). If
 * a picture is available it is encoded; when a rate control buffer is
 * configured and the result is too large, lambda is raised and the
 * picture is encoded again. Afterwards stuffing is appended if
 * ff_vbv_update() asks for it and, for constant rate FMT_MPEG1 output,
 * the vbv_delay field in the already written bitstream is patched.
 *
 * @param avctx    codec context whose priv_data is the MpegEncContext
 * @param buf      output buffer
 * @param buf_size size of buf in bytes
 * @param data     AVFrame to encode, or NULL
 * @return number of bytes written (0 when no picture was output),
 *         -1 on error
 */
1433 int MPV_encode_picture(AVCodecContext *avctx,
1434                        unsigned char *buf, int buf_size, void *data)
1435 {
1436     MpegEncContext *s = avctx->priv_data;
1437     AVFrame *pic_arg  = data;
1438     int i, stuffing_count;
1439     int context_count = s->slice_context_count;
1440
         /* give every slice context a share of buf proportional to its
          * macroblock row range */
1441     for (i = 0; i < context_count; i++) {
1442         int start_y = s->thread_context[i]->start_mb_y;
1443         int   end_y = s->thread_context[i]->  end_mb_y;
1444         int h       = s->mb_height;
1445         uint8_t *start = buf + (size_t)(((int64_t) buf_size) * start_y / h);
1446         uint8_t *end   = buf + (size_t)(((int64_t) buf_size) *   end_y / h);
1447
1448         init_put_bits(&s->thread_context[i]->pb, start, end - start);
1449     }
1450
1451     s->picture_in_gop_number++;
1452
1453     if (load_input_picture(s, pic_arg) < 0)
1454         return -1;
1455
1456     if (select_input_picture(s) < 0) {
1457         return -1;
1458     }
1459
1460     /* output? */
1461     if (s->new_picture.f.data[0]) {
1462         s->pict_type = s->new_picture.f.pict_type;
1463         //emms_c();
1464         //printf("qs:%f %f %d\n", s->new_picture.quality,
1465         //       s->current_picture.quality, s->qscale);
1466         MPV_frame_start(s, avctx);
1467 vbv_retry:
1468         if (encode_picture(s, s->picture_number) < 0)
1469             return -1;
1470
             /* export the per-frame statistics to the codec context */
1471         avctx->header_bits = s->header_bits;
1472         avctx->mv_bits     = s->mv_bits;
1473         avctx->misc_bits   = s->misc_bits;
1474         avctx->i_tex_bits  = s->i_tex_bits;
1475         avctx->p_tex_bits  = s->p_tex_bits;
1476         avctx->i_count     = s->i_count;
1477         // FIXME f/b_count in avctx
1478         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1479         avctx->skip_count  = s->skip_count;
1480
1481         MPV_frame_end(s);
1482
1483         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1484             ff_mjpeg_encode_picture_trailer(s);
1485
1486         if (avctx->rc_buffer_size) {
1487             RateControlContext *rcc = &s->rc_context;
1488             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1489
                 /* frame too large and lambda can still grow: raise lambda,
                  * undo the state changes of this attempt, reset the bit
                  * writers and encode again */
1490             if (put_bits_count(&s->pb) > max_size &&
1491                 s->lambda < s->avctx->lmax) {
1492                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1493                                        (s->qscale + 1) / s->qscale);
1494                 if (s->adaptive_quant) {
1495                     int i;
1496                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1497                         s->lambda_table[i] =
1498                             FFMAX(s->lambda_table[i] + 1,
1499                                   s->lambda_table[i] * (s->qscale + 1) /
1500                                   s->qscale);
1501                 }
1502                 s->mb_skipped = 0;        // done in MPV_frame_start()
1503                 // done in encode_picture() so we must undo it
1504                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1505                     if (s->flipflop_rounding          ||
1506                         s->codec_id == CODEC_ID_H263P ||
1507                         s->codec_id == CODEC_ID_MPEG4)
1508                         s->no_rounding ^= 1;
1509                 }
1510                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1511                     s->time_base       = s->last_time_base;
1512                     s->last_non_b_time = s->time - s->pp_time;
1513                 }
1514                 //av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
1515                 for (i = 0; i < context_count; i++) {
1516                     PutBitContext *pb = &s->thread_context[i]->pb;
1517                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1518                 }
1519                 goto vbv_retry;
1520             }
1521
1522             assert(s->avctx->rc_max_rate);
1523         }
1524
1525         if (s->flags & CODEC_FLAG_PASS1)
1526             ff_write_pass1_stats(s);
1527
             /* accumulate the per-picture error values in the codec context */
1528         for (i = 0; i < 4; i++) {
1529             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1530             avctx->error[i] += s->current_picture_ptr->f.error[i];
1531         }
1532
1533         if (s->flags & CODEC_FLAG_PASS1)
1534             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1535                    avctx->i_tex_bits + avctx->p_tex_bits ==
1536                        put_bits_count(&s->pb));
1537         flush_put_bits(&s->pb);
1538         s->frame_bits  = put_bits_count(&s->pb);
1539
             /* a non-zero result is the number of stuffing bytes to append */
1540         stuffing_count = ff_vbv_update(s, s->frame_bits);
1541         if (stuffing_count) {
                 /* refuse if fewer than stuffing_count + 50 bytes remain */
1542             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1543                     stuffing_count + 50) {
1544                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1545                 return -1;
1546             }
1547
1548             switch (s->codec_id) {
1549             case CODEC_ID_MPEG1VIDEO:
1550             case CODEC_ID_MPEG2VIDEO:
                     /* zero bytes */
1551                 while (stuffing_count--) {
1552                     put_bits(&s->pb, 8, 0);
1553                 }
1554             break;
1555             case CODEC_ID_MPEG4:
                     /* 4 byte code 0x000001C3 followed by 0xFF bytes */
1556                 put_bits(&s->pb, 16, 0);
1557                 put_bits(&s->pb, 16, 0x1C3);
1558                 stuffing_count -= 4;
1559                 while (stuffing_count--) {
1560                     put_bits(&s->pb, 8, 0xFF);
1561                 }
1562             break;
1563             default:
1564                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1565             }
1566             flush_put_bits(&s->pb);
1567             s->frame_bits  = put_bits_count(&s->pb);
1568         }
1569
1570         /* update mpeg1/2 vbv_delay for CBR */
1571         if (s->avctx->rc_max_rate                          &&
1572             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1573             s->out_format == FMT_MPEG1                     &&
1574             90000LL * (avctx->rc_buffer_size - 1) <=
1575                 s->avctx->rc_max_rate * 0xFFFFLL) {
1576             int vbv_delay, min_delay;
1577             double inbits  = s->avctx->rc_max_rate *
1578                              av_q2d(s->avctx->time_base);
1579             int    minbits = s->frame_bits - 8 *
1580                              (s->vbv_delay_ptr - s->pb.buf - 1);
1581             double bits    = s->rc_context.buffer_index + minbits - inbits;
1582
1583             if (bits < 0)
1584                 av_log(s->avctx, AV_LOG_ERROR,
1585                        "Internal error, negative bits\n");
1586
1587             assert(s->repeat_first_field == 0);
1588
                 /* delay expressed in 90 kHz ticks */
1589             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1590             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1591                         s->avctx->rc_max_rate;
1592
1593             vbv_delay = FFMAX(vbv_delay, min_delay);
1594
1595             assert(vbv_delay < 0xFFFF);
1596
                 /* write the 16 bit value across three bytes: 3 bits,
                  * 8 bits, 5 bits */
1597             s->vbv_delay_ptr[0] &= 0xF8;
1598             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1599             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1600             s->vbv_delay_ptr[2] &= 0x07;
1601             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1602             avctx->vbv_delay     = vbv_delay * 300;
1603         }
1604         s->total_bits     += s->frame_bits;
1605         avctx->frame_bits  = s->frame_bits;
1606     } else {
             /* nothing to output: the bit writer must be untouched */
1607         assert((put_bits_ptr(&s->pb) == s->pb.buf));
1608         s->frame_bits = 0;
1609     }
1610     assert((s->frame_bits & 7) == 0);
1611
1612     return s->frame_bits / 8;
1613 }
1614
1615 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1616                                                 int n, int threshold)
1617 {
1618     static const char tab[64] = {
1619         3, 2, 2, 1, 1, 1, 1, 1,
1620         1, 1, 1, 1, 1, 1, 1, 1,
1621         1, 1, 1, 1, 1, 1, 1, 1,
1622         0, 0, 0, 0, 0, 0, 0, 0,
1623         0, 0, 0, 0, 0, 0, 0, 0,
1624         0, 0, 0, 0, 0, 0, 0, 0,
1625         0, 0, 0, 0, 0, 0, 0, 0,
1626         0, 0, 0, 0, 0, 0, 0, 0
1627     };
1628     int score = 0;
1629     int run = 0;
1630     int i;
1631     DCTELEM *block = s->block[n];
1632     const int last_index = s->block_last_index[n];
1633     int skip_dc;
1634
1635     if (threshold < 0) {
1636         skip_dc = 0;
1637         threshold = -threshold;
1638     } else
1639         skip_dc = 1;
1640
1641     /* Are all we could set to zero already zero? */
1642     if (last_index <= skip_dc - 1)
1643         return;
1644
1645     for (i = 0; i <= last_index; i++) {
1646         const int j = s->intra_scantable.permutated[i];
1647         const int level = FFABS(block[j]);
1648         if (level == 1) {
1649             if (skip_dc && i == 0)
1650                 continue;
1651             score += tab[run];
1652             run = 0;
1653         } else if (level > 1) {
1654             return;
1655         } else {
1656             run++;
1657         }
1658     }
1659     if (score >= threshold)
1660         return;
1661     for (i = skip_dc; i <= last_index; i++) {
1662         const int j = s->intra_scantable.permutated[i];
1663         block[j] = 0;
1664     }
1665     if (block[0])
1666         s->block_last_index[n] = 0;
1667     else
1668         s->block_last_index[n] = -1;
1669 }
1670
1671 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block,
1672                                int last_index)
1673 {
1674     int i;
1675     const int maxlevel = s->max_qcoeff;
1676     const int minlevel = s->min_qcoeff;
1677     int overflow = 0;
1678
1679     if (s->mb_intra) {
1680         i = 1; // skip clipping of intra dc
1681     } else
1682         i = 0;
1683
1684     for (; i <= last_index; i++) {
1685         const int j = s->intra_scantable.permutated[i];
1686         int level = block[j];
1687
1688         if (level > maxlevel) {
1689             level = maxlevel;
1690             overflow++;
1691         } else if (level < minlevel) {
1692             level = minlevel;
1693             overflow++;
1694         }
1695
1696         block[j] = level;
1697     }
1698
1699     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1700         av_log(s->avctx, AV_LOG_INFO,
1701                "warning, clipping %d dct coefficients to %d..%d\n",
1702                overflow, minlevel, maxlevel);
1703 }
1704
/**
 * Fill an 8x8 weight table from the local activity of an 8x8 pixel block.
 *
 * For every pixel the 3x3 neighbourhood (cropped to the block) is examined
 * and the weight is 36 * sqrt(count * sum(v^2) - sum(v)^2) / count, where
 * count is the number of neighbourhood pixels actually inside the block.
 *
 * @param weight output, 64 entries in raster order
 * @param ptr    top-left pixel of the block
 * @param stride distance between two pixel rows
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            /* size of the cropped neighbourhood */
            const int count = (bottom - top) * (right - left);
            int sum = 0;
            int sqr = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                for (nx = left; nx < right; nx++) {
                    const int v = ptr[nx + ny * stride];

                    sum += v;
                    sqr += v * v;
                }
            }

            weight[col + 8 * row] =
                (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
1728
1729 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1730                                                 int motion_x, int motion_y,
1731                                                 int mb_block_height,
1732                                                 int mb_block_count)
1733 {
1734     int16_t weight[8][64];
1735     DCTELEM orig[8][64];
1736     const int mb_x = s->mb_x;
1737     const int mb_y = s->mb_y;
1738     int i;
1739     int skip_dct[8];
1740     int dct_offset = s->linesize * 8; // default for progressive frames
1741     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1742     int wrap_y, wrap_c;
1743
1744     for (i = 0; i < mb_block_count; i++)
1745         skip_dct[i] = s->skipdct;
1746
1747     if (s->adaptive_quant) {
1748         const int last_qp = s->qscale;
1749         const int mb_xy = mb_x + mb_y * s->mb_stride;
1750
1751         s->lambda = s->lambda_table[mb_xy];
1752         update_qscale(s);
1753
1754         if (!(s->flags & CODEC_FLAG_QP_RD)) {
1755             s->qscale = s->current_picture_ptr->f.qscale_table[mb_xy];
1756             s->dquant = s->qscale - last_qp;
1757
1758             if (s->out_format == FMT_H263) {
1759                 s->dquant = av_clip(s->dquant, -2, 2);
1760
1761                 if (s->codec_id == CODEC_ID_MPEG4) {
1762                     if (!s->mb_intra) {
1763                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1764                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1765                                 s->dquant = 0;
1766                         }
1767                         if (s->mv_type == MV_TYPE_8X8)
1768                             s->dquant = 0;
1769                     }
1770                 }
1771             }
1772         }
1773         ff_set_qscale(s, last_qp + s->dquant);
1774     } else if (s->flags & CODEC_FLAG_QP_RD)
1775         ff_set_qscale(s, s->qscale + s->dquant);
1776
1777     wrap_y = s->linesize;
1778     wrap_c = s->uvlinesize;
1779     ptr_y  = s->new_picture.f.data[0] +
1780              (mb_y * 16 * wrap_y)              + mb_x * 16;
1781     ptr_cb = s->new_picture.f.data[1] +
1782              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1783     ptr_cr = s->new_picture.f.data[2] +
1784              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1785
1786     if((mb_x*16+16 > s->width || mb_y*16+16 > s->height) && s->codec_id != CODEC_ID_AMV){
1787         uint8_t *ebuf = s->edge_emu_buffer + 32;
1788         s->dsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
1789                                 mb_y * 16, s->width, s->height);
1790         ptr_y = ebuf;
1791         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, 8,
1792                                 mb_block_height, mb_x * 8, mb_y * 8,
1793                                 s->width >> 1, s->height >> 1);
1794         ptr_cb = ebuf + 18 * wrap_y;
1795         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr, wrap_c, 8,
1796                                 mb_block_height, mb_x * 8, mb_y * 8,
1797                                 s->width >> 1, s->height >> 1);
1798         ptr_cr = ebuf + 18 * wrap_y + 8;
1799     }
1800
1801     if (s->mb_intra) {
1802         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1803             int progressive_score, interlaced_score;
1804
1805             s->interlaced_dct = 0;
1806             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1807                                                     NULL, wrap_y, 8) +
1808                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1809                                                     NULL, wrap_y, 8) - 400;
1810
1811             if (progressive_score > 0) {
1812                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1813                                                        NULL, wrap_y * 2, 8) +
1814                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1815                                                        NULL, wrap_y * 2, 8);
1816                 if (progressive_score > interlaced_score) {
1817                     s->interlaced_dct = 1;
1818
1819                     dct_offset = wrap_y;
1820                     wrap_y <<= 1;
1821                     if (s->chroma_format == CHROMA_422)
1822                         wrap_c <<= 1;
1823                 }
1824             }
1825         }
1826
1827         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1828         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1829         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1830         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1831
1832         if (s->flags & CODEC_FLAG_GRAY) {
1833             skip_dct[4] = 1;
1834             skip_dct[5] = 1;
1835         } else {
1836             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1837             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1838             if (!s->chroma_y_shift) { /* 422 */
1839                 s->dsp.get_pixels(s->block[6],
1840                                   ptr_cb + (dct_offset >> 1), wrap_c);
1841                 s->dsp.get_pixels(s->block[7],
1842                                   ptr_cr + (dct_offset >> 1), wrap_c);
1843             }
1844         }
1845     } else {
1846         op_pixels_func (*op_pix)[4];
1847         qpel_mc_func (*op_qpix)[16];
1848         uint8_t *dest_y, *dest_cb, *dest_cr;
1849
1850         dest_y  = s->dest[0];
1851         dest_cb = s->dest[1];
1852         dest_cr = s->dest[2];
1853
1854         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1855             op_pix  = s->dsp.put_pixels_tab;
1856             op_qpix = s->dsp.put_qpel_pixels_tab;
1857         } else {
1858             op_pix  = s->dsp.put_no_rnd_pixels_tab;
1859             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1860         }
1861
1862         if (s->mv_dir & MV_DIR_FORWARD) {
1863             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.f.data,
1864                        op_pix, op_qpix);
1865             op_pix  = s->dsp.avg_pixels_tab;
1866             op_qpix = s->dsp.avg_qpel_pixels_tab;
1867         }
1868         if (s->mv_dir & MV_DIR_BACKWARD) {
1869             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.f.data,
1870                        op_pix, op_qpix);
1871         }
1872
1873         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1874             int progressive_score, interlaced_score;
1875
1876             s->interlaced_dct = 0;
1877             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1878                                                     ptr_y,              wrap_y,
1879                                                     8) +
1880                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1881                                                     ptr_y + wrap_y * 8, wrap_y,
1882                                                     8) - 400;
1883
1884             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1885                 progressive_score -= 400;
1886
1887             if (progressive_score > 0) {
1888                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1889                                                        ptr_y,
1890                                                        wrap_y * 2, 8) +
1891                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1892                                                        ptr_y + wrap_y,
1893                                                        wrap_y * 2, 8);
1894
1895                 if (progressive_score > interlaced_score) {
1896                     s->interlaced_dct = 1;
1897
1898                     dct_offset = wrap_y;
1899                     wrap_y <<= 1;
1900                     if (s->chroma_format == CHROMA_422)
1901                         wrap_c <<= 1;
1902                 }
1903             }
1904         }
1905
1906         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1907         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1908         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1909                            dest_y + dct_offset, wrap_y);
1910         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1911                            dest_y + dct_offset + 8, wrap_y);
1912
1913         if (s->flags & CODEC_FLAG_GRAY) {
1914             skip_dct[4] = 1;
1915             skip_dct[5] = 1;
1916         } else {
1917             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1918             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1919             if (!s->chroma_y_shift) { /* 422 */
1920                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1921                                    dest_cb + (dct_offset >> 1), wrap_c);
1922                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
1923                                    dest_cr + (dct_offset >> 1), wrap_c);
1924             }
1925         }
1926         /* pre quantization */
1927         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1928                 2 * s->qscale * s->qscale) {
1929             // FIXME optimize
1930             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1931                               wrap_y, 8) < 20 * s->qscale)
1932                 skip_dct[0] = 1;
1933             if (s->dsp.sad[1](NULL, ptr_y + 8,
1934                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1935                 skip_dct[1] = 1;
1936             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1937                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1938                 skip_dct[2] = 1;
1939             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1940                               dest_y + dct_offset + 8,
1941                               wrap_y, 8) < 20 * s->qscale)
1942                 skip_dct[3] = 1;
1943             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
1944                               wrap_c, 8) < 20 * s->qscale)
1945                 skip_dct[4] = 1;
1946             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
1947                               wrap_c, 8) < 20 * s->qscale)
1948                 skip_dct[5] = 1;
1949             if (!s->chroma_y_shift) { /* 422 */
1950                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
1951                                   dest_cb + (dct_offset >> 1),
1952                                   wrap_c, 8) < 20 * s->qscale)
1953                     skip_dct[6] = 1;
1954                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
1955                                   dest_cr + (dct_offset >> 1),
1956                                   wrap_c, 8) < 20 * s->qscale)
1957                     skip_dct[7] = 1;
1958             }
1959         }
1960     }
1961
1962     if (s->avctx->quantizer_noise_shaping) {
1963         if (!skip_dct[0])
1964             get_visual_weight(weight[0], ptr_y                 , wrap_y);
1965         if (!skip_dct[1])
1966             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
1967         if (!skip_dct[2])
1968             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
1969         if (!skip_dct[3])
1970             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
1971         if (!skip_dct[4])
1972             get_visual_weight(weight[4], ptr_cb                , wrap_c);
1973         if (!skip_dct[5])
1974             get_visual_weight(weight[5], ptr_cr                , wrap_c);
1975         if (!s->chroma_y_shift) { /* 422 */
1976             if (!skip_dct[6])
1977                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
1978                                   wrap_c);
1979             if (!skip_dct[7])
1980                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
1981                                   wrap_c);
1982         }
1983         memcpy(orig[0], s->block[0], sizeof(DCTELEM) * 64 * mb_block_count);
1984     }
1985
1986     /* DCT & quantize */
1987     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
1988     {
1989         for (i = 0; i < mb_block_count; i++) {
1990             if (!skip_dct[i]) {
1991                 int overflow;
1992                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
1993                 // FIXME we could decide to change to quantizer instead of
1994                 // clipping
1995                 // JS: I don't think that would be a good idea it could lower
1996                 //     quality instead of improve it. Just INTRADC clipping
1997                 //     deserves changes in quantizer
1998                 if (overflow)
1999                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2000             } else
2001                 s->block_last_index[i] = -1;
2002         }
2003         if (s->avctx->quantizer_noise_shaping) {
2004             for (i = 0; i < mb_block_count; i++) {
2005                 if (!skip_dct[i]) {
2006                     s->block_last_index[i] =
2007                         dct_quantize_refine(s, s->block[i], weight[i],
2008                                             orig[i], i, s->qscale);
2009                 }
2010             }
2011         }
2012
2013         if (s->luma_elim_threshold && !s->mb_intra)
2014             for (i = 0; i < 4; i++)
2015                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2016         if (s->chroma_elim_threshold && !s->mb_intra)
2017             for (i = 4; i < mb_block_count; i++)
2018                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2019
2020         if (s->flags & CODEC_FLAG_CBP_RD) {
2021             for (i = 0; i < mb_block_count; i++) {
2022                 if (s->block_last_index[i] == -1)
2023                     s->coded_score[i] = INT_MAX / 256;
2024             }
2025         }
2026     }
2027
2028     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2029         s->block_last_index[4] =
2030         s->block_last_index[5] = 0;
2031         s->block[4][0] =
2032         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2033     }
2034
2035     // non c quantize code returns incorrect block_last_index FIXME
2036     if (s->alternate_scan && s->dct_quantize != dct_quantize_c) {
2037         for (i = 0; i < mb_block_count; i++) {
2038             int j;
2039             if (s->block_last_index[i] > 0) {
2040                 for (j = 63; j > 0; j--) {
2041                     if (s->block[i][s->intra_scantable.permutated[j]])
2042                         break;
2043                 }
2044                 s->block_last_index[i] = j;
2045             }
2046         }
2047     }
2048
2049     /* huffman encode */
2050     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2051     case CODEC_ID_MPEG1VIDEO:
2052     case CODEC_ID_MPEG2VIDEO:
2053         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2054             mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2055         break;
2056     case CODEC_ID_MPEG4:
2057         if (CONFIG_MPEG4_ENCODER)
2058             mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2059         break;
2060     case CODEC_ID_MSMPEG4V2:
2061     case CODEC_ID_MSMPEG4V3:
2062     case CODEC_ID_WMV1:
2063         if (CONFIG_MSMPEG4_ENCODER)
2064             msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2065         break;
2066     case CODEC_ID_WMV2:
2067         if (CONFIG_WMV2_ENCODER)
2068             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2069         break;
2070     case CODEC_ID_H261:
2071         if (CONFIG_H261_ENCODER)
2072             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2073         break;
2074     case CODEC_ID_H263:
2075     case CODEC_ID_H263P:
2076     case CODEC_ID_FLV1:
2077     case CODEC_ID_RV10:
2078     case CODEC_ID_RV20:
2079         if (CONFIG_H263_ENCODER)
2080             h263_encode_mb(s, s->block, motion_x, motion_y);
2081         break;
2082     case CODEC_ID_MJPEG:
2083     case CODEC_ID_AMV:
2084         if (CONFIG_MJPEG_ENCODER)
2085             ff_mjpeg_encode_mb(s, s->block);
2086         break;
2087     default:
2088         assert(0);
2089     }
2090 }
2091
2092 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2093 {
2094     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2095     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2096 }
2097
2098 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2099     int i;
2100
2101     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2102
2103     /* mpeg1 */
2104     d->mb_skip_run= s->mb_skip_run;
2105     for(i=0; i<3; i++)
2106         d->last_dc[i] = s->last_dc[i];
2107
2108     /* statistics */
2109     d->mv_bits= s->mv_bits;
2110     d->i_tex_bits= s->i_tex_bits;
2111     d->p_tex_bits= s->p_tex_bits;
2112     d->i_count= s->i_count;
2113     d->f_count= s->f_count;
2114     d->b_count= s->b_count;
2115     d->skip_count= s->skip_count;
2116     d->misc_bits= s->misc_bits;
2117     d->last_bits= 0;
2118
2119     d->mb_skipped= 0;
2120     d->qscale= s->qscale;
2121     d->dquant= s->dquant;
2122
2123     d->esc3_level_length= s->esc3_level_length;
2124 }
2125
2126 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2127     int i;
2128
2129     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2130     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2131
2132     /* mpeg1 */
2133     d->mb_skip_run= s->mb_skip_run;
2134     for(i=0; i<3; i++)
2135         d->last_dc[i] = s->last_dc[i];
2136
2137     /* statistics */
2138     d->mv_bits= s->mv_bits;
2139     d->i_tex_bits= s->i_tex_bits;
2140     d->p_tex_bits= s->p_tex_bits;
2141     d->i_count= s->i_count;
2142     d->f_count= s->f_count;
2143     d->b_count= s->b_count;
2144     d->skip_count= s->skip_count;
2145     d->misc_bits= s->misc_bits;
2146
2147     d->mb_intra= s->mb_intra;
2148     d->mb_skipped= s->mb_skipped;
2149     d->mv_type= s->mv_type;
2150     d->mv_dir= s->mv_dir;
2151     d->pb= s->pb;
2152     if(s->data_partitioning){
2153         d->pb2= s->pb2;
2154         d->tex_pb= s->tex_pb;
2155     }
2156     d->block= s->block;
2157     for(i=0; i<8; i++)
2158         d->block_last_index[i]= s->block_last_index[i];
2159     d->interlaced_dct= s->interlaced_dct;
2160     d->qscale= s->qscale;
2161
2162     d->esc3_level_length= s->esc3_level_length;
2163 }
2164
2165 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2166                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2167                            int *dmin, int *next_block, int motion_x, int motion_y)
2168 {
2169     int score;
2170     uint8_t *dest_backup[3];
2171
2172     copy_context_before_encode(s, backup, type);
2173
2174     s->block= s->blocks[*next_block];
2175     s->pb= pb[*next_block];
2176     if(s->data_partitioning){
2177         s->pb2   = pb2   [*next_block];
2178         s->tex_pb= tex_pb[*next_block];
2179     }
2180
2181     if(*next_block){
2182         memcpy(dest_backup, s->dest, sizeof(s->dest));
2183         s->dest[0] = s->rd_scratchpad;
2184         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2185         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2186         assert(s->linesize >= 32); //FIXME
2187     }
2188
2189     encode_mb(s, motion_x, motion_y);
2190
2191     score= put_bits_count(&s->pb);
2192     if(s->data_partitioning){
2193         score+= put_bits_count(&s->pb2);
2194         score+= put_bits_count(&s->tex_pb);
2195     }
2196
2197     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2198         MPV_decode_mb(s, s->block);
2199
2200         score *= s->lambda2;
2201         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2202     }
2203
2204     if(*next_block){
2205         memcpy(s->dest, dest_backup, sizeof(s->dest));
2206     }
2207
2208     if(score<*dmin){
2209         *dmin= score;
2210         *next_block^=1;
2211
2212         copy_context_after_encode(best, s, type);
2213     }
2214 }
2215
2216 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2217     uint32_t *sq = ff_squareTbl + 256;
2218     int acc=0;
2219     int x,y;
2220
2221     if(w==16 && h==16)
2222         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2223     else if(w==8 && h==8)
2224         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2225
2226     for(y=0; y<h; y++){
2227         for(x=0; x<w; x++){
2228             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2229         }
2230     }
2231
2232     assert(acc>=0);
2233
2234     return acc;
2235 }
2236
2237 static int sse_mb(MpegEncContext *s){
2238     int w= 16;
2239     int h= 16;
2240
2241     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2242     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2243
2244     if(w==16 && h==16)
2245       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2246         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2247                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2248                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2249       }else{
2250         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2251                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2252                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2253       }
2254     else
2255         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2256                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2257                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2258 }
2259
2260 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2261     MpegEncContext *s= *(void**)arg;
2262
2263
2264     s->me.pre_pass=1;
2265     s->me.dia_size= s->avctx->pre_dia_size;
2266     s->first_slice_line=1;
2267     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2268         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2269             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2270         }
2271         s->first_slice_line=0;
2272     }
2273
2274     s->me.pre_pass=0;
2275
2276     return 0;
2277 }
2278
2279 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2280     MpegEncContext *s= *(void**)arg;
2281
2282     ff_check_alignment();
2283
2284     s->me.dia_size= s->avctx->dia_size;
2285     s->first_slice_line=1;
2286     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2287         s->mb_x=0; //for block init below
2288         ff_init_block_index(s);
2289         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2290             s->block_index[0]+=2;
2291             s->block_index[1]+=2;
2292             s->block_index[2]+=2;
2293             s->block_index[3]+=2;
2294
2295             /* compute motion vector & mb_type and store in context */
2296             if(s->pict_type==AV_PICTURE_TYPE_B)
2297                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2298             else
2299                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2300         }
2301         s->first_slice_line=0;
2302     }
2303     return 0;
2304 }
2305
2306 static int mb_var_thread(AVCodecContext *c, void *arg){
2307     MpegEncContext *s= *(void**)arg;
2308     int mb_x, mb_y;
2309
2310     ff_check_alignment();
2311
2312     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2313         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2314             int xx = mb_x * 16;
2315             int yy = mb_y * 16;
2316             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2317             int varc;
2318             int sum = s->dsp.pix_sum(pix, s->linesize);
2319
2320             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2321
2322             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2323             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2324             s->me.mb_var_sum_temp    += varc;
2325         }
2326     }
2327     return 0;
2328 }
2329
2330 static void write_slice_end(MpegEncContext *s){
2331     if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4){
2332         if(s->partitioned_frame){
2333             ff_mpeg4_merge_partitions(s);
2334         }
2335
2336         ff_mpeg4_stuffing(&s->pb);
2337     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2338         ff_mjpeg_encode_stuffing(&s->pb);
2339     }
2340
2341     avpriv_align_put_bits(&s->pb);
2342     flush_put_bits(&s->pb);
2343
2344     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2345         s->misc_bits+= get_bits_diff(s);
2346 }
2347
/**
 * Encode the macroblock rows [s->start_mb_y, s->end_mb_y) of the current
 * picture into s->pb.
 *
 * For every macroblock the remaining output space is checked, a resync
 * header (GOB / video packet / slice) is written when RTP mode asks for one,
 * and the macroblock is then coded. If only one candidate type is possible
 * (and QP_RD is off) it is coded directly with encode_mb(); otherwise every
 * candidate type is tried through encode_mb_hq() and encoding continues from
 * the state left in best_s.
 *
 * @param c   codec context (not referenced by this function)
 * @param arg pointer to a pointer to the MpegEncContext to work on
 * @return 0 on success, -1 if fewer than MAX_MB_BYTES bytes are left in one
 *         of the output bitstream buffers
 */
2348 static int encode_thread(AVCodecContext *c, void *arg){
2349     MpegEncContext *s= *(void**)arg;
2350     int mb_x, mb_y, pdif = 0;
2351     int chr_h= 16>>s->chroma_y_shift;
2352     int i, j;
2353     MpegEncContext best_s, backup_s;
2354     uint8_t bit_buf[2][MAX_MB_BYTES];
2355     uint8_t bit_buf2[2][MAX_MB_BYTES];
2356     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2357     PutBitContext pb[2], pb2[2], tex_pb[2];
2358 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
2359
2360     ff_check_alignment();
2361
         /* scratch bitstream writers handed to encode_mb_hq() for the candidate trials */
2362     for(i=0; i<2; i++){
2363         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2364         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2365         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2366     }
2367
         /* reset the bit and macroblock counters accumulated while encoding */
2368     s->last_bits= put_bits_count(&s->pb);
2369     s->mv_bits=0;
2370     s->misc_bits=0;
2371     s->i_tex_bits=0;
2372     s->p_tex_bits=0;
2373     s->i_count=0;
2374     s->f_count=0;
2375     s->b_count=0;
2376     s->skip_count=0;
2377
2378     for(i=0; i<3; i++){
2379         /* init last dc values */
2380         /* note: quant matrix value (8) is implied here */
2381         s->last_dc[i] = 128 << s->intra_dc_precision;
2382
2383         s->current_picture.f.error[i] = 0;
2384     }
         /* AMV starts from its own DC predictor values */
2385     if(s->codec_id==CODEC_ID_AMV){
2386         s->last_dc[0] = 128*8/13;
2387         s->last_dc[1] = 128*8/14;
2388         s->last_dc[2] = 128*8/14;
2389     }
2390     s->mb_skip_run = 0;
2391     memset(s->last_mv, 0, sizeof(s->last_mv));
2392
2393     s->last_mv_dir = 0;
2394
         /* codec specific setup done once before the first macroblock */
2395     switch(s->codec_id){
2396     case CODEC_ID_H263:
2397     case CODEC_ID_H263P:
2398     case CODEC_ID_FLV1:
2399         if (CONFIG_H263_ENCODER)
2400             s->gob_index = ff_h263_get_gob_height(s);
2401         break;
2402     case CODEC_ID_MPEG4:
2403         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2404             ff_mpeg4_init_partitions(s);
2405         break;
2406     }
2407
2408     s->resync_mb_x=0;
2409     s->resync_mb_y=0;
2410     s->first_slice_line = 1;
2411     s->ptr_lastgob = s->pb.buf;
2412     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2413 //    printf("row %d at %X\n", s->mb_y, (int)s);
2414         s->mb_x=0;
2415         s->mb_y= mb_y;
2416
2417         ff_set_qscale(s, s->qscale);
2418         ff_init_block_index(s);
2419
2420         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2421             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2422             int mb_type= s->mb_type[xy];
2423 //            int d;
2424             int dmin= INT_MAX;
2425             int dir;
2426
                 /* give up unless at least MAX_MB_BYTES are still free in every output buffer in use */
2427             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2428                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2429                 return -1;
2430             }
2431             if(s->data_partitioning){
2432                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2433                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2434                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2435                     return -1;
2436                 }
2437             }
2438
2439             s->mb_x = mb_x;
2440             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2441             ff_update_block_index(s);
2442
2443             if(CONFIG_H261_ENCODER && s->codec_id == CODEC_ID_H261){
2444                 ff_h261_reorder_mb_index(s);
2445                 xy= s->mb_y*s->mb_stride + s->mb_x;
2446                 mb_type= s->mb_type[xy];
2447             }
2448
2449             /* write gob / video packet header  */
2450             if(s->rtp_mode){
2451                 int current_packet_size, is_gob_start;
2452
                     /* bytes written since the last resync point (s->ptr_lastgob) */
2453                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2454
2455                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2456
                     /* the first macroblock of this context's row range always starts a packet, except on row 0 */
2457                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2458
                     /* codec specific restrictions on where a new packet may start */
2459                 switch(s->codec_id){
2460                 case CODEC_ID_H263:
2461                 case CODEC_ID_H263P:
2462                     if(!s->h263_slice_structured)
2463                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2464                     break;
2465                 case CODEC_ID_MPEG2VIDEO:
2466                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
                         /* no break: MPEG-2 also goes through the skip-run check below */
2467                 case CODEC_ID_MPEG1VIDEO:
2468                     if(s->mb_skip_run) is_gob_start=0;
2469                     break;
2470                 }
2471
2472                 if(is_gob_start){
                         /* close the previous slice unless this is the very first macroblock of the range */
2473                     if(s->start_mb_y != mb_y || mb_x!=0){
2474                         write_slice_end(s);
2475
2476                         if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
2477                             ff_mpeg4_init_partitions(s);
2478                         }
2479                     }
2480
2481                     assert((put_bits_count(&s->pb)&7) == 0);
2482                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2483
                         /* error simulation: rewind the writer to the last resync point so the
                          * packet just finished is dropped from the stream */
2484                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2485                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2486                         int d= 100 / s->avctx->error_rate;
                             /* 100 / error_rate is 0 once |error_rate| > 100; treat that as
                              * "drop every packet" instead of dividing by zero in r % d */
2487                         if(d == 0 || r % d == 0){
2488                             current_packet_size=0;
2489                             s->pb.buf_ptr= s->ptr_lastgob;
2490                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2491                         }
2492                     }
2493
                         /* hand the finished packet and its macroblock count to the user callback */
2494                     if (s->avctx->rtp_callback){
2495                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2496                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2497                     }
2498
                         /* write the codec specific resync header for the packet that starts here */
2499                     switch(s->codec_id){
2500                     case CODEC_ID_MPEG4:
2501                         if (CONFIG_MPEG4_ENCODER) {
2502                             ff_mpeg4_encode_video_packet_header(s);
2503                             ff_mpeg4_clean_buffers(s);
2504                         }
2505                     break;
2506                     case CODEC_ID_MPEG1VIDEO:
2507                     case CODEC_ID_MPEG2VIDEO:
2508                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2509                             ff_mpeg1_encode_slice_header(s);
2510                             ff_mpeg1_clean_buffers(s);
2511                         }
2512                     break;
2513                     case CODEC_ID_H263:
2514                     case CODEC_ID_H263P:
2515                         if (CONFIG_H263_ENCODER)
2516                             h263_encode_gob_header(s, mb_y);
2517                     break;
2518                     }
2519
                         /* in the first pass the header bits are accounted as misc bits */
2520                     if(s->flags&CODEC_FLAG_PASS1){
2521                         int bits= put_bits_count(&s->pb);
2522                         s->misc_bits+= bits - s->last_bits;
2523                         s->last_bits= bits;
2524                     }
2525
                         /* remember where the new packet starts */
2526                     s->ptr_lastgob += current_packet_size;
2527                     s->first_slice_line=1;
2528                     s->resync_mb_x=mb_x;
2529                     s->resync_mb_y=mb_y;
2530                 }
2531             }
2532
                 /* exactly one row below the resync point: the first slice line is over */
2533             if(  (s->resync_mb_x   == s->mb_x)
2534                && s->resync_mb_y+1 == s->mb_y){
2535                 s->first_slice_line=0;
2536             }
2537
2538             s->mb_skipped=0;
2539             s->dquant=0; //only for QP_RD
2540
2541             if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
2542                 int next_block=0;
2543                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2544
                     /* save the state every candidate trial starts from, including the real writers */
2545                 copy_context_before_encode(&backup_s, s, -1);
2546                 backup_s.pb= s->pb;
2547                 best_s.data_partitioning= s->data_partitioning;
2548                 best_s.partitioned_frame= s->partitioned_frame;
2549                 if(s->data_partitioning){
2550                     backup_s.pb2= s->pb2;
2551                     backup_s.tex_pb= s->tex_pb;
2552                 }
2553
                     /* try every candidate type flagged in mb_type; each block sets up the
                      * prediction mode and motion vectors and then calls encode_mb_hq() */
2554                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2555                     s->mv_dir = MV_DIR_FORWARD;
2556                     s->mv_type = MV_TYPE_16X16;
2557                     s->mb_intra= 0;
2558                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2559                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2560                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2561                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2562                 }
2563                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2564                     s->mv_dir = MV_DIR_FORWARD;
2565                     s->mv_type = MV_TYPE_FIELD;
2566                     s->mb_intra= 0;
2567                     for(i=0; i<2; i++){
2568                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2569                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2570                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2571                     }
2572                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2573                                  &dmin, &next_block, 0, 0);
2574                 }
2575                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2576                     s->mv_dir = MV_DIR_FORWARD;
2577                     s->mv_type = MV_TYPE_16X16;
2578                     s->mb_intra= 0;
2579                     s->mv[0][0][0] = 0;
2580                     s->mv[0][0][1] = 0;
2581                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2582                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2583                 }
2584                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2585                     s->mv_dir = MV_DIR_FORWARD;
2586                     s->mv_type = MV_TYPE_8X8;
2587                     s->mb_intra= 0;
2588                     for(i=0; i<4; i++){
2589                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2590                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2591                     }
2592                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2593                                  &dmin, &next_block, 0, 0);
2594                 }
2595                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2596                     s->mv_dir = MV_DIR_FORWARD;
2597                     s->mv_type = MV_TYPE_16X16;
2598                     s->mb_intra= 0;
2599                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2600                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2601                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2602                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2603                 }
2604                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2605                     s->mv_dir = MV_DIR_BACKWARD;
2606                     s->mv_type = MV_TYPE_16X16;
2607                     s->mb_intra= 0;
2608                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2609                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2610                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2611                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2612                 }
2613                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2614                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2615                     s->mv_type = MV_TYPE_16X16;
2616                     s->mb_intra= 0;
2617                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2618                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2619                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2620                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2621                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2622                                  &dmin, &next_block, 0, 0);
2623                 }
2624                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2625                     s->mv_dir = MV_DIR_FORWARD;
2626                     s->mv_type = MV_TYPE_FIELD;
2627                     s->mb_intra= 0;
2628                     for(i=0; i<2; i++){
2629                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2630                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2631                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2632                     }
2633                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2634                                  &dmin, &next_block, 0, 0);
2635                 }
2636                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2637                     s->mv_dir = MV_DIR_BACKWARD;
2638                     s->mv_type = MV_TYPE_FIELD;
2639                     s->mb_intra= 0;
2640                     for(i=0; i<2; i++){
2641                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2642                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2643                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2644                     }
2645                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2646                                  &dmin, &next_block, 0, 0);
2647                 }
2648                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2649                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2650                     s->mv_type = MV_TYPE_FIELD;
2651                     s->mb_intra= 0;
2652                     for(dir=0; dir<2; dir++){
2653                         for(i=0; i<2; i++){
2654                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2655                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2656                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2657                         }
2658                     }
2659                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2660                                  &dmin, &next_block, 0, 0);
2661                 }
2662                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2663                     s->mv_dir = 0;
2664                     s->mv_type = MV_TYPE_16X16;
2665                     s->mb_intra= 1;
2666                     s->mv[0][0][0] = 0;
2667                     s->mv[0][0][1] = 0;
2668                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2669                                  &dmin, &next_block, 0, 0);
2670                     if(s->h263_pred || s->h263_aic){
2671                         if(best_s.mb_intra)
2672                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2673                         else
2674                             ff_clean_intra_table_entries(s); //old mode?
2675                     }
2676                 }
2677
                     /* QP_RD: retry the 16x16 choice held in best_s with qscale offsets from dquant_tab */
2678                 if((s->flags & CODEC_FLAG_QP_RD) && dmin < INT_MAX){
2679                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2680                         const int last_qp= backup_s.qscale;
2681                         int qpi, qp, dc[6];
2682                         DCTELEM ac[6][16];
2683                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2684                         static const int dquant_tab[4]={-1,1,-2,2};
2685
2686                         assert(backup_s.dquant == 0);
2687
2688                         //FIXME intra
2689                         s->mv_dir= best_s.mv_dir;
2690                         s->mv_type = MV_TYPE_16X16;
2691                         s->mb_intra= best_s.mb_intra;
2692                         s->mv[0][0][0] = best_s.mv[0][0][0];
2693                         s->mv[0][0][1] = best_s.mv[0][0][1];
2694                         s->mv[1][0][0] = best_s.mv[1][0][0];
2695                         s->mv[1][0][1] = best_s.mv[1][0][1];
2696
                             /* B pictures start at table index 2, i.e. they only try -2 and +2 */
2697                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2698                         for(; qpi<4; qpi++){
2699                             int dquant= dquant_tab[qpi];
2700                             qp= last_qp + dquant;
2701                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2702                                 continue;
2703                             backup_s.dquant= dquant;
                                 /* keep a copy of the intra DC/AC values so they can be put back below */
2704                             if(s->mb_intra && s->dc_val[0]){
2705                                 for(i=0; i<6; i++){
2706                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2707                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
2708                                 }
2709                             }
2710
2711                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2712                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
                                 /* this qp did not end up in best_s: restore the saved DC/AC values */
2713                             if(best_s.qscale != qp){
2714                                 if(s->mb_intra && s->dc_val[0]){
2715                                     for(i=0; i<6; i++){
2716                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2717                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
2718                                     }
2719                                 }
2720                             }
2721                         }
2722                     }
2723                 }
2724                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2725                     int mx= s->b_direct_mv_table[xy][0];
2726                     int my= s->b_direct_mv_table[xy][1];
2727
2728                     backup_s.dquant = 0;
2729                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2730                     s->mb_intra= 0;
2731                     ff_mpeg4_set_direct_mv(s, mx, my);
2732                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2733                                  &dmin, &next_block, mx, my);
2734                 }
2735                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2736                     backup_s.dquant = 0;
2737                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2738                     s->mb_intra= 0;
2739                     ff_mpeg4_set_direct_mv(s, 0, 0);
2740                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2741                                  &dmin, &next_block, 0, 0);
2742                 }
                     /* SKIP_RD: if the non-intra choice has coded blocks, also try it with s->skipdct set */
2743                 if(!best_s.mb_intra && s->flags2&CODEC_FLAG2_SKIP_RD){
2744                     int coded=0;
2745                     for(i=0; i<6; i++)
2746                         coded |= s->block_last_index[i];
2747                     if(coded){
2748                         int mx,my;
2749                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2750                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2751                             mx=my=0; //FIXME find the one we actually used
2752                             ff_mpeg4_set_direct_mv(s, mx, my);
2753                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2754                             mx= s->mv[1][0][0];
2755                             my= s->mv[1][0][1];
2756                         }else{
2757                             mx= s->mv[0][0][0];
2758                             my= s->mv[0][0][1];
2759                         }
2760
2761                         s->mv_dir= best_s.mv_dir;
2762                         s->mv_type = best_s.mv_type;
2763                         s->mb_intra= 0;
2764 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2765                         s->mv[0][0][1] = best_s.mv[0][0][1];
2766                         s->mv[1][0][0] = best_s.mv[1][0][0];
2767                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2768                         backup_s.dquant= 0;
2769                         s->skipdct=1;
2770                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2771                                         &dmin, &next_block, mx, my);
2772                         s->skipdct=0;
2773                     }
2774                 }
2775
2776                 s->current_picture.f.qscale_table[xy] = best_s.qscale;
2777
                     /* continue from the state held in best_s */
2778                 copy_context_after_encode(s, &best_s, -1);
2779
                     /* append the bits of scratch buffer next_block^1 to the real writer(s) */
2780                 pb_bits_count= put_bits_count(&s->pb);
2781                 flush_put_bits(&s->pb);
2782                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2783                 s->pb= backup_s.pb;
2784
2785                 if(s->data_partitioning){
2786                     pb2_bits_count= put_bits_count(&s->pb2);
2787                     flush_put_bits(&s->pb2);
2788                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2789                     s->pb2= backup_s.pb2;
2790
2791                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2792                     flush_put_bits(&s->tex_pb);
2793                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2794                     s->tex_pb= backup_s.tex_pb;
2795                 }
2796                 s->last_bits= put_bits_count(&s->pb);
2797
2798                 if (CONFIG_H263_ENCODER &&
2799                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2800                     ff_h263_update_motion_val(s);
2801
                     /* copy luma and both chroma blocks from rd_scratchpad to the destination;
                      * NOTE(review): presumably the chosen candidate was reconstructed there - confirm in encode_mb_hq() */
2802                 if(next_block==0){ //FIXME 16 vs linesize16
2803                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2804                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2805                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2806                 }
2807
2808                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2809                     MPV_decode_mb(s, s->block);
2810             } else {
2811                 int motion_x = 0, motion_y = 0;
2812                 s->mv_type=MV_TYPE_16X16;
2813                 // only one MB-Type possible
2814
                     /* set up prediction mode and motion vectors for the single candidate type */
2815                 switch(mb_type){
2816                 case CANDIDATE_MB_TYPE_INTRA:
2817                     s->mv_dir = 0;
2818                     s->mb_intra= 1;
2819                     motion_x= s->mv[0][0][0] = 0;
2820                     motion_y= s->mv[0][0][1] = 0;
2821                     break;
2822                 case CANDIDATE_MB_TYPE_INTER:
2823                     s->mv_dir = MV_DIR_FORWARD;
2824                     s->mb_intra= 0;
2825                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2826                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2827                     break;
2828                 case CANDIDATE_MB_TYPE_INTER_I:
2829                     s->mv_dir = MV_DIR_FORWARD;
2830                     s->mv_type = MV_TYPE_FIELD;
2831                     s->mb_intra= 0;
2832                     for(i=0; i<2; i++){
2833                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2834                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2835                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2836                     }
2837                     break;
2838                 case CANDIDATE_MB_TYPE_INTER4V:
2839                     s->mv_dir = MV_DIR_FORWARD;
2840                     s->mv_type = MV_TYPE_8X8;
2841                     s->mb_intra= 0;
2842                     for(i=0; i<4; i++){
2843                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2844                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2845                     }
2846                     break;
2847                 case CANDIDATE_MB_TYPE_DIRECT:
2848                     if (CONFIG_MPEG4_ENCODER) {
2849                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2850                         s->mb_intra= 0;
2851                         motion_x=s->b_direct_mv_table[xy][0];
2852                         motion_y=s->b_direct_mv_table[xy][1];
2853                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2854                     }
2855                     break;
2856                 case CANDIDATE_MB_TYPE_DIRECT0:
2857                     if (CONFIG_MPEG4_ENCODER) {
2858                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2859                         s->mb_intra= 0;
2860                         ff_mpeg4_set_direct_mv(s, 0, 0);
2861                     }
2862                     break;
2863                 case CANDIDATE_MB_TYPE_BIDIR:
2864                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2865                     s->mb_intra= 0;
2866                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2867                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2868                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2869                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2870                     break;
2871                 case CANDIDATE_MB_TYPE_BACKWARD:
2872                     s->mv_dir = MV_DIR_BACKWARD;
2873                     s->mb_intra= 0;
2874                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2875                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2876                     break;
2877                 case CANDIDATE_MB_TYPE_FORWARD:
2878                     s->mv_dir = MV_DIR_FORWARD;
2879                     s->mb_intra= 0;
2880                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2881                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2882 //                    printf(" %d %d ", motion_x, motion_y);
2883                     break;
2884                 case CANDIDATE_MB_TYPE_FORWARD_I:
2885                     s->mv_dir = MV_DIR_FORWARD;
2886                     s->mv_type = MV_TYPE_FIELD;
2887                     s->mb_intra= 0;
2888                     for(i=0; i<2; i++){
2889                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2890                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2891                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2892                     }
2893                     break;
2894                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2895                     s->mv_dir = MV_DIR_BACKWARD;
2896                     s->mv_type = MV_TYPE_FIELD;
2897                     s->mb_intra= 0;
2898                     for(i=0; i<2; i++){
2899                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2900                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2901                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2902                     }
2903                     break;
2904                 case CANDIDATE_MB_TYPE_BIDIR_I:
2905                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2906                     s->mv_type = MV_TYPE_FIELD;
2907                     s->mb_intra= 0;
2908                     for(dir=0; dir<2; dir++){
2909                         for(i=0; i<2; i++){
2910                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2911                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2912                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2913                         }
2914                     }
2915                     break;
2916                 default:
                         /* only logged: encoding continues with whatever mode is currently set in s */
2917                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
2918                 }
2919
2920                 encode_mb(s, motion_x, motion_y);
2921
2922                 // RAL: Update last macroblock type
2923                 s->last_mv_dir = s->mv_dir;
2924
2925                 if (CONFIG_H263_ENCODER &&
2926                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2927                     ff_h263_update_motion_val(s);
2928
2929                 MPV_decode_mb(s, s->block);
2930             }
2931
2932             /* clean the MV table in IPS frames for direct mode in B frames */
2933             if(s->mb_intra /* && I,P,S_TYPE */){
2934                 s->p_mv_table[xy][0]=0;
2935                 s->p_mv_table[xy][1]=0;
2936             }
2937
                 /* PSNR statistics: add the sse() result between source and s->dest per plane,
                  * with the block size clipped at the right and bottom picture border */
2938             if(s->flags&CODEC_FLAG_PSNR){
2939                 int w= 16;
2940                 int h= 16;
2941
2942                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2943                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2944
2945                 s->current_picture.f.error[0] += sse(
2946                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
2947                     s->dest[0], w, h, s->linesize);
2948                 s->current_picture.f.error[1] += sse(
2949                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2950                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2951                 s->current_picture.f.error[2] += sse(
2952                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2953                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2954             }
2955             if(s->loop_filter){
2956                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
2957                     ff_h263_loop_filter(s);
2958             }
2959 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
2960         }
2961     }
2962
2963     //not beautiful here but we must write it before flushing so it has to be here
2964     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
2965         msmpeg4_encode_ext_header(s);
2966
2967     write_slice_end(s);
2968
2969     /* Send the last GOB if RTP */
2970     if (s->avctx->rtp_callback) {
2971         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
2972         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
2973         /* Call the RTP callback to send the last GOB */
2974         emms_c();
2975         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
2976     }
2977
2978     return 0;
2979 }
2980
2981 #define MERGE(field) dst->field += src->field; src->field=0
2982 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
2983     MERGE(me.scene_change_score);
2984     MERGE(me.mc_mb_var_sum_temp);
2985     MERGE(me.mb_var_sum_temp);
2986 }
2987
/**
 * Fold the results of one encoding context into another.
 *
 * Every counter listed below is added to dst and cleared in src; the
 * noise-reduction DCT error sums are only merged when noise reduction is
 * enabled on dst->avctx. Afterwards the bits written to src->pb are appended
 * to dst->pb, which is then flushed. Both writers are expected to be byte
 * aligned (asserted).
 */
2988 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src)
2989 {
2990     int k;
         /* note: only dct_count is merged here, the other dct vars are not part of the context */
2991     for (k = 0; k < 2; k++) {
2992         MERGE(dct_count[k]);
2993     }
2994     MERGE(mv_bits);
2995     MERGE(i_tex_bits);
2996     MERGE(p_tex_bits);
2997     MERGE(misc_bits);
2998     MERGE(i_count);
2999     MERGE(f_count);
3000     MERGE(b_count);
3001     MERGE(skip_count);
3002     MERGE(error_count);
3003     MERGE(padding_bug_score);
3004     for (k = 0; k < 3; k++) {
3005         MERGE(current_picture.f.error[k]);
3006     }
3007     if (dst->avctx->noise_reduction) {
3008         for (k = 0; k < 64; k++) {
3009             MERGE(dct_error_sum[0][k]);
3010             MERGE(dct_error_sum[1][k]);
3011         }
3012     }
3013
3014     assert(!(put_bits_count(&src->pb) & 7));
3015     assert(!(put_bits_count(&dst->pb) & 7));
3016     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3017     flush_put_bits(&dst->pb);
3018 }
3019
3020 static int estimate_qp(MpegEncContext *s, int dry_run){
3021     if (s->next_lambda){
3022         s->current_picture_ptr->f.quality =
3023         s->current_picture.f.quality = s->next_lambda;
3024         if(!dry_run) s->next_lambda= 0;
3025     } else if (!s->fixed_qscale) {
3026         s->current_picture_ptr->f.quality =
3027         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3028         if (s->current_picture.f.quality < 0)
3029             return -1;
3030     }
3031
3032     if(s->adaptive_quant){
3033         switch(s->codec_id){
3034         case CODEC_ID_MPEG4:
3035             if (CONFIG_MPEG4_ENCODER)
3036                 ff_clean_mpeg4_qscales(s);
3037             break;
3038         case CODEC_ID_H263:
3039         case CODEC_ID_H263P:
3040         case CODEC_ID_FLV1:
3041             if (CONFIG_H263_ENCODER)
3042                 ff_clean_h263_qscales(s);
3043             break;
3044         default:
3045             ff_init_qscale_tab(s);
3046         }
3047
3048         s->lambda= s->lambda_table[0];
3049         //FIXME broken
3050     }else
3051         s->lambda = s->current_picture.f.quality;
3052 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
3053     update_qscale(s);
3054     return 0;
3055 }
3056
3057 /* must be called before writing the header */
3058 static void set_frame_distances(MpegEncContext * s){
3059     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3060     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3061
3062     if(s->pict_type==AV_PICTURE_TYPE_B){
3063         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3064         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3065     }else{
3066         s->pp_time= s->time - s->last_non_b_time;
3067         s->last_non_b_time= s->time;
3068         assert(s->picture_number==0 || s->pp_time > 0);
3069     }
3070 }
3071
/**
 * Encode the current picture held in s.
 *
 * Steps, in order: set timing and rounding state, pick a starting lambda,
 * sync the slice contexts, run motion estimation (or the variance pass for
 * I pictures), re-type a P picture as I on a scene change, choose
 * f_code/b_code and run the ff_fix_long_*mvs helpers, estimate the final
 * quantiser, set up the MJPEG/AMV matrices, write the picture header, then
 * run encode_thread on every slice context and merge the results into s.
 *
 * @param s              encoder context
 * @param picture_number stored in s->picture_number and passed on to the
 *                       picture header writers
 * @return 0 on success, -1 if estimate_qp() or ff_init_me() fails
 */
3072 static int encode_picture(MpegEncContext *s, int picture_number)
3073 {
3074     int i;
3075     int bits;
3076     int context_count = s->slice_context_count;
3077
3078     s->picture_number = picture_number;
3079
3080     /* Reset the average MB variance */
3081     s->me.mb_var_sum_temp    =
3082     s->me.mc_mb_var_sum_temp = 0;
3083
3084     /* we need to initialize some time vars before we can encode b-frames */
3085     // RAL: Condition added for MPEG1VIDEO
3086     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3087         set_frame_distances(s);
3088     if(CONFIG_MPEG4_ENCODER && s->codec_id == CODEC_ID_MPEG4)
3089         ff_set_mpeg4_time(s);
3090
3091     s->me.scene_change_score=0;
3092
3093 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3094
         /* I pictures reset no_rounding (1 for msmpeg4 version >= 3, else 0);
          * other non-B pictures toggle it when flipflop_rounding is set or the
          * codec is H263P or MPEG4 */
3095     if(s->pict_type==AV_PICTURE_TYPE_I){
3096         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3097         else                        s->no_rounding=0;
3098     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3099         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
3100             s->no_rounding ^= 1;
3101     }
3102
         /* second pass: dry-run QP estimate followed by ff_get_2pass_fcode();
          * otherwise, unless CODEC_FLAG_QSCALE is set, start from the lambda
          * remembered in last_lambda_for[] */
3103     if(s->flags & CODEC_FLAG_PASS2){
3104         if (estimate_qp(s,1) < 0)
3105             return -1;
3106         ff_get_2pass_fcode(s);
3107     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3108         if(s->pict_type==AV_PICTURE_TYPE_B)
3109             s->lambda= s->last_lambda_for[s->pict_type];
3110         else
3111             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3112         update_qscale(s);
3113     }
3114
         /* every codec except AMV uses the luma intra tables for chroma too;
          * chroma tables that are distinct from the luma ones are freed first */
3115     if(s->codec_id != CODEC_ID_AMV){
3116         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3117         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3118         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3119         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3120     }
3121
3122     s->mb_intra=0; //for the rate distortion & bit compare functions
3123     for(i=1; i<context_count; i++){
3124         ff_update_duplicate_context(s->thread_context[i], s);
3125     }
3126
3127     if(ff_init_me(s)<0)
3128         return -1;
3129
3130     /* Estimate motion for every MB */
3131     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda and lambda2 by me_penalty_compensation/256, rounded */
3132         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3133         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3134         if(s->pict_type != AV_PICTURE_TYPE_B && s->avctx->me_threshold==0){
3135             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3136                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3137             }
3138         }
3139
3140         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3141     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3142         /* I-Frame */
3143         for(i=0; i<s->mb_stride*s->mb_height; i++)
3144             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3145
3146         if(!s->fixed_qscale){
3147             /* finding spatial complexity for I-frame rate control */
3148             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3149         }
3150     }
         /* add the ME statistics of slice contexts 1..context_count-1 to s */
3151     for(i=1; i<context_count; i++){
3152         merge_context_after_me(s, s->thread_context[i]);
3153     }
3154     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3155     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3156     emms_c();
3157
         /* scene change: a P picture whose score exceeds the threshold is
          * re-typed as I and every MB becomes an intra candidate */
3158     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3159         s->pict_type= AV_PICTURE_TYPE_I;
3160         for(i=0; i<s->mb_stride*s->mb_height; i++)
3161             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3162 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3163     }
3164
         /* f_code/b_code selection and the ff_fix_long_*mvs calls are skipped
          * entirely when umvplus is set */
3165     if(!s->umvplus){
3166         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3167             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3168
3169             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3170                 int a,b;
3171                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3172                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3173                 s->f_code= FFMAX3(s->f_code, a, b);
3174             }
3175
3176             ff_fix_long_p_mvs(s);
3177             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3178             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3179                 int j;
3180                 for(i=0; i<2; i++){
3181                     for(j=0; j<2; j++)
3182                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3183                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3184                 }
3185             }
3186         }
3187
3188         if(s->pict_type==AV_PICTURE_TYPE_B){
3189             int a, b;
3190
                 /* f_code covers the forward and bidir-forward tables,
                  * b_code the backward and bidir-backward tables */
3191             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3192             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3193             s->f_code = FFMAX(a, b);
3194
3195             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3196             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3197             s->b_code = FFMAX(a, b);
3198
3199             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3200             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3201             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3202             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3203             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3204                 int dir, j;
3205                 for(dir=0; dir<2; dir++){
3206                     for(i=0; i<2; i++){
3207                         for(j=0; j<2; j++){
3208                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3209                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3210                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3211                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3212                         }
3213                     }
3214                 }
3215             }
3216         }
3217     }
3218
         /* final (non dry-run) quantiser estimate for this picture */
3219     if (estimate_qp(s, 0) < 0)
3220         return -1;
3221
3222     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3223         s->qscale= 3; //reduce clipping problems
3224
3225     if (s->out_format == FMT_MJPEG) {
3226         /* for mjpeg, we do include qscale in the matrix */
3227         for(i=1;i<64;i++){
3228             int j= s->dsp.idct_permutation[i];
3229
3230             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3231         }
3232         s->y_dc_scale_table=
3233         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3234         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3235         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3236                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3237         s->qscale= 8;
3238     }
         /* AMV: matrices come from sp5x_quant_table rows 5*2+0 (luma) and
          * 5*2+1 (chroma); the DC scale tables are the constants 13 and 14 */
3239     if(s->codec_id == CODEC_ID_AMV){
3240         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3241         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3242         for(i=1;i<64;i++){
3243             int j= s->dsp.idct_permutation[ff_zigzag_direct[i]];
3244
3245             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3246             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3247         }
3248         s->y_dc_scale_table= y;
3249         s->c_dc_scale_table= c;
3250         s->intra_matrix[0] = 13;
3251         s->chroma_intra_matrix[0] = 14;
3252         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3253                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3254         ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3255                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3256         s->qscale= 8;
3257     }
3258
3259     //FIXME var duplication
3260     s->current_picture_ptr->f.key_frame =
3261     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3262     s->current_picture_ptr->f.pict_type =
3263     s->current_picture.f.pict_type = s->pict_type;
3264
3265     if (s->current_picture.f.key_frame)
3266         s->picture_in_gop_number=0;
3267
         /* write the picture header; header_bits is the number of bits it took */
3268     s->last_bits= put_bits_count(&s->pb);
3269     switch(s->out_format) {
3270     case FMT_MJPEG:
3271         if (CONFIG_MJPEG_ENCODER)
3272             ff_mjpeg_encode_picture_header(s);
3273         break;
3274     case FMT_H261:
3275         if (CONFIG_H261_ENCODER)
3276             ff_h261_encode_picture_header(s, picture_number);
3277         break;
3278     case FMT_H263:
3279         if (CONFIG_WMV2_ENCODER && s->codec_id == CODEC_ID_WMV2)
3280             ff_wmv2_encode_picture_header(s, picture_number);
3281         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3282             msmpeg4_encode_picture_header(s, picture_number);
3283         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3284             mpeg4_encode_picture_header(s, picture_number);
3285         else if (CONFIG_RV10_ENCODER && s->codec_id == CODEC_ID_RV10)
3286             rv10_encode_picture_header(s, picture_number);
3287         else if (CONFIG_RV20_ENCODER && s->codec_id == CODEC_ID_RV20)
3288             rv20_encode_picture_header(s, picture_number);
3289         else if (CONFIG_FLV_ENCODER && s->codec_id == CODEC_ID_FLV1)
3290             ff_flv_encode_picture_header(s, picture_number);
3291         else if (CONFIG_H263_ENCODER)
3292             h263_encode_picture_header(s, picture_number);
3293         break;
3294     case FMT_MPEG1:
3295         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3296             mpeg1_encode_picture_header(s, picture_number);
3297         break;
3298     case FMT_H264:
3299         break;
3300     default:
3301         assert(0);
3302     }
3303     bits= put_bits_count(&s->pb);
3304     s->header_bits= bits - s->last_bits;
3305
         /* refresh slice contexts 1..context_count-1 from s, encode all slice
          * contexts, then merge statistics and written bits back into s */
3306     for(i=1; i<context_count; i++){
3307         update_duplicate_context_after_me(s->thread_context[i], s);
3308     }
3309     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3310     for(i=1; i<context_count; i++){
3311         merge_context_after_encode(s, s->thread_context[i]);
3312     }
3313     emms_c();
3314     return 0;
3315 }
3316
3317 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block){
3318     const int intra= s->mb_intra;
3319     int i;
3320
3321     s->dct_count[intra]++;
3322
3323     for(i=0; i<64; i++){
3324         int level= block[i];
3325
3326         if(level){
3327             if(level>0){
3328                 s->dct_error_sum[intra][i] += level;
3329                 level -= s->dct_offset[intra][i];
3330                 if(level<0) level=0;
3331             }else{
3332                 s->dct_error_sum[intra][i] -= level;
3333                 level += s->dct_offset[intra][i];
3334                 if(level>0) level=0;
3335             }
3336             block[i]= level;
3337         }
3338     }
3339 }
3340
/**
 * Forward-DCT a block and quantise it with a trellis search.
 *
 * Every coefficient up to the last one that survives plain quantisation
 * gets up to two candidate levels (coeff[0], coeff[1]). A pass over the
 * scan positions then picks, for each position, the cheapest run/level
 * continuation from a list of surviving predecessors, where cost is
 * distortion + lambda * VLC length. The chosen path is finally written
 * back into block.
 *
 * @param s        encoder context (fdct, quant matrices, VLC length tables,
 *                 lambda2, max_qcoeff); s->coded_score[n] is written
 * @param block    64 coefficients; transformed in place and overwritten with
 *                 the quantised levels
 * @param n        block index; n < 4 selects y_dc_scale and q_intra_matrix,
 *                 otherwise c_dc_scale and q_chroma_intra_matrix
 * @param qscale   quantiser; indexes the q_*_matrix tables
 * @param overflow set to non-zero if a quantised level may exceed
 *                 s->max_qcoeff
 * @return scan index of the last non-zero coefficient; a value below the
 *         first coded index (0 for intra, -1 for inter) means nothing
 *         beyond the intra DC is coded
 */
3341 static int dct_quantize_trellis_c(MpegEncContext *s,
3342                                   DCTELEM *block, int n,
3343                                   int qscale, int *overflow){
3344     const int *qmat;
3345     const uint8_t *scantable= s->intra_scantable.scantable;
3346     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3347     int max=0;
3348     unsigned int threshold1, threshold2;
3349     int bias=0;
3350     int run_tab[65];
3351     int level_tab[65];
3352     int score_tab[65];
3353     int survivor[65];
3354     int survivor_count;
3355     int last_run=0;
3356     int last_level=0;
3357     int last_score= 0;
3358     int last_i;
3359     int coeff[2][64];
3360     int coeff_count[64];
3361     int qmul, qadd, start_i, last_non_zero, i, dc;
3362     const int esc_length= s->ac_esc_length;
3363     uint8_t * length;
3364     uint8_t * last_length;
3365     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3366
3367     s->dsp.fdct (block);
3368
         /* denoising only runs when the error-sum table is present */
3369     if(s->dct_error_sum)
3370         s->denoise_dct(s, block);
3371     qmul= qscale*16;
3372     qadd= ((qscale-1)|1)*8;
3373
3374     if (s->mb_intra) {
3375         int q;
3376         if (!s->h263_aic) {
3377             if (n < 4)
3378                 q = s->y_dc_scale;
3379             else
3380                 q = s->c_dc_scale;
3381             q = q << 3;
3382         } else{
3383             /* For AIC we skip quant/dequant of INTRADC */
3384             q = 1 << 3;
3385             qadd=0;
3386         }
3387
3388         /* note: block[0] is assumed to be positive */
3389         block[0] = (block[0] + (q >> 1)) / q;
3390         start_i = 1;
3391         last_non_zero = 0;
3392         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3393         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3394             bias= 1<<(QMAT_SHIFT-1);
3395         length     = s->intra_ac_vlc_length;
3396         last_length= s->intra_ac_vlc_last_length;
3397     } else {
3398         start_i = 0;
3399         last_non_zero = -1;
3400         qmat = s->q_inter_matrix[qscale];
3401         length     = s->inter_ac_vlc_length;
3402         last_length= s->inter_ac_vlc_last_length;
3403     }
3404     last_i= start_i;
3405
3406     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3407     threshold2= (threshold1<<1);
3408
         /* scan backwards for the last coefficient above the threshold */
3409     for(i=63; i>=start_i; i--) {
3410         const int j = scantable[i];
3411         int level = block[j] * qmat[j];
3412
3413         if(((unsigned)(level+threshold1))>threshold2){
3414             last_non_zero = i;
3415             break;
3416         }
3417     }
3418
         /* build the candidate levels: above the threshold the rounded level
          * and the level one step closer to zero (at most 2 candidates);
          * below it a single candidate of +1 or -1 matching the sign */
3419     for(i=start_i; i<=last_non_zero; i++) {
3420         const int j = scantable[i];
3421         int level = block[j] * qmat[j];
3422
3423 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3424 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3425         if(((unsigned)(level+threshold1))>threshold2){
3426             if(level>0){
3427                 level= (bias + level)>>QMAT_SHIFT;
3428                 coeff[0][i]= level;
3429                 coeff[1][i]= level-1;
3430 //                coeff[2][k]= level-2;
3431             }else{
3432                 level= (bias - level)>>QMAT_SHIFT;
3433                 coeff[0][i]= -level;
3434                 coeff[1][i]= -level+1;
3435 //                coeff[2][k]= -level+2;
3436             }
3437             coeff_count[i]= FFMIN(level, 2);
3438             assert(coeff_count[i]);
3439             max |=level;
3440         }else{
3441             coeff[0][i]= (level>>31)|1;
3442             coeff_count[i]= 1;
3443         }
3444     }
3445
3446     *overflow= s->max_qcoeff < max; //overflow might have happened
3447
         /* nothing above the threshold: clear the block from start_i on */
3448     if(last_non_zero < start_i){
3449         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3450         return last_non_zero;
3451     }
3452
3453     score_tab[start_i]= 0;
3454     survivor[0]= start_i;
3455     survivor_count= 1;
3456
         /* main search: score_tab[i+1] becomes the best score of a path whose
          * most recent coded coefficient is at scan position i */
3457     for(i=start_i; i<=last_non_zero; i++){
3458         int level_index, j, zero_distortion;
3459         int dct_coeff= FFABS(block[ scantable[i] ]);
3460         int best_score=256*256*256*120;
3461
             /* NOTE(review): presumably these DCTs return AAN-scaled output,
              * which is rescaled here with ff_inv_aanscales - confirm */
3462         if (   s->dsp.fdct == fdct_ifast
3463 #ifndef FAAN_POSTSCALE
3464             || s->dsp.fdct == ff_faandct
3465 #endif
3466            )
3467             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3468         zero_distortion= dct_coeff*dct_coeff;
3469
3470         for(level_index=0; level_index < coeff_count[i]; level_index++){
3471             int distortion;
3472             int level= coeff[level_index][i];
3473             const int alevel= FFABS(level);
3474             int unquant_coeff;
3475
3476             assert(level);
3477
                 /* reconstruct the value this candidate would dequantise to */
3478             if(s->out_format == FMT_H263){
3479                 unquant_coeff= alevel*qmul + qadd;
3480             }else{ //MPEG1
3481                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3482                 if(s->mb_intra){
3483                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3484                         unquant_coeff =   (unquant_coeff - 1) | 1;
3485                 }else{
3486                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3487                         unquant_coeff =   (unquant_coeff - 1) | 1;
3488                 }
3489                 unquant_coeff<<= 3;
3490             }
3491
                 /* distortion is measured relative to coding a zero here */
3492             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3493             level+=64;
                 /* levels in [-64,63] are looked up in the VLC length tables;
                  * anything else is charged esc_length */
3494             if((level&(~127)) == 0){
3495                 for(j=survivor_count-1; j>=0; j--){
3496                     int run= i - survivor[j];
3497                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3498                     score += score_tab[i-run];
3499
3500                     if(score < best_score){
3501                         best_score= score;
3502                         run_tab[i+1]= run;
3503                         level_tab[i+1]= level-64;
3504                     }
3505                 }
3506
                     /* FMT_H263: also score this coefficient as the final
                      * one using last_length, tracking the best end point */
3507                 if(s->out_format == FMT_H263){
3508                     for(j=survivor_count-1; j>=0; j--){
3509                         int run= i - survivor[j];
3510                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3511                         score += score_tab[i-run];
3512                         if(score < last_score){
3513                             last_score= score;
3514                             last_run= run;
3515                             last_level= level-64;
3516                             last_i= i+1;
3517                         }
3518                     }
3519                 }
3520             }else{
3521                 distortion += esc_length*lambda;
3522                 for(j=survivor_count-1; j>=0; j--){
3523                     int run= i - survivor[j];
3524                     int score= distortion + score_tab[i-run];
3525
3526                     if(score < best_score){
3527                         best_score= score;
3528                         run_tab[i+1]= run;
3529                         level_tab[i+1]= level-64;
3530                     }
3531                 }
3532
3533                 if(s->out_format == FMT_H263){
3534                   for(j=survivor_count-1; j>=0; j--){
3535                         int run= i - survivor[j];
3536                         int score= distortion + score_tab[i-run];
3537                         if(score < last_score){
3538                             last_score= score;
3539                             last_run= run;
3540                             last_level= level-64;
3541                             last_i= i+1;
3542                         }
3543                     }
3544                 }
3545             }
3546         }
3547
3548         score_tab[i+1]= best_score;
3549
             /* drop trailing survivors whose score is worse than the new one
              * (with a margin of lambda when last_non_zero > 27) */
3550         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3551         if(last_non_zero <= 27){
3552             for(; survivor_count; survivor_count--){
3553                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3554                     break;
3555             }
3556         }else{
3557             for(; survivor_count; survivor_count--){
3558                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3559                     break;
3560             }
3561         }
3562
3563         survivor[ survivor_count++ ]= i+1;
3564     }
3565
         /* formats other than FMT_H263 choose the end position afterwards,
          * charging 2*lambda for every end position except 0 */
3566     if(s->out_format != FMT_H263){
3567         last_score= 256*256*256*120;
3568         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3569             int score= score_tab[i];
3570             if(i) score += lambda*2; //FIXME exacter?
3571
3572             if(score < last_score){
3573                 last_score= score;
3574                 last_i= i;
3575                 last_level= level_tab[i];
3576                 last_run= run_tab[i];
3577             }
3578         }
3579     }
3580
3581     s->coded_score[n] = last_score;
3582
3583     dc= FFABS(block[0]);
3584     last_non_zero= last_i - 1;
3585     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3586
3587     if(last_non_zero < start_i)
3588         return last_non_zero;
3589
         /* inter block whose only coefficient is at position 0: re-score its
          * candidates against coding nothing (best_level 0 returns -1) */
3590     if(last_non_zero == 0 && start_i == 0){
3591         int best_level= 0;
3592         int best_score= dc * dc;
3593
3594         for(i=0; i<coeff_count[0]; i++){
3595             int level= coeff[i][0];
3596             int alevel= FFABS(level);
3597             int unquant_coeff, score, distortion;
3598
3599             if(s->out_format == FMT_H263){
3600                     unquant_coeff= (alevel*qmul + qadd)>>3;
3601             }else{ //MPEG1
3602                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3603                     unquant_coeff =   (unquant_coeff - 1) | 1;
3604             }
3605             unquant_coeff = (unquant_coeff + 4) >> 3;
3606             unquant_coeff<<= 3 + 3;
3607
3608             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3609             level+=64;
3610             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3611             else                    score= distortion + esc_length*lambda;
3612
3613             if(score < best_score){
3614                 best_score= score;
3615                 best_level= level - 64;
3616             }
3617         }
3618         block[0]= best_level;
3619         s->coded_score[n] = best_score - dc*dc;
3620         if(best_level == 0) return -1;
3621         else                return last_non_zero;
3622     }
3623
         /* write the chosen path back: last coefficient first, then follow
          * run_tab/level_tab towards start_i, storing in permuted scan order */
3624     i= last_i;
3625     assert(last_level);
3626
3627     block[ perm_scantable[last_non_zero] ]= last_level;
3628     i -= last_run + 1;
3629
3630     for(; i>start_i; i -= run_tab[i] + 1){
3631         block[ perm_scantable[i-1] ]= level_tab[i];
3632     }
3633
3634     return last_non_zero;
3635 }
3636
3637 //#define REFINE_STATS 1
3638 static int16_t basis[64][64];
3639
3640 static void build_basis(uint8_t *perm){
3641     int i, j, x, y;
3642     emms_c();
3643     for(i=0; i<8; i++){
3644         for(j=0; j<8; j++){
3645             for(y=0; y<8; y++){
3646                 for(x=0; x<8; x++){
3647                     double s= 0.25*(1<<BASIS_SHIFT);
3648                     int index= 8*i + j;
3649                     int perm_index= perm[index];
3650                     if(i==0) s*= sqrt(0.5);
3651                     if(j==0) s*= sqrt(0.5);
3652                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3653                 }
3654             }
3655         }
3656     }
3657 }
3658
3659 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3660                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
3661                         int n, int qscale){
3662     int16_t rem[64];
3663     LOCAL_ALIGNED_16(DCTELEM, d1, [64]);
3664     const uint8_t *scantable= s->intra_scantable.scantable;
3665     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3666 //    unsigned int threshold1, threshold2;
3667 //    int bias=0;
3668     int run_tab[65];
3669     int prev_run=0;
3670     int prev_level=0;
3671     int qmul, qadd, start_i, last_non_zero, i, dc;
3672     uint8_t * length;
3673     uint8_t * last_length;
3674     int lambda;
3675     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3676 #ifdef REFINE_STATS
3677 static int count=0;
3678 static int after_last=0;
3679 static int to_zero=0;
3680 static int from_zero=0;
3681 static int raise=0;
3682 static int lower=0;
3683 static int messed_sign=0;
3684 #endif
3685
3686     if(basis[0][0] == 0)
3687         build_basis(s->dsp.idct_permutation);
3688
3689     qmul= qscale*2;
3690     qadd= (qscale-1)|1;
3691     if (s->mb_intra) {
3692         if (!s->h263_aic) {
3693             if (n < 4)
3694                 q = s->y_dc_scale;
3695             else
3696                 q = s->c_dc_scale;
3697         } else{
3698             /* For AIC we skip quant/dequant of INTRADC */
3699             q = 1;
3700             qadd=0;
3701         }
3702         q <<= RECON_SHIFT-3;
3703         /* note: block[0] is assumed to be positive */
3704         dc= block[0]*q;
3705 //        block[0] = (block[0] + (q >> 1)) / q;
3706         start_i = 1;
3707 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3708 //            bias= 1<<(QMAT_SHIFT-1);
3709         length     = s->intra_ac_vlc_length;
3710         last_length= s->intra_ac_vlc_last_length;
3711     } else {
3712         dc= 0;
3713         start_i = 0;
3714         length     = s->inter_ac_vlc_length;
3715         last_length= s->inter_ac_vlc_last_length;
3716     }
3717     last_non_zero = s->block_last_index[n];
3718
3719 #ifdef REFINE_STATS
3720 {START_TIMER
3721 #endif
3722     dc += (1<<(RECON_SHIFT-1));
3723     for(i=0; i<64; i++){
3724         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3725     }
3726 #ifdef REFINE_STATS
3727 STOP_TIMER("memset rem[]")}
3728 #endif
3729     sum=0;
3730     for(i=0; i<64; i++){
3731         int one= 36;
3732         int qns=4;
3733         int w;
3734
3735         w= FFABS(weight[i]) + qns*one;
3736         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3737
3738         weight[i] = w;
3739 //        w=weight[i] = (63*qns + (w/2)) / w;
3740
3741         assert(w>0);
3742         assert(w<(1<<6));
3743         sum += w*w;
3744     }
3745     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3746 #ifdef REFINE_STATS
3747 {START_TIMER
3748 #endif
3749     run=0;
3750     rle_index=0;
3751     for(i=start_i; i<=last_non_zero; i++){
3752         int j= perm_scantable[i];
3753         const int level= block[j];
3754         int coeff;
3755
3756         if(level){
3757             if(level<0) coeff= qmul*level - qadd;
3758             else        coeff= qmul*level + qadd;
3759             run_tab[rle_index++]=run;
3760             run=0;
3761
3762             s->dsp.add_8x8basis(rem, basis[j], coeff);
3763         }else{
3764             run++;
3765         }
3766     }
3767 #ifdef REFINE_STATS
3768 if(last_non_zero>0){
3769 STOP_TIMER("init rem[]")
3770 }
3771 }
3772
3773 {START_TIMER
3774 #endif
3775     for(;;){
3776         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3777         int best_coeff=0;
3778         int best_change=0;
3779         int run2, best_unquant_change=0, analyze_gradient;
3780 #ifdef REFINE_STATS
3781 {START_TIMER
3782 #endif
3783         analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;
3784
3785         if(analyze_gradient){
3786 #ifdef REFINE_STATS
3787 {START_TIMER
3788 #endif
3789             for(i=0; i<64; i++){
3790                 int w= weight[i];
3791
3792                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3793             }
3794 #ifdef REFINE_STATS
3795 STOP_TIMER("rem*w*w")}
3796 {START_TIMER
3797 #endif
3798             s->dsp.fdct(d1);
3799 #ifdef REFINE_STATS
3800 STOP_TIMER("dct")}
3801 #endif
3802         }
3803
3804         if(start_i){
3805             const int level= block[0];
3806             int change, old_coeff;
3807
3808             assert(s->mb_intra);
3809
3810             old_coeff= q*level;
3811
3812             for(change=-1; change<=1; change+=2){
3813                 int new_level= level + change;
3814                 int score, new_coeff;
3815
3816                 new_coeff= q*new_level;
3817                 if(new_coeff >= 2048 || new_coeff < 0)
3818                     continue;
3819
3820                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3821                 if(score<best_score){
3822                     best_score= score;
3823                     best_coeff= 0;
3824                     best_change= change;
3825                     best_unquant_change= new_coeff - old_coeff;
3826                 }
3827             }
3828         }
3829
3830         run=0;
3831         rle_index=0;
3832         run2= run_tab[rle_index++];
3833         prev_level=0;
3834         prev_run=0;
3835
3836         for(i=start_i; i<64; i++){
3837             int j= perm_scantable[i];
3838             const int level= block[j];
3839             int change, old_coeff;
3840
3841             if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3842                 break;
3843
3844             if(level){
3845                 if(level<0) old_coeff= qmul*level - qadd;
3846                 else        old_coeff= qmul*level + qadd;
3847                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3848             }else{
3849                 old_coeff=0;
3850                 run2--;
3851                 assert(run2>=0 || i >= last_non_zero );
3852             }
3853
3854             for(change=-1; change<=1; change+=2){
3855                 int new_level= level + change;
3856                 int score, new_coeff, unquant_change;
3857
3858                 score=0;
3859                 if(s->avctx->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3860                    continue;
3861
3862                 if(new_level){
3863                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3864                     else            new_coeff= qmul*new_level + qadd;
3865                     if(new_coeff >= 2048 || new_coeff <= -2048)
3866                         continue;
3867                     //FIXME check for overflow
3868
3869                     if(level){
3870                         if(level < 63 && level > -63){
3871                             if(i < last_non_zero)
3872                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3873                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3874                             else
3875                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3876                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3877                         }
3878                     }else{
3879                         assert(FFABS(new_level)==1);
3880
3881                         if(analyze_gradient){
3882                             int g= d1[ scantable[i] ];
3883                             if(g && (g^new_level) >= 0)
3884                                 continue;
3885                         }
3886
3887                         if(i < last_non_zero){
3888                             int next_i= i + run2 + 1;
3889                             int next_level= block[ perm_scantable[next_i] ] + 64;
3890
3891                             if(next_level&(~127))
3892                                 next_level= 0;
3893
3894                             if(next_i < last_non_zero)
3895                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3896                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3897                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3898                             else
3899                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3900                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3901                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3902                         }else{
3903                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3904                             if(prev_level){
3905                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3906                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3907                             }
3908                         }
3909                     }
3910                 }else{
3911                     new_coeff=0;
3912                     assert(FFABS(level)==1);
3913
3914                     if(i < last_non_zero){
3915                         int next_i= i + run2 + 1;
3916                         int next_level= block[ perm_scantable[next_i] ] + 64;
3917
3918                         if(next_level&(~127))
3919                             next_level= 0;
3920
3921                         if(next_i < last_non_zero)
3922                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3923                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
3924                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3925                         else
3926                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3927                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3928                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3929                     }else{
3930                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
3931                         if(prev_level){
3932                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3933                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3934                         }
3935                     }
3936                 }
3937
3938                 score *= lambda;
3939
3940                 unquant_change= new_coeff - old_coeff;
3941                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
3942
3943                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
3944                 if(score<best_score){
3945                     best_score= score;
3946                     best_coeff= i;
3947                     best_change= change;
3948                     best_unquant_change= unquant_change;
3949                 }
3950             }
3951             if(level){
3952                 prev_level= level + 64;
3953                 if(prev_level&(~127))
3954                     prev_level= 0;
3955                 prev_run= run;
3956                 run=0;
3957             }else{
3958                 run++;
3959             }
3960         }
3961 #ifdef REFINE_STATS
3962 STOP_TIMER("iterative step")}
3963 #endif
3964
3965         if(best_change){
3966             int j= perm_scantable[ best_coeff ];
3967
3968             block[j] += best_change;
3969
3970             if(best_coeff > last_non_zero){
3971                 last_non_zero= best_coeff;
3972                 assert(block[j]);
3973 #ifdef REFINE_STATS
3974 after_last++;
3975 #endif
3976             }else{
3977 #ifdef REFINE_STATS
3978 if(block[j]){
3979     if(block[j] - best_change){
3980         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
3981             raise++;
3982         }else{
3983             lower++;
3984         }
3985     }else{
3986         from_zero++;
3987     }
3988 }else{
3989     to_zero++;
3990 }
3991 #endif
3992                 for(; last_non_zero>=start_i; last_non_zero--){
3993                     if(block[perm_scantable[last_non_zero]])
3994                         break;
3995                 }
3996             }
3997 #ifdef REFINE_STATS
3998 count++;
3999 if(256*256*256*64 % count == 0){
4000     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4001 }
4002 #endif
4003             run=0;
4004             rle_index=0;
4005             for(i=start_i; i<=last_non_zero; i++){
4006                 int j= perm_scantable[i];
4007                 const int level= block[j];
4008
4009                  if(level){
4010                      run_tab[rle_index++]=run;
4011                      run=0;
4012                  }else{
4013                      run++;
4014                  }
4015             }
4016
4017             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4018         }else{
4019             break;
4020         }
4021     }
4022 #ifdef REFINE_STATS
4023 if(last_non_zero>0){
4024 STOP_TIMER("iterative search")
4025 }
4026 }
4027 #endif
4028
4029     return last_non_zero;
4030 }
4031
4032 int dct_quantize_c(MpegEncContext *s,
4033                         DCTELEM *block, int n,
4034                         int qscale, int *overflow)
4035 {
4036     int i, j, level, last_non_zero, q, start_i;
4037     const int *qmat;
4038     const uint8_t *scantable= s->intra_scantable.scantable;
4039     int bias;
4040     int max=0;
4041     unsigned int threshold1, threshold2;
4042
4043     s->dsp.fdct (block);
4044
4045     if(s->dct_error_sum)
4046         s->denoise_dct(s, block);
4047
4048     if (s->mb_intra) {
4049         if (!s->h263_aic) {
4050             if (n < 4)
4051                 q = s->y_dc_scale;
4052             else
4053                 q = s->c_dc_scale;
4054             q = q << 3;
4055         } else
4056             /* For AIC we skip quant/dequant of INTRADC */
4057             q = 1 << 3;
4058
4059         /* note: block[0] is assumed to be positive */
4060         block[0] = (block[0] + (q >> 1)) / q;
4061         start_i = 1;
4062         last_non_zero = 0;
4063         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4064         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4065     } else {
4066         start_i = 0;
4067         last_non_zero = -1;
4068         qmat = s->q_inter_matrix[qscale];
4069         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4070     }
4071     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4072     threshold2= (threshold1<<1);
4073     for(i=63;i>=start_i;i--) {
4074         j = scantable[i];
4075         level = block[j] * qmat[j];
4076
4077         if(((unsigned)(level+threshold1))>threshold2){
4078             last_non_zero = i;
4079             break;
4080         }else{
4081             block[j]=0;
4082         }
4083     }
4084     for(i=start_i; i<=last_non_zero; i++) {
4085         j = scantable[i];
4086         level = block[j] * qmat[j];
4087
4088 //        if(   bias+level >= (1<<QMAT_SHIFT)
4089 //           || bias-level >= (1<<QMAT_SHIFT)){
4090         if(((unsigned)(level+threshold1))>threshold2){
4091             if(level>0){
4092                 level= (bias + level)>>QMAT_SHIFT;
4093                 block[j]= level;
4094             }else{
4095                 level= (bias - level)>>QMAT_SHIFT;
4096                 block[j]= -level;
4097             }
4098             max |=level;
4099         }else{
4100             block[j]=0;
4101         }
4102     }
4103     *overflow= s->max_qcoeff < max; //overflow might have happened
4104
4105     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4106     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4107         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4108
4109     return last_non_zero;
4110 }
4111
/* Byte offset of field x inside MpegEncContext, for the AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Flag set shared by the option tables below; parenthesized so that the
 * '|' cannot re-associate with operators next to the macro use. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4114 static const AVOption h263_options[] = {
4115     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4116     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE},
4117     { NULL },
4118 };
4119
4120 static const AVClass h263_class = {
4121     .class_name = "H.263 encoder",
4122     .item_name  = av_default_item_name,
4123     .option     = h263_options,
4124     .version    = LIBAVUTIL_VERSION_INT,
4125 };
4126
4127 AVCodec ff_h263_encoder = {
4128     .name           = "h263",
4129     .type           = AVMEDIA_TYPE_VIDEO,
4130     .id             = CODEC_ID_H263,
4131     .priv_data_size = sizeof(MpegEncContext),
4132     .init           = MPV_encode_init,
4133     .encode         = MPV_encode_picture,
4134     .close          = MPV_encode_end,
4135     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4136     .long_name= NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4137     .priv_class     = &h263_class,
4138 };
4139
4140 static const AVOption h263p_options[] = {
4141     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4142     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4143     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4144     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE},
4145     { NULL },
4146 };
4147 static const AVClass h263p_class = {
4148     .class_name = "H.263p encoder",
4149     .item_name  = av_default_item_name,
4150     .option     = h263p_options,
4151     .version    = LIBAVUTIL_VERSION_INT,
4152 };
4153
4154 AVCodec ff_h263p_encoder = {
4155     .name           = "h263p",
4156     .type           = AVMEDIA_TYPE_VIDEO,
4157     .id             = CODEC_ID_H263P,
4158     .priv_data_size = sizeof(MpegEncContext),
4159     .init           = MPV_encode_init,
4160     .encode         = MPV_encode_picture,
4161     .close          = MPV_encode_end,
4162     .capabilities = CODEC_CAP_SLICE_THREADS,
4163     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4164     .long_name= NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4165     .priv_class     = &h263p_class,
4166 };
4167
4168 AVCodec ff_msmpeg4v2_encoder = {
4169     .name           = "msmpeg4v2",
4170     .type           = AVMEDIA_TYPE_VIDEO,
4171     .id             = CODEC_ID_MSMPEG4V2,
4172     .priv_data_size = sizeof(MpegEncContext),
4173     .init           = MPV_encode_init,
4174     .encode         = MPV_encode_picture,
4175     .close          = MPV_encode_end,
4176     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4177     .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4178 };
4179
4180 AVCodec ff_msmpeg4v3_encoder = {
4181     .name           = "msmpeg4",
4182     .type           = AVMEDIA_TYPE_VIDEO,
4183     .id             = CODEC_ID_MSMPEG4V3,
4184     .priv_data_size = sizeof(MpegEncContext),
4185     .init           = MPV_encode_init,
4186     .encode         = MPV_encode_picture,
4187     .close          = MPV_encode_end,
4188     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4189     .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4190 };
4191
4192 AVCodec ff_wmv1_encoder = {
4193     .name           = "wmv1",
4194     .type           = AVMEDIA_TYPE_VIDEO,
4195     .id             = CODEC_ID_WMV1,
4196     .priv_data_size = sizeof(MpegEncContext),
4197     .init           = MPV_encode_init,
4198     .encode         = MPV_encode_picture,
4199     .close          = MPV_encode_end,
4200     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4201     .long_name= NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4202 };