1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/intmath.h"
31 #include "libavutil/mathematics.h"
32 #include "libavutil/pixdesc.h"
33 #include "libavutil/opt.h"
34 #include "avcodec.h"
35 #include "dsputil.h"
36 #include "mpegvideo.h"
37 #include "h263.h"
38 #include "mathops.h"
39 #include "mjpegenc.h"
40 #include "msmpeg4.h"
41 #include "faandct.h"
42 #include "thread.h"
43 #include "aandcttab.h"
44 #include "flv.h"
45 #include "mpeg4video.h"
46 #include "internal.h"
47 #include "bytestream.h"
48 #include <limits.h>
49
50 //#undef NDEBUG
51 //#include <assert.h>
52
53 static int encode_picture(MpegEncContext *s, int picture_number);
54 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
55 static int sse_mb(MpegEncContext *s);
56 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block);
57 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
58
59 /* enable all paranoid tests for rounding, overflows, etc... */
60 //#define PARANOID
61
62 //#define DEBUG
63
64 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
65 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
66
67 const AVOption ff_mpv_generic_options[] = {
68     FF_MPV_COMMON_OPTS
69     { NULL },
70 };
71
72 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
73                        uint16_t (*qmat16)[2][64],
74                        const uint16_t *quant_matrix,
75                        int bias, int qmin, int qmax, int intra)
76 {
77     int qscale;
78     int shift = 0;
79
80     for (qscale = qmin; qscale <= qmax; qscale++) {
81         int i;
82         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
83             dsp->fdct == ff_jpeg_fdct_islow_10 ||
84             dsp->fdct == ff_faandct) {
85             for (i = 0; i < 64; i++) {
86                 const int j = dsp->idct_permutation[i];
87                 /* 16 <= qscale * quant_matrix[i] <= 7905
88                  * Assume x = qscale * quant_matrix[i]
89                  * so              16 <=              x  <= 7905
90                  * so (1 << QMAT_SHIFT) / 16 >= (1 << QMAT_SHIFT) / (x)
91                  *                           >= (1 << QMAT_SHIFT) / 7905 */
92
93                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
94                                         (qscale * quant_matrix[j]));
95             }
96         } else if (dsp->fdct == ff_fdct_ifast) {
97             for (i = 0; i < 64; i++) {
98                 const int j = dsp->idct_permutation[i];
99                 /* 16 <= qscale * quant_matrix[i] <= 7905
100                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
101                  *             19952 <=              x  <= 249205026
102                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
103                  *           3444240 >= (1 << 36) / (x) >= 275 */
104
105                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
106                                         (ff_aanscales[i] * qscale *
107                                          quant_matrix[j]));
108             }
109         } else {
110             for (i = 0; i < 64; i++) {
111                 const int j = dsp->idct_permutation[i];
112                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
113                  * Assume x = qscale * quant_matrix[i]
114                  * So             16 <=              x  <= 7905
115                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
116                  * so          32768 >= (1 << 19) / (x) >= 67 */
117                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
118                                         (qscale * quant_matrix[j]));
119                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
120                 //                    (qscale * quant_matrix[i]);
121                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
122                                        (qscale * quant_matrix[j]);
123
124                 if (qmat16[qscale][0][i] == 0 ||
125                     qmat16[qscale][0][i] == 128 * 256)
126                     qmat16[qscale][0][i] = 128 * 256 - 1;
127                 qmat16[qscale][1][i] =
128                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
129                                 qmat16[qscale][0][i]);
130             }
131         }
132
133         for (i = intra; i < 64; i++) {
134             int64_t max = 8191;
135             if (dsp->fdct == ff_fdct_ifast) {
136                 max = (8191LL * ff_aanscales[i]) >> 14;
137             }
138             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
139                 shift++;
140             }
141         }
142     }
143     if (shift) {
144         av_log(NULL, AV_LOG_INFO,
145                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
146                QMAT_SHIFT - shift);
147     }
148 }
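/* Roughly speaking, the tables built above let the quantizers replace a
 * per-coefficient division by a multiply and a shift: the C quantizer
 * computes approximately
 *     level = ((coef + rounding_bias) * qmat[qscale][i]) >> QMAT_SHIFT
 * so qmat[] holds fixed-point reciprocals of qscale * quant_matrix[], while
 * qmat16[] (reciprocal in [0], per-coefficient bias in [1]) is the 16-bit
 * variant consumed by the SIMD quantizers. */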
149
150 static inline void update_qscale(MpegEncContext *s)
151 {
152     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
153                 (FF_LAMBDA_SHIFT + 7);
154     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
155
156     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
157                  FF_LAMBDA_SHIFT;
158 }
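/* 139 / 2^(FF_LAMBDA_SHIFT + 7) = 139 / 16384 is roughly 1 / FF_QP2LAMBDA
 * (1 / 118), so the conversion above is essentially
 * qscale = lambda / FF_QP2LAMBDA with rounding, clamped to [qmin, qmax].
 * For example, lambda = 236 (about qscale 2) gives
 * (236 * 139 + 8192) >> 14 = 2. */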
159
160 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
161 {
162     int i;
163
164     if (matrix) {
165         put_bits(pb, 1, 1);
166         for (i = 0; i < 64; i++) {
167             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
168         }
169     } else
170         put_bits(pb, 1, 0);
171 }
172
173 /**
174  * init s->current_picture.qscale_table from s->lambda_table
175  */
176 void ff_init_qscale_tab(MpegEncContext *s)
177 {
178     int8_t * const qscale_table = s->current_picture.f.qscale_table;
179     int i;
180
181     for (i = 0; i < s->mb_num; i++) {
182         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
183         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
184         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
185                                                   s->avctx->qmax);
186     }
187 }
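/* Same lambda -> qscale mapping as update_qscale(), applied per macroblock so
 * that the per-MB lambda_table (e.g. from adaptive quantization) ends up as a
 * per-MB qscale_table. */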
188
189 static void copy_picture_attributes(MpegEncContext *s,
190                                     AVFrame *dst,
191                                     AVFrame *src)
192 {
193     int i;
194
195     dst->pict_type              = src->pict_type;
196     dst->quality                = src->quality;
197     dst->coded_picture_number   = src->coded_picture_number;
198     dst->display_picture_number = src->display_picture_number;
199     //dst->reference              = src->reference;
200     dst->pts                    = src->pts;
201     dst->interlaced_frame       = src->interlaced_frame;
202     dst->top_field_first        = src->top_field_first;
203
204     if (s->avctx->me_threshold) {
205         if (!src->motion_val[0])
206             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
207         if (!src->mb_type)
208             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
209         if (!src->ref_index[0])
210             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
211         if (src->motion_subsample_log2 != dst->motion_subsample_log2)
212             av_log(s->avctx, AV_LOG_ERROR,
213                    "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
214                    src->motion_subsample_log2, dst->motion_subsample_log2);
215
216         memcpy(dst->mb_type, src->mb_type,
217                s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
218
219         for (i = 0; i < 2; i++) {
220             int stride = ((16 * s->mb_width ) >>
221                           src->motion_subsample_log2) + 1;
222             int height = ((16 * s->mb_height) >> src->motion_subsample_log2);
223
224             if (src->motion_val[i] &&
225                 src->motion_val[i] != dst->motion_val[i]) {
226                 memcpy(dst->motion_val[i], src->motion_val[i],
227                        2 * stride * height * sizeof(int16_t));
228             }
229             if (src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]) {
230                 memcpy(dst->ref_index[i], src->ref_index[i],
231                        s->mb_stride * 4 * s->mb_height * sizeof(int8_t));
232             }
233         }
234     }
235 }
236
237 static void update_duplicate_context_after_me(MpegEncContext *dst,
238                                               MpegEncContext *src)
239 {
240 #define COPY(a) dst->a= src->a
241     COPY(pict_type);
242     COPY(current_picture);
243     COPY(f_code);
244     COPY(b_code);
245     COPY(qscale);
246     COPY(lambda);
247     COPY(lambda2);
248     COPY(picture_in_gop_number);
249     COPY(gop_picture_number);
250     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
251     COPY(progressive_frame);    // FIXME don't set in encode_header
252     COPY(partitioned_frame);    // FIXME don't set in encode_header
253 #undef COPY
254 }
255
256 /**
257  * Set the given MpegEncContext to defaults for encoding.
258  * The changed fields will not depend upon the prior state of the MpegEncContext.
259  */
260 static void MPV_encode_defaults(MpegEncContext *s)
261 {
262     int i;
263     ff_MPV_common_defaults(s);
264
265     for (i = -16; i < 16; i++) {
266         default_fcode_tab[i + MAX_MV] = 1;
267     }
268     s->me.mv_penalty = default_mv_penalty;
269     s->fcode_tab     = default_fcode_tab;
270 }
271
272 /* init video encoder */
273 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
274 {
275     MpegEncContext *s = avctx->priv_data;
276     int i;
277     int chroma_h_shift, chroma_v_shift;
278
279     MPV_encode_defaults(s);
280
281     switch (avctx->codec_id) {
282     case AV_CODEC_ID_MPEG2VIDEO:
283         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
284             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
285             av_log(avctx, AV_LOG_ERROR,
286                    "only YUV420 and YUV422 are supported\n");
287             return -1;
288         }
289         break;
290     case AV_CODEC_ID_LJPEG:
291         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
292             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
293             avctx->pix_fmt != AV_PIX_FMT_YUVJ444P &&
294             avctx->pix_fmt != AV_PIX_FMT_BGRA     &&
295             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
296               avctx->pix_fmt != AV_PIX_FMT_YUV422P &&
297               avctx->pix_fmt != AV_PIX_FMT_YUV444P) ||
298              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
299             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
300             return -1;
301         }
302         break;
303     case AV_CODEC_ID_MJPEG:
304         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
305             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
306             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
307               avctx->pix_fmt != AV_PIX_FMT_YUV422P) ||
308              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
309             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
310             return -1;
311         }
312         break;
313     default:
314         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
315             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
316             return -1;
317         }
318     }
319
320     switch (avctx->pix_fmt) {
321     case AV_PIX_FMT_YUVJ422P:
322     case AV_PIX_FMT_YUV422P:
323         s->chroma_format = CHROMA_422;
324         break;
325     case AV_PIX_FMT_YUVJ420P:
326     case AV_PIX_FMT_YUV420P:
327     default:
328         s->chroma_format = CHROMA_420;
329         break;
330     }
331
332     s->bit_rate = avctx->bit_rate;
333     s->width    = avctx->width;
334     s->height   = avctx->height;
335     if (avctx->gop_size > 600 &&
336         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
337         av_log(avctx, AV_LOG_ERROR,
338                "Warning, keyframe interval too large! Reducing it ...\n");
339         avctx->gop_size = 600;
340     }
341     s->gop_size     = avctx->gop_size;
342     s->avctx        = avctx;
343     s->flags        = avctx->flags;
344     s->flags2       = avctx->flags2;
345     s->max_b_frames = avctx->max_b_frames;
346     s->codec_id     = avctx->codec->id;
347 #if FF_API_MPV_GLOBAL_OPTS
348     if (avctx->luma_elim_threshold)
349         s->luma_elim_threshold   = avctx->luma_elim_threshold;
350     if (avctx->chroma_elim_threshold)
351         s->chroma_elim_threshold = avctx->chroma_elim_threshold;
352 #endif
353     s->strict_std_compliance = avctx->strict_std_compliance;
354     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
355     s->mpeg_quant         = avctx->mpeg_quant;
356     s->rtp_mode           = !!avctx->rtp_payload_size;
357     s->intra_dc_precision = avctx->intra_dc_precision;
358     s->user_specified_pts = AV_NOPTS_VALUE;
359
360     if (s->gop_size <= 1) {
361         s->intra_only = 1;
362         s->gop_size   = 12;
363     } else {
364         s->intra_only = 0;
365     }
366
367     s->me_method = avctx->me_method;
368
369     /* Fixed QSCALE */
370     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
371
372 #if FF_API_MPV_GLOBAL_OPTS
373     if (s->flags & CODEC_FLAG_QP_RD)
374         s->mpv_flags |= FF_MPV_FLAG_QP_RD;
375 #endif
376
377     s->adaptive_quant = (s->avctx->lumi_masking ||
378                          s->avctx->dark_masking ||
379                          s->avctx->temporal_cplx_masking ||
380                          s->avctx->spatial_cplx_masking  ||
381                          s->avctx->p_masking      ||
382                          s->avctx->border_masking ||
383                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
384                         !s->fixed_qscale;
385
386     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
387
388     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
389         av_log(avctx, AV_LOG_ERROR,
390                "a vbv buffer size is needed "
391                "for encoding with a maximum bitrate\n");
392         return -1;
393     }
394
395     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
396         av_log(avctx, AV_LOG_INFO,
397                "Warning, min_rate > 0 but min_rate != max_rate isn't recommended!\n");
398     }
399
400     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
401         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
402         return -1;
403     }
404
405     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
406         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
407         return -1;
408     }
409
410     if (avctx->rc_max_rate &&
411         avctx->rc_max_rate == avctx->bit_rate &&
412         avctx->rc_max_rate != avctx->rc_min_rate) {
413         av_log(avctx, AV_LOG_INFO,
414                "impossible bitrate constraints, this will fail\n");
415     }
416
417     if (avctx->rc_buffer_size &&
418         avctx->bit_rate * (int64_t)avctx->time_base.num >
419             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
420         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
421         return -1;
422     }
423
424     if (!s->fixed_qscale &&
425         avctx->bit_rate * av_q2d(avctx->time_base) >
426             avctx->bit_rate_tolerance) {
427         av_log(avctx, AV_LOG_ERROR,
428                "bitrate tolerance too small for bitrate\n");
429         return -1;
430     }
431
432     if (s->avctx->rc_max_rate &&
433         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
434         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
435          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
436         90000LL * (avctx->rc_buffer_size - 1) >
437             s->avctx->rc_max_rate * 0xFFFFLL) {
438         av_log(avctx, AV_LOG_INFO,
439                "Warning, vbv_delay will be set to 0xFFFF (=VBR) as the "
440                "specified vbv buffer is too large for the given bitrate!\n");
441     }
442
443     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
444         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
445         s->codec_id != AV_CODEC_ID_FLV1) {
446         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
447         return -1;
448     }
449
450     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
451         av_log(avctx, AV_LOG_ERROR,
452                "OBMC is only supported with simple mb decision\n");
453         return -1;
454     }
455
456     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
457         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
458         return -1;
459     }
460
461     if (s->max_b_frames                    &&
462         s->codec_id != AV_CODEC_ID_MPEG4      &&
463         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
464         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
465         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
466         return -1;
467     }
468
469     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
470          s->codec_id == AV_CODEC_ID_H263  ||
471          s->codec_id == AV_CODEC_ID_H263P) &&
472         (avctx->sample_aspect_ratio.num > 255 ||
473          avctx->sample_aspect_ratio.den > 255)) {
474         av_log(avctx, AV_LOG_ERROR,
475                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
476                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
477         return -1;
478     }
479
480     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
481         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
482         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
483         return -1;
484     }
485
486     // FIXME mpeg2 uses that too
487     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
488         av_log(avctx, AV_LOG_ERROR,
489                "mpeg2 style quantization not supported by codec\n");
490         return -1;
491     }
492
493 #if FF_API_MPV_GLOBAL_OPTS
494     if (s->flags & CODEC_FLAG_CBP_RD)
495         s->mpv_flags |= FF_MPV_FLAG_CBP_RD;
496 #endif
497
498     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
499         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
500         return -1;
501     }
502
503     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
504         s->avctx->mb_decision != FF_MB_DECISION_RD) {
505         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
506         return -1;
507     }
508
509     if (s->avctx->scenechange_threshold < 1000000000 &&
510         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
511         av_log(avctx, AV_LOG_ERROR,
512                "closed gop with scene change detection is not supported yet, "
513                "set threshold to 1000000000\n");
514         return -1;
515     }
516
517     if (s->flags & CODEC_FLAG_LOW_DELAY) {
518         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
519             av_log(avctx, AV_LOG_ERROR,
520                   "low delay forcing is only available for mpeg2\n");
521             return -1;
522         }
523         if (s->max_b_frames != 0) {
524             av_log(avctx, AV_LOG_ERROR,
525                    "b frames cannot be used with low delay\n");
526             return -1;
527         }
528     }
529
530     if (s->q_scale_type == 1) {
531         if (avctx->qmax > 12) {
532             av_log(avctx, AV_LOG_ERROR,
533                    "non-linear quant only supports qmax <= 12 currently\n");
534             return -1;
535         }
536     }
537
538     if (s->avctx->thread_count > 1         &&
539         s->codec_id != AV_CODEC_ID_MPEG4      &&
540         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
541         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
542         (s->codec_id != AV_CODEC_ID_H263P)) {
543         av_log(avctx, AV_LOG_ERROR,
544                "multi threaded encoding not supported by codec\n");
545         return -1;
546     }
547
548     if (s->avctx->thread_count < 1) {
549         av_log(avctx, AV_LOG_ERROR,
550                "automatic thread number detection not supported by codec, "
551                "patch welcome\n");
552         return -1;
553     }
554
555     if (s->avctx->thread_count > 1)
556         s->rtp_mode = 1;
557
558     if (!avctx->time_base.den || !avctx->time_base.num) {
559         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
560         return -1;
561     }
562
563     i = (INT_MAX / 2 + 128) >> 8;
564     if (avctx->me_threshold >= i) {
565         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n",
566                i - 1);
567         return -1;
568     }
569     if (avctx->mb_threshold >= i) {
570         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
571                i - 1);
572         return -1;
573     }
574
575     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
576         av_log(avctx, AV_LOG_INFO,
577                "notice: b_frame_strategy only affects the first pass\n");
578         avctx->b_frame_strategy = 0;
579     }
580
581     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
582     if (i > 1) {
583         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
584         avctx->time_base.den /= i;
585         avctx->time_base.num /= i;
586         //return -1;
587     }
588
589     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
590         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
591         // (a + x * 3 / 8) / x
592         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
593         s->inter_quant_bias = 0;
594     } else {
595         s->intra_quant_bias = 0;
596         // (a - x / 4) / x
597         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
598     }
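    /* Expressed in quantizer steps, the defaults above are a rounding offset
     * of roughly +3/8 for intra coefficients and plain truncation for inter
     * ones with MPEG-style quantization, versus truncation for intra and
     * roughly -1/4 for inter with the H.263-style codecs, i.e. a small dead
     * zone that biases inter levels towards zero. */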
599
600     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
601         s->intra_quant_bias = avctx->intra_quant_bias;
602     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
603         s->inter_quant_bias = avctx->inter_quant_bias;
604
605     av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
606                                      &chroma_v_shift);
607
608     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
609         s->avctx->time_base.den > (1 << 16) - 1) {
610         av_log(avctx, AV_LOG_ERROR,
611                "timebase %d/%d not supported by MPEG 4 standard, "
612                "the maximum admitted value for the timebase denominator "
613                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
614                (1 << 16) - 1);
615         return -1;
616     }
617     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
618
619 #if FF_API_MPV_GLOBAL_OPTS
620     if (avctx->flags2 & CODEC_FLAG2_SKIP_RD)
621         s->mpv_flags |= FF_MPV_FLAG_SKIP_RD;
622     if (avctx->flags2 & CODEC_FLAG2_STRICT_GOP)
623         s->mpv_flags |= FF_MPV_FLAG_STRICT_GOP;
624     if (avctx->quantizer_noise_shaping)
625         s->quantizer_noise_shaping = avctx->quantizer_noise_shaping;
626 #endif
627
628     switch (avctx->codec->id) {
629     case AV_CODEC_ID_MPEG1VIDEO:
630         s->out_format = FMT_MPEG1;
631         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
632         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
633         break;
634     case AV_CODEC_ID_MPEG2VIDEO:
635         s->out_format = FMT_MPEG1;
636         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
637         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
638         s->rtp_mode   = 1;
639         break;
640     case AV_CODEC_ID_LJPEG:
641     case AV_CODEC_ID_MJPEG:
642         s->out_format = FMT_MJPEG;
643         s->intra_only = 1; /* force intra only for jpeg */
644         if (avctx->codec->id == AV_CODEC_ID_LJPEG &&
645             avctx->pix_fmt   == AV_PIX_FMT_BGRA) {
646             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
647             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
648             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
649         } else {
650             s->mjpeg_vsample[0] = 2;
651             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
652             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
653             s->mjpeg_hsample[0] = 2;
654             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
655             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
656         }
657         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
658             ff_mjpeg_encode_init(s) < 0)
659             return -1;
660         avctx->delay = 0;
661         s->low_delay = 1;
662         break;
663     case AV_CODEC_ID_H261:
664         if (!CONFIG_H261_ENCODER)
665             return -1;
666         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
667             av_log(avctx, AV_LOG_ERROR,
668                    "The specified picture size of %dx%d is not valid for the "
669                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
670                     s->width, s->height);
671             return -1;
672         }
673         s->out_format = FMT_H261;
674         avctx->delay  = 0;
675         s->low_delay  = 1;
676         break;
677     case AV_CODEC_ID_H263:
678         if (!CONFIG_H263_ENCODER)
679             return -1;
680         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
681                              s->width, s->height) == 8) {
682             av_log(avctx, AV_LOG_INFO,
683                    "The specified picture size of %dx%d is not valid for "
684                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
685                    "352x288, 704x576, and 1408x1152. "
686                    "Try H.263+.\n", s->width, s->height);
687             return -1;
688         }
689         s->out_format = FMT_H263;
690         avctx->delay  = 0;
691         s->low_delay  = 1;
692         break;
693     case AV_CODEC_ID_H263P:
694         s->out_format = FMT_H263;
695         s->h263_plus  = 1;
696         /* Fx */
697         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
698         s->modified_quant  = s->h263_aic;
699         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
700         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
701
702         /* /Fx */
703         /* These are just to be sure */
704         avctx->delay = 0;
705         s->low_delay = 1;
706         break;
707     case AV_CODEC_ID_FLV1:
708         s->out_format      = FMT_H263;
709         s->h263_flv        = 2; /* format = 1; 11-bit codes */
710         s->unrestricted_mv = 1;
711         s->rtp_mode  = 0; /* don't allow GOB */
712         avctx->delay = 0;
713         s->low_delay = 1;
714         break;
715     case AV_CODEC_ID_RV10:
716         s->out_format = FMT_H263;
717         avctx->delay  = 0;
718         s->low_delay  = 1;
719         break;
720     case AV_CODEC_ID_RV20:
721         s->out_format      = FMT_H263;
722         avctx->delay       = 0;
723         s->low_delay       = 1;
724         s->modified_quant  = 1;
725         s->h263_aic        = 1;
726         s->h263_plus       = 1;
727         s->loop_filter     = 1;
728         s->unrestricted_mv = 0;
729         break;
730     case AV_CODEC_ID_MPEG4:
731         s->out_format      = FMT_H263;
732         s->h263_pred       = 1;
733         s->unrestricted_mv = 1;
734         s->low_delay       = s->max_b_frames ? 0 : 1;
735         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
736         break;
737     case AV_CODEC_ID_MSMPEG4V2:
738         s->out_format      = FMT_H263;
739         s->h263_pred       = 1;
740         s->unrestricted_mv = 1;
741         s->msmpeg4_version = 2;
742         avctx->delay       = 0;
743         s->low_delay       = 1;
744         break;
745     case AV_CODEC_ID_MSMPEG4V3:
746         s->out_format        = FMT_H263;
747         s->h263_pred         = 1;
748         s->unrestricted_mv   = 1;
749         s->msmpeg4_version   = 3;
750         s->flipflop_rounding = 1;
751         avctx->delay         = 0;
752         s->low_delay         = 1;
753         break;
754     case AV_CODEC_ID_WMV1:
755         s->out_format        = FMT_H263;
756         s->h263_pred         = 1;
757         s->unrestricted_mv   = 1;
758         s->msmpeg4_version   = 4;
759         s->flipflop_rounding = 1;
760         avctx->delay         = 0;
761         s->low_delay         = 1;
762         break;
763     case AV_CODEC_ID_WMV2:
764         s->out_format        = FMT_H263;
765         s->h263_pred         = 1;
766         s->unrestricted_mv   = 1;
767         s->msmpeg4_version   = 5;
768         s->flipflop_rounding = 1;
769         avctx->delay         = 0;
770         s->low_delay         = 1;
771         break;
772     default:
773         return -1;
774     }
775
776     avctx->has_b_frames = !s->low_delay;
777
778     s->encoding = 1;
779
780     s->progressive_frame    =
781     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
782                                                 CODEC_FLAG_INTERLACED_ME) ||
783                                 s->alternate_scan);
784
785     /* init */
786     if (ff_MPV_common_init(s) < 0)
787         return -1;
788
789     if (ARCH_X86)
790         ff_MPV_encode_init_x86(s);
791
792     if (!s->dct_quantize)
793         s->dct_quantize = ff_dct_quantize_c;
794     if (!s->denoise_dct)
795         s->denoise_dct  = denoise_dct_c;
796     s->fast_dct_quantize = s->dct_quantize;
797     if (avctx->trellis)
798         s->dct_quantize  = dct_quantize_trellis_c;
799
800     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
801         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
802
803     s->quant_precision = 5;
804
805     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
806     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
807
808     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
809         ff_h261_encode_init(s);
810     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
811         ff_h263_encode_init(s);
812     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
813         ff_msmpeg4_encode_init(s);
814     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
815         && s->out_format == FMT_MPEG1)
816         ff_mpeg1_encode_init(s);
817
818     /* init q matrix */
819     for (i = 0; i < 64; i++) {
820         int j = s->dsp.idct_permutation[i];
821         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
822             s->mpeg_quant) {
823             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
824             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
825         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
826             s->intra_matrix[j] =
827             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
828         } else {
829             /* mpeg1/2 */
830             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
831             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
832         }
833         if (s->avctx->intra_matrix)
834             s->intra_matrix[j] = s->avctx->intra_matrix[i];
835         if (s->avctx->inter_matrix)
836             s->inter_matrix[j] = s->avctx->inter_matrix[i];
837     }
838
839     /* precompute matrix */
840     /* for mjpeg, we do include qscale in the matrix */
841     if (s->out_format != FMT_MJPEG) {
842         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
843                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
844                           31, 1);
845         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
846                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
847                           31, 0);
848     }
849
850     if (ff_rate_control_init(s) < 0)
851         return -1;
852
853     return 0;
854 }
855
856 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
857 {
858     MpegEncContext *s = avctx->priv_data;
859
860     ff_rate_control_uninit(s);
861
862     ff_MPV_common_end(s);
863     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
864         s->out_format == FMT_MJPEG)
865         ff_mjpeg_encode_close(s);
866
867     av_freep(&avctx->extradata);
868
869     return 0;
870 }
871
872 static int get_sae(uint8_t *src, int ref, int stride)
873 {
874     int x,y;
875     int acc = 0;
876
877     for (y = 0; y < 16; y++) {
878         for (x = 0; x < 16; x++) {
879             acc += FFABS(src[x + y * stride] - ref);
880         }
881     }
882
883     return acc;
884 }
885
886 static int get_intra_count(MpegEncContext *s, uint8_t *src,
887                            uint8_t *ref, int stride)
888 {
889     int x, y, w, h;
890     int acc = 0;
891
892     w = s->width  & ~15;
893     h = s->height & ~15;
894
895     for (y = 0; y < h; y += 16) {
896         for (x = 0; x < w; x += 16) {
897             int offset = x + y * stride;
898             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
899                                      16);
900             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
901             int sae  = get_sae(src + offset, mean, stride);
902
903             acc += sae + 500 < sad;
904         }
905     }
906     return acc;
907 }
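/* Used by b_frame_strategy == 1: for every 16x16 block, the SAD against the
 * previous input frame is compared with the spread around the block's own
 * mean (get_sae), and blocks where temporal prediction looks clearly worse
 * than coding the block "flat" are counted.  A large count suggests a scene
 * change, which select_input_picture() uses to reduce the number of
 * B-frames. */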
908
909
910 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg)
911 {
912     AVFrame *pic = NULL;
913     int64_t pts;
914     int i;
915     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
916                                                  (s->low_delay ? 0 : 1);
917     int direct = 1;
918
919     if (pic_arg) {
920         pts = pic_arg->pts;
921         pic_arg->display_picture_number = s->input_picture_number++;
922
923         if (pts != AV_NOPTS_VALUE) {
924             if (s->user_specified_pts != AV_NOPTS_VALUE) {
925                 int64_t time = pts;
926                 int64_t last = s->user_specified_pts;
927
928                 if (time <= last) {
929                     av_log(s->avctx, AV_LOG_ERROR,
930                            "Error, invalid timestamp=%"PRId64", "
931                            "last=%"PRId64"\n", pts, s->user_specified_pts);
932                     return -1;
933                 }
934
935                 if (!s->low_delay && pic_arg->display_picture_number == 1)
936                     s->dts_delta = time - last;
937             }
938             s->user_specified_pts = pts;
939         } else {
940             if (s->user_specified_pts != AV_NOPTS_VALUE) {
941                 s->user_specified_pts =
942                 pts = s->user_specified_pts + 1;
943                 av_log(s->avctx, AV_LOG_INFO,
944                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
945                        pts);
946             } else {
947                 pts = pic_arg->display_picture_number;
948             }
949         }
950     }
951
952   if (pic_arg) {
953     if (encoding_delay && !(s->flags & CODEC_FLAG_INPUT_PRESERVED))
954         direct = 0;
955     if (pic_arg->linesize[0] != s->linesize)
956         direct = 0;
957     if (pic_arg->linesize[1] != s->uvlinesize)
958         direct = 0;
959     if (pic_arg->linesize[2] != s->uvlinesize)
960         direct = 0;
961
962     av_dlog(s->avctx, "%d %d %d %d\n", pic_arg->linesize[0],
963             pic_arg->linesize[1], s->linesize, s->uvlinesize);
964
965     if (direct) {
966         i = ff_find_unused_picture(s, 1);
967         if (i < 0)
968             return i;
969
970         pic = &s->picture[i].f;
971         pic->reference = 3;
972
973         for (i = 0; i < 4; i++) {
974             pic->data[i]     = pic_arg->data[i];
975             pic->linesize[i] = pic_arg->linesize[i];
976         }
977         if (ff_alloc_picture(s, (Picture *) pic, 1) < 0) {
978             return -1;
979         }
980     } else {
981         i = ff_find_unused_picture(s, 0);
982         if (i < 0)
983             return i;
984
985         pic = &s->picture[i].f;
986         pic->reference = 3;
987
988         if (ff_alloc_picture(s, (Picture *) pic, 0) < 0) {
989             return -1;
990         }
991
992         if (pic->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
993             pic->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
994             pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
995             // empty
996         } else {
997             int h_chroma_shift, v_chroma_shift;
998             av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
999                                              &h_chroma_shift,
1000                                              &v_chroma_shift);
1001
1002             for (i = 0; i < 3; i++) {
1003                 int src_stride = pic_arg->linesize[i];
1004                 int dst_stride = i ? s->uvlinesize : s->linesize;
1005                 int h_shift = i ? h_chroma_shift : 0;
1006                 int v_shift = i ? v_chroma_shift : 0;
1007                 int w = s->width  >> h_shift;
1008                 int h = s->height >> v_shift;
1009                 uint8_t *src = pic_arg->data[i];
1010                 uint8_t *dst = pic->data[i];
1011
1012                 if (!s->avctx->rc_buffer_size)
1013                     dst += INPLACE_OFFSET;
1014
1015                 if (src_stride == dst_stride)
1016                     memcpy(dst, src, src_stride * h);
1017                 else {
1018                     while (h--) {
1019                         memcpy(dst, src, w);
1020                         dst += dst_stride;
1021                         src += src_stride;
1022                     }
1023                 }
1024             }
1025         }
1026     }
1027     copy_picture_attributes(s, pic, pic_arg);
1028     pic->pts = pts; // we set this here to avoid modifying pic_arg
1029   }
1030
1031     /* shift buffer entries */
1032     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1033         s->input_picture[i - 1] = s->input_picture[i];
1034
1035     s->input_picture[encoding_delay] = (Picture*) pic;
1036
1037     return 0;
1038 }
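/* Note: if the caller's buffers already have the encoder's strides (and may
 * be kept around, cf. CODEC_FLAG_INPUT_PRESERVED), the frame is wrapped
 * directly as a Picture; otherwise its planes are copied into an internally
 * allocated picture before being placed at the end of the input queue. */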
1039
1040 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1041 {
1042     int x, y, plane;
1043     int score = 0;
1044     int64_t score64 = 0;
1045
1046     for (plane = 0; plane < 3; plane++) {
1047         const int stride = p->f.linesize[plane];
1048         const int bw = plane ? 1 : 2;
1049         for (y = 0; y < s->mb_height * bw; y++) {
1050             for (x = 0; x < s->mb_width * bw; x++) {
1051                 int off = p->f.type == FF_BUFFER_TYPE_SHARED ? 0 : 16;
1052                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1053                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1054                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1055
1056                 switch (s->avctx->frame_skip_exp) {
1057                 case 0: score    =  FFMAX(score, v);          break;
1058                 case 1: score   += FFABS(v);                  break;
1059                 case 2: score   += v * v;                     break;
1060                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1061                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1062                 }
1063             }
1064         }
1065     }
1066
1067     if (score)
1068         score64 = score;
1069
1070     if (score64 < s->avctx->frame_skip_threshold)
1071         return 1;
1072     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1073         return 1;
1074     return 0;
1075 }
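/* In skip_check() above, frame_skip_exp selects how the per-8x8-block
 * differences are pooled into a frame score (0: maximum, 1: sum of absolute
 * values, 2: sum of squares, 3/4: higher powers); the frame is skipped when
 * that score stays below frame_skip_threshold or below a lambda-scaled
 * frame_skip_factor. */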
1076
1077 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1078 {
1079     AVPacket pkt = { 0 };
1080     int ret, got_output;
1081
1082     av_init_packet(&pkt);
1083     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1084     if (ret < 0)
1085         return ret;
1086
1087     ret = pkt.size;
1088     av_free_packet(&pkt);
1089     return ret;
1090 }
1091
1092 static int estimate_best_b_count(MpegEncContext *s)
1093 {
1094     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1095     AVCodecContext *c = avcodec_alloc_context3(NULL);
1096     AVFrame input[FF_MAX_B_FRAMES + 2];
1097     const int scale = s->avctx->brd_scale;
1098     int i, j, out_size, p_lambda, b_lambda, lambda2;
1099     int64_t best_rd  = INT64_MAX;
1100     int best_b_count = -1;
1101
1102     assert(scale >= 0 && scale <= 3);
1103
1104     //emms_c();
1105     //s->next_picture_ptr->quality;
1106     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1107     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1108     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1109     if (!b_lambda) // FIXME we should do this somewhere else
1110         b_lambda = p_lambda;
1111     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1112                FF_LAMBDA_SHIFT;
1113
1114     c->width        = s->width  >> scale;
1115     c->height       = s->height >> scale;
1116     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1117                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1118     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1119     c->mb_decision  = s->avctx->mb_decision;
1120     c->me_cmp       = s->avctx->me_cmp;
1121     c->mb_cmp       = s->avctx->mb_cmp;
1122     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1123     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1124     c->time_base    = s->avctx->time_base;
1125     c->max_b_frames = s->max_b_frames;
1126
1127     if (avcodec_open2(c, codec, NULL) < 0)
1128         return -1;
1129
1130     for (i = 0; i < s->max_b_frames + 2; i++) {
1131         int ysize = c->width * c->height;
1132         int csize = (c->width / 2) * (c->height / 2);
1133         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1134                                                 s->next_picture_ptr;
1135
1136         avcodec_get_frame_defaults(&input[i]);
1137         input[i].data[0]     = av_malloc(ysize + 2 * csize);
1138         input[i].data[1]     = input[i].data[0] + ysize;
1139         input[i].data[2]     = input[i].data[1] + csize;
1140         input[i].linesize[0] = c->width;
1141         input[i].linesize[1] =
1142         input[i].linesize[2] = c->width / 2;
1143
1144         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1145             pre_input = *pre_input_ptr;
1146
1147             if (pre_input.f.type != FF_BUFFER_TYPE_SHARED && i) {
1148                 pre_input.f.data[0] += INPLACE_OFFSET;
1149                 pre_input.f.data[1] += INPLACE_OFFSET;
1150                 pre_input.f.data[2] += INPLACE_OFFSET;
1151             }
1152
1153             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
1154                                  pre_input.f.data[0], pre_input.f.linesize[0],
1155                                  c->width,      c->height);
1156             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
1157                                  pre_input.f.data[1], pre_input.f.linesize[1],
1158                                  c->width >> 1, c->height >> 1);
1159             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
1160                                  pre_input.f.data[2], pre_input.f.linesize[2],
1161                                  c->width >> 1, c->height >> 1);
1162         }
1163     }
1164
1165     for (j = 0; j < s->max_b_frames + 1; j++) {
1166         int64_t rd = 0;
1167
1168         if (!s->input_picture[j])
1169             break;
1170
1171         c->error[0] = c->error[1] = c->error[2] = 0;
1172
1173         input[0].pict_type = AV_PICTURE_TYPE_I;
1174         input[0].quality   = 1 * FF_QP2LAMBDA;
1175
1176         out_size = encode_frame(c, &input[0]);
1177
1178         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1179
1180         for (i = 0; i < s->max_b_frames + 1; i++) {
1181             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1182
1183             input[i + 1].pict_type = is_p ?
1184                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1185             input[i + 1].quality   = is_p ? p_lambda : b_lambda;
1186
1187             out_size = encode_frame(c, &input[i + 1]);
1188
1189             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1190         }
1191
1192         /* get the delayed frames */
1193         while (out_size) {
1194             out_size = encode_frame(c, NULL);
1195             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1196         }
1197
1198         rd += c->error[0] + c->error[1] + c->error[2];
1199
1200         if (rd < best_rd) {
1201             best_rd = rd;
1202             best_b_count = j;
1203         }
1204     }
1205
1206     avcodec_close(c);
1207     av_freep(&c);
1208
1209     for (i = 0; i < s->max_b_frames + 2; i++) {
1210         av_freep(&input[i].data[0]);
1211     }
1212
1213     return best_b_count;
1214 }
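/* b_frame_strategy == 2 is a brute-force search: the queued input frames are
 * downscaled by brd_scale and re-encoded once for each candidate number of
 * B-frames, and the candidate with the smallest combined cost (output bits
 * weighted by lambda2 plus the reconstruction error accumulated in
 * c->error[]) wins. */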
1215
1216 static int select_input_picture(MpegEncContext *s)
1217 {
1218     int i;
1219
1220     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1221         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1222     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1223
1224     /* set next picture type & ordering */
1225     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1226         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1227             s->next_picture_ptr == NULL || s->intra_only) {
1228             s->reordered_input_picture[0] = s->input_picture[0];
1229             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1230             s->reordered_input_picture[0]->f.coded_picture_number =
1231                 s->coded_picture_number++;
1232         } else {
1233             int b_frames;
1234
1235             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1236                 if (s->picture_in_gop_number < s->gop_size &&
1237                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1238                     // FIXME check that the gop check above is +-1 correct
1239                     if (s->input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED) {
1240                         for (i = 0; i < 4; i++)
1241                             s->input_picture[0]->f.data[i] = NULL;
1242                         s->input_picture[0]->f.type = 0;
1243                     } else {
1244                         assert(s->input_picture[0]->f.type == FF_BUFFER_TYPE_USER ||
1245                                s->input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL);
1246
1247                         s->avctx->release_buffer(s->avctx,
1248                                                  &s->input_picture[0]->f);
1249                     }
1250
1251                     emms_c();
1252                     ff_vbv_update(s, 0);
1253
1254                     goto no_output_pic;
1255                 }
1256             }
1257
1258             if (s->flags & CODEC_FLAG_PASS2) {
1259                 for (i = 0; i < s->max_b_frames + 1; i++) {
1260                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1261
1262                     if (pict_num >= s->rc_context.num_entries)
1263                         break;
1264                     if (!s->input_picture[i]) {
1265                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1266                         break;
1267                     }
1268
1269                     s->input_picture[i]->f.pict_type =
1270                         s->rc_context.entry[pict_num].new_pict_type;
1271                 }
1272             }
1273
1274             if (s->avctx->b_frame_strategy == 0) {
1275                 b_frames = s->max_b_frames;
1276                 while (b_frames && !s->input_picture[b_frames])
1277                     b_frames--;
1278             } else if (s->avctx->b_frame_strategy == 1) {
1279                 for (i = 1; i < s->max_b_frames + 1; i++) {
1280                     if (s->input_picture[i] &&
1281                         s->input_picture[i]->b_frame_score == 0) {
1282                         s->input_picture[i]->b_frame_score =
1283                             get_intra_count(s,
1284                                             s->input_picture[i    ]->f.data[0],
1285                                             s->input_picture[i - 1]->f.data[0],
1286                                             s->linesize) + 1;
1287                     }
1288                 }
1289                 for (i = 0; i < s->max_b_frames + 1; i++) {
1290                     if (s->input_picture[i] == NULL ||
1291                         s->input_picture[i]->b_frame_score - 1 >
1292                             s->mb_num / s->avctx->b_sensitivity)
1293                         break;
1294                 }
1295
1296                 b_frames = FFMAX(0, i - 1);
1297
1298                 /* reset scores */
1299                 for (i = 0; i < b_frames + 1; i++) {
1300                     s->input_picture[i]->b_frame_score = 0;
1301                 }
1302             } else if (s->avctx->b_frame_strategy == 2) {
1303                 b_frames = estimate_best_b_count(s);
1304             } else {
1305                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1306                 b_frames = 0;
1307             }
1308
1309             emms_c();
1310
1311             for (i = b_frames - 1; i >= 0; i--) {
1312                 int type = s->input_picture[i]->f.pict_type;
1313                 if (type && type != AV_PICTURE_TYPE_B)
1314                     b_frames = i;
1315             }
1316             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1317                 b_frames == s->max_b_frames) {
1318                 av_log(s->avctx, AV_LOG_ERROR,
1319                        "warning, too many b frames in a row\n");
1320             }
1321
1322             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1323                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1324                     s->gop_size > s->picture_in_gop_number) {
1325                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1326                 } else {
1327                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1328                         b_frames = 0;
1329                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1330                 }
1331             }
1332
1333             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1334                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1335                 b_frames--;
1336
1337             s->reordered_input_picture[0] = s->input_picture[b_frames];
1338             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1339                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1340             s->reordered_input_picture[0]->f.coded_picture_number =
1341                 s->coded_picture_number++;
1342             for (i = 0; i < b_frames; i++) {
1343                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1344                 s->reordered_input_picture[i + 1]->f.pict_type =
1345                     AV_PICTURE_TYPE_B;
1346                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1347                     s->coded_picture_number++;
1348             }
1349         }
1350     }
1351 no_output_pic:
1352     if (s->reordered_input_picture[0]) {
1353         s->reordered_input_picture[0]->f.reference =
1354            s->reordered_input_picture[0]->f.pict_type !=
1355                AV_PICTURE_TYPE_B ? 3 : 0;
1356
1357         ff_copy_picture(&s->new_picture, s->reordered_input_picture[0]);
1358
1359         if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED ||
1360             s->avctx->rc_buffer_size) {
1361             // input is a shared pix, so we can't modify it -> alloc a new
1362             // one & ensure that the shared one is reusable
1363
1364             Picture *pic;
1365             int i = ff_find_unused_picture(s, 0);
1366             if (i < 0)
1367                 return i;
1368             pic = &s->picture[i];
1369
1370             pic->f.reference = s->reordered_input_picture[0]->f.reference;
1371             if (ff_alloc_picture(s, pic, 0) < 0) {
1372                 return -1;
1373             }
1374
1375             /* mark us unused / free shared pic */
1376             if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL)
1377                 s->avctx->release_buffer(s->avctx,
1378                                          &s->reordered_input_picture[0]->f);
1379             for (i = 0; i < 4; i++)
1380                 s->reordered_input_picture[0]->f.data[i] = NULL;
1381             s->reordered_input_picture[0]->f.type = 0;
1382
1383             copy_picture_attributes(s, &pic->f,
1384                                     &s->reordered_input_picture[0]->f);
1385
1386             s->current_picture_ptr = pic;
1387         } else {
1388             // input is not a shared pix -> reuse buffer for current_pix
1389
1390             assert(s->reordered_input_picture[0]->f.type ==
1391                        FF_BUFFER_TYPE_USER ||
1392                    s->reordered_input_picture[0]->f.type ==
1393                        FF_BUFFER_TYPE_INTERNAL);
1394
1395             s->current_picture_ptr = s->reordered_input_picture[0];
1396             for (i = 0; i < 4; i++) {
1397                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1398             }
1399         }
1400         ff_copy_picture(&s->current_picture, s->current_picture_ptr);
1401
1402         s->picture_number = s->new_picture.f.display_picture_number;
1403     } else {
1404         memset(&s->new_picture, 0, sizeof(Picture));
1405     }
1406     return 0;
1407 }
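/* select_input_picture() above decides what gets coded next: it applies frame
 * skipping, picks the I/P/B pattern (second-pass stats or one of the
 * b_frame_strategy modes), honours gop_size / CODEC_FLAG_CLOSED_GOP, and then
 * sets up new_picture and current_picture_ptr, allocating a private copy when
 * the input is a shared buffer or when rc_buffer_size is set, since the
 * picture may be modified in place. */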
1408
1409 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1410                           const AVFrame *pic_arg, int *got_packet)
1411 {
1412     MpegEncContext *s = avctx->priv_data;
1413     int i, stuffing_count, ret;
1414     int context_count = s->slice_context_count;
1415
1416     s->picture_in_gop_number++;
1417
1418     if (load_input_picture(s, pic_arg) < 0)
1419         return -1;
1420
1421     if (select_input_picture(s) < 0) {
1422         return -1;
1423     }
1424
1425     /* output? */
1426     if (s->new_picture.f.data[0]) {
1427         if (!pkt->data &&
1428             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1429             return ret;
1430         if (s->mb_info) {
1431             s->mb_info_ptr = av_packet_new_side_data(pkt,
1432                                  AV_PKT_DATA_H263_MB_INFO,
1433                                  s->mb_width*s->mb_height*12);
1434             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1435         }
1436
1437         for (i = 0; i < context_count; i++) {
1438             int start_y = s->thread_context[i]->start_mb_y;
1439             int   end_y = s->thread_context[i]->  end_mb_y;
1440             int h       = s->mb_height;
1441             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1442             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1443
1444             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1445         }
1446
1447         s->pict_type = s->new_picture.f.pict_type;
1448         //emms_c();
1449         ff_MPV_frame_start(s, avctx);
1450 vbv_retry:
1451         if (encode_picture(s, s->picture_number) < 0)
1452             return -1;
1453
1454         avctx->header_bits = s->header_bits;
1455         avctx->mv_bits     = s->mv_bits;
1456         avctx->misc_bits   = s->misc_bits;
1457         avctx->i_tex_bits  = s->i_tex_bits;
1458         avctx->p_tex_bits  = s->p_tex_bits;
1459         avctx->i_count     = s->i_count;
1460         // FIXME f/b_count in avctx
1461         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1462         avctx->skip_count  = s->skip_count;
1463
1464         ff_MPV_frame_end(s);
1465
1466         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1467             ff_mjpeg_encode_picture_trailer(s);
1468
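        /* VBV retry: if rate control uses a decoder buffer model and the frame
         * just encoded exceeds the share of the buffer we may use
         * (buffer_index * rc_max_available_vbv_use), raise lambda (and the
         * per-MB lambda table when adaptive quantization is on), undo the side
         * effects of the first pass (the no_rounding toggle and the time_base
         * update), rewind the per-slice bit writers and jump back to vbv_retry
         * to encode the whole frame again with coarser quantization. */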
1469         if (avctx->rc_buffer_size) {
1470             RateControlContext *rcc = &s->rc_context;
1471             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1472
1473             if (put_bits_count(&s->pb) > max_size &&
1474                 s->lambda < s->avctx->lmax) {
1475                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1476                                        (s->qscale + 1) / s->qscale);
1477                 if (s->adaptive_quant) {
1478                     int i;
1479                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1480                         s->lambda_table[i] =
1481                             FFMAX(s->lambda_table[i] + 1,
1482                                   s->lambda_table[i] * (s->qscale + 1) /
1483                                   s->qscale);
1484                 }
1485                 s->mb_skipped = 0;        // done in MPV_frame_start()
1486                 // the following was done in encode_picture(), so we must undo it
1487                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1488                     if (s->flipflop_rounding          ||
1489                         s->codec_id == AV_CODEC_ID_H263P ||
1490                         s->codec_id == AV_CODEC_ID_MPEG4)
1491                         s->no_rounding ^= 1;
1492                 }
1493                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1494                     s->time_base       = s->last_time_base;
1495                     s->last_non_b_time = s->time - s->pp_time;
1496                 }
1497                 for (i = 0; i < context_count; i++) {
1498                     PutBitContext *pb = &s->thread_context[i]->pb;
1499                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1500                 }
1501                 goto vbv_retry;
1502             }
1503
1504             assert(s->avctx->rc_max_rate);
1505         }
1506
1507         if (s->flags & CODEC_FLAG_PASS1)
1508             ff_write_pass1_stats(s);
1509
1510         for (i = 0; i < 4; i++) {
1511             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1512             avctx->error[i] += s->current_picture_ptr->f.error[i];
1513         }
1514
1515         if (s->flags & CODEC_FLAG_PASS1)
1516             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1517                    avctx->i_tex_bits + avctx->p_tex_bits ==
1518                        put_bits_count(&s->pb));
1519         flush_put_bits(&s->pb);
1520         s->frame_bits  = put_bits_count(&s->pb);
1521
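        /* ff_vbv_update() returns the number of stuffing bytes the VBV buffer
         * model requires for this frame.  MPEG-1/2 pad with zero bytes;
         * MPEG-4 writes a 32-bit code (00 00 01 C3 here) followed by 0xFF
         * filler bytes. */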
1522         stuffing_count = ff_vbv_update(s, s->frame_bits);
1523         if (stuffing_count) {
1524             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1525                     stuffing_count + 50) {
1526                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1527                 return -1;
1528             }
1529
1530             switch (s->codec_id) {
1531             case AV_CODEC_ID_MPEG1VIDEO:
1532             case AV_CODEC_ID_MPEG2VIDEO:
1533                 while (stuffing_count--) {
1534                     put_bits(&s->pb, 8, 0);
1535                 }
1536             break;
1537             case AV_CODEC_ID_MPEG4:
1538                 put_bits(&s->pb, 16, 0);
1539                 put_bits(&s->pb, 16, 0x1C3);
1540                 stuffing_count -= 4;
1541                 while (stuffing_count--) {
1542                     put_bits(&s->pb, 8, 0xFF);
1543                 }
1544             break;
1545             default:
1546                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1547             }
1548             flush_put_bits(&s->pb);
1549             s->frame_bits  = put_bits_count(&s->pb);
1550         }
1551
1552         /* update mpeg1/2 vbv_delay for CBR */
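        /* Roughly: vbv_delay = buffer_fullness_in_bits * 90000 / rc_max_rate,
         * clamped from below so the remainder of the picture can still arrive
         * in time.  The 16-bit field is not byte aligned, so it is spread over
         * three bytes at vbv_delay_ptr (low 3 bits of byte 0, all of byte 1,
         * high 5 bits of byte 2); avctx->vbv_delay holds the same value in
         * 27 MHz units (x 300). */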
1553         if (s->avctx->rc_max_rate                          &&
1554             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1555             s->out_format == FMT_MPEG1                     &&
1556             90000LL * (avctx->rc_buffer_size - 1) <=
1557                 s->avctx->rc_max_rate * 0xFFFFLL) {
1558             int vbv_delay, min_delay;
1559             double inbits  = s->avctx->rc_max_rate *
1560                              av_q2d(s->avctx->time_base);
1561             int    minbits = s->frame_bits - 8 *
1562                              (s->vbv_delay_ptr - s->pb.buf - 1);
1563             double bits    = s->rc_context.buffer_index + minbits - inbits;
1564
1565             if (bits < 0)
1566                 av_log(s->avctx, AV_LOG_ERROR,
1567                        "Internal error, negative bits\n");
1568
1569             assert(s->repeat_first_field == 0);
1570
1571             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1572             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1573                         s->avctx->rc_max_rate;
1574
1575             vbv_delay = FFMAX(vbv_delay, min_delay);
1576
1577             assert(vbv_delay < 0xFFFF);
1578
1579             s->vbv_delay_ptr[0] &= 0xF8;
1580             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1581             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1582             s->vbv_delay_ptr[2] &= 0x07;
1583             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1584             avctx->vbv_delay     = vbv_delay * 300;
1585         }
1586         s->total_bits     += s->frame_bits;
1587         avctx->frame_bits  = s->frame_bits;
1588
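        /* With B-frames (low_delay unset) dts must lag pts: the first packet
         * gets pts - dts_delta, later packets reuse the pts of an earlier
         * input picture saved in reordered_pts. */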
1589         pkt->pts = s->current_picture.f.pts;
1590         if (!s->low_delay) {
1591             if (!s->current_picture.f.coded_picture_number)
1592                 pkt->dts = pkt->pts - s->dts_delta;
1593             else
1594                 pkt->dts = s->reordered_pts;
1595             s->reordered_pts = s->input_picture[0]->f.pts;
1596         } else
1597             pkt->dts = pkt->pts;
1598         if (s->current_picture.f.key_frame)
1599             pkt->flags |= AV_PKT_FLAG_KEY;
1600         if (s->mb_info)
1601             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1602     } else {
1603         s->frame_bits = 0;
1604     }
1605     assert((s->frame_bits & 7) == 0);
1606
1607     pkt->size = s->frame_bits / 8;
1608     *got_packet = !!pkt->size;
1609     return 0;
1610 }
1611
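/* Heuristic that zeroes blocks containing only a few +-1 coefficients: each
 * +-1 contributes tab[run] to a score, where run is the number of zeros
 * preceding it in scan order (coefficients after long zero runs count for
 * little), any coefficient with |level| > 1 keeps the block unchanged, and if
 * the final score stays below the threshold the whole block is cleared.  A
 * negative threshold additionally allows the intra DC coefficient to be
 * eliminated; otherwise position 0 is preserved. */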
1612 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1613                                                 int n, int threshold)
1614 {
1615     static const char tab[64] = {
1616         3, 2, 2, 1, 1, 1, 1, 1,
1617         1, 1, 1, 1, 1, 1, 1, 1,
1618         1, 1, 1, 1, 1, 1, 1, 1,
1619         0, 0, 0, 0, 0, 0, 0, 0,
1620         0, 0, 0, 0, 0, 0, 0, 0,
1621         0, 0, 0, 0, 0, 0, 0, 0,
1622         0, 0, 0, 0, 0, 0, 0, 0,
1623         0, 0, 0, 0, 0, 0, 0, 0
1624     };
1625     int score = 0;
1626     int run = 0;
1627     int i;
1628     DCTELEM *block = s->block[n];
1629     const int last_index = s->block_last_index[n];
1630     int skip_dc;
1631
1632     if (threshold < 0) {
1633         skip_dc = 0;
1634         threshold = -threshold;
1635     } else
1636         skip_dc = 1;
1637
1638     /* Is everything we could set to zero already zero? */
1639     if (last_index <= skip_dc - 1)
1640         return;
1641
1642     for (i = 0; i <= last_index; i++) {
1643         const int j = s->intra_scantable.permutated[i];
1644         const int level = FFABS(block[j]);
1645         if (level == 1) {
1646             if (skip_dc && i == 0)
1647                 continue;
1648             score += tab[run];
1649             run = 0;
1650         } else if (level > 1) {
1651             return;
1652         } else {
1653             run++;
1654         }
1655     }
1656     if (score >= threshold)
1657         return;
1658     for (i = skip_dc; i <= last_index; i++) {
1659         const int j = s->intra_scantable.permutated[i];
1660         block[j] = 0;
1661     }
1662     if (block[0])
1663         s->block_last_index[n] = 0;
1664     else
1665         s->block_last_index[n] = -1;
1666 }
1667
1668 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block,
1669                                int last_index)
1670 {
1671     int i;
1672     const int maxlevel = s->max_qcoeff;
1673     const int minlevel = s->min_qcoeff;
1674     int overflow = 0;
1675
1676     if (s->mb_intra) {
1677         i = 1; // skip clipping of intra dc
1678     } else
1679         i = 0;
1680
1681     for (; i <= last_index; i++) {
1682         const int j = s->intra_scantable.permutated[i];
1683         int level = block[j];
1684
1685         if (level > maxlevel) {
1686             level = maxlevel;
1687             overflow++;
1688         } else if (level < minlevel) {
1689             level = minlevel;
1690             overflow++;
1691         }
1692
1693         block[j] = level;
1694     }
1695
1696     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1697         av_log(s->avctx, AV_LOG_INFO,
1698                "warning, clipping %d dct coefficients to %d..%d\n",
1699                overflow, minlevel, maxlevel);
1700 }
1701
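/* Per-pixel weights for quantizer noise shaping: for every position of the
 * 8x8 block the standard deviation over its 3x3 neighbourhood (clipped at the
 * block border) is estimated, and the weight is roughly 36 times that local
 * deviation.  dct_quantize_refine() uses these weights to distribute the
 * quantization error according to local activity. */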
1702 static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
1703 {
1704     int x, y;
1705     // FIXME optimize
1706     for (y = 0; y < 8; y++) {
1707         for (x = 0; x < 8; x++) {
1708             int x2, y2;
1709             int sum = 0;
1710             int sqr = 0;
1711             int count = 0;
1712
1713             for (y2 = FFMAX(y - 1, 0); y2 < FFMIN(8, y + 2); y2++) {
1714                 for (x2= FFMAX(x - 1, 0); x2 < FFMIN(8, x + 2); x2++) {
1715                     int v = ptr[x2 + y2 * stride];
1716                     sum += v;
1717                     sqr += v * v;
1718                     count++;
1719                 }
1720             }
1721             weight[x + 8 * y]= (36 * ff_sqrt(count * sqr - sum * sum)) / count;
1722         }
1723     }
1724 }
1725
1726 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1727                                                 int motion_x, int motion_y,
1728                                                 int mb_block_height,
1729                                                 int mb_block_count)
1730 {
1731     int16_t weight[8][64];
1732     DCTELEM orig[8][64];
1733     const int mb_x = s->mb_x;
1734     const int mb_y = s->mb_y;
1735     int i;
1736     int skip_dct[8];
1737     int dct_offset = s->linesize * 8; // default for progressive frames
1738     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1739     int wrap_y, wrap_c;
1740
1741     for (i = 0; i < mb_block_count; i++)
1742         skip_dct[i] = s->skipdct;
1743
1744     if (s->adaptive_quant) {
1745         const int last_qp = s->qscale;
1746         const int mb_xy = mb_x + mb_y * s->mb_stride;
1747
1748         s->lambda = s->lambda_table[mb_xy];
1749         update_qscale(s);
1750
1751         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1752             s->qscale = s->current_picture_ptr->f.qscale_table[mb_xy];
1753             s->dquant = s->qscale - last_qp;
1754
1755             if (s->out_format == FMT_H263) {
1756                 s->dquant = av_clip(s->dquant, -2, 2);
1757
1758                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1759                     if (!s->mb_intra) {
1760                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1761                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1762                                 s->dquant = 0;
1763                         }
1764                         if (s->mv_type == MV_TYPE_8X8)
1765                             s->dquant = 0;
1766                     }
1767                 }
1768             }
1769         }
1770         ff_set_qscale(s, last_qp + s->dquant);
1771     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1772         ff_set_qscale(s, s->qscale + s->dquant);
1773
1774     wrap_y = s->linesize;
1775     wrap_c = s->uvlinesize;
1776     ptr_y  = s->new_picture.f.data[0] +
1777              (mb_y * 16 * wrap_y)              + mb_x * 16;
1778     ptr_cb = s->new_picture.f.data[1] +
1779              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1780     ptr_cr = s->new_picture.f.data[2] +
1781              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1782
1783     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1784         uint8_t *ebuf = s->edge_emu_buffer + 32;
1785         s->dsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
1786                                 mb_y * 16, s->width, s->height);
1787         ptr_y = ebuf;
1788         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, 8,
1789                                 mb_block_height, mb_x * 8, mb_y * 8,
1790                                 s->width >> 1, s->height >> 1);
1791         ptr_cb = ebuf + 18 * wrap_y;
1792         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr, wrap_c, 8,
1793                                 mb_block_height, mb_x * 8, mb_y * 8,
1794                                 s->width >> 1, s->height >> 1);
1795         ptr_cr = ebuf + 18 * wrap_y + 8;
1796     }
1797
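    /* For both intra and inter MBs, when CODEC_FLAG_INTERLACED_DCT is set the
     * cost of frame (progressive) DCT ordering is compared against field
     * (interlaced) ordering with the ildct_cmp metric; if the interlaced
     * ordering scores lower, wrap_y is doubled and dct_offset set to a single
     * line so the luma blocks pick up alternating fields. */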
1798     if (s->mb_intra) {
1799         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1800             int progressive_score, interlaced_score;
1801
1802             s->interlaced_dct = 0;
1803             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1804                                                     NULL, wrap_y, 8) +
1805                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1806                                                     NULL, wrap_y, 8) - 400;
1807
1808             if (progressive_score > 0) {
1809                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1810                                                        NULL, wrap_y * 2, 8) +
1811                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1812                                                        NULL, wrap_y * 2, 8);
1813                 if (progressive_score > interlaced_score) {
1814                     s->interlaced_dct = 1;
1815
1816                     dct_offset = wrap_y;
1817                     wrap_y <<= 1;
1818                     if (s->chroma_format == CHROMA_422)
1819                         wrap_c <<= 1;
1820                 }
1821             }
1822         }
1823
1824         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1825         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1826         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1827         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1828
1829         if (s->flags & CODEC_FLAG_GRAY) {
1830             skip_dct[4] = 1;
1831             skip_dct[5] = 1;
1832         } else {
1833             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1834             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1835             if (!s->chroma_y_shift) { /* 422 */
1836                 s->dsp.get_pixels(s->block[6],
1837                                   ptr_cb + (dct_offset >> 1), wrap_c);
1838                 s->dsp.get_pixels(s->block[7],
1839                                   ptr_cr + (dct_offset >> 1), wrap_c);
1840             }
1841         }
1842     } else {
1843         op_pixels_func (*op_pix)[4];
1844         qpel_mc_func (*op_qpix)[16];
1845         uint8_t *dest_y, *dest_cb, *dest_cr;
1846
1847         dest_y  = s->dest[0];
1848         dest_cb = s->dest[1];
1849         dest_cr = s->dest[2];
1850
1851         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1852             op_pix  = s->dsp.put_pixels_tab;
1853             op_qpix = s->dsp.put_qpel_pixels_tab;
1854         } else {
1855             op_pix  = s->dsp.put_no_rnd_pixels_tab;
1856             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1857         }
1858
1859         if (s->mv_dir & MV_DIR_FORWARD) {
1860             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
1861                           s->last_picture.f.data,
1862                           op_pix, op_qpix);
1863             op_pix  = s->dsp.avg_pixels_tab;
1864             op_qpix = s->dsp.avg_qpel_pixels_tab;
1865         }
1866         if (s->mv_dir & MV_DIR_BACKWARD) {
1867             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
1868                           s->next_picture.f.data,
1869                           op_pix, op_qpix);
1870         }
1871
1872         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1873             int progressive_score, interlaced_score;
1874
1875             s->interlaced_dct = 0;
1876             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1877                                                     ptr_y,              wrap_y,
1878                                                     8) +
1879                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1880                                                     ptr_y + wrap_y * 8, wrap_y,
1881                                                     8) - 400;
1882
1883             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1884                 progressive_score -= 400;
1885
1886             if (progressive_score > 0) {
1887                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1888                                                        ptr_y,
1889                                                        wrap_y * 2, 8) +
1890                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1891                                                        ptr_y + wrap_y,
1892                                                        wrap_y * 2, 8);
1893
1894                 if (progressive_score > interlaced_score) {
1895                     s->interlaced_dct = 1;
1896
1897                     dct_offset = wrap_y;
1898                     wrap_y <<= 1;
1899                     if (s->chroma_format == CHROMA_422)
1900                         wrap_c <<= 1;
1901                 }
1902             }
1903         }
1904
1905         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1906         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1907         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1908                            dest_y + dct_offset, wrap_y);
1909         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1910                            dest_y + dct_offset + 8, wrap_y);
1911
1912         if (s->flags & CODEC_FLAG_GRAY) {
1913             skip_dct[4] = 1;
1914             skip_dct[5] = 1;
1915         } else {
1916             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1917             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1918             if (!s->chroma_y_shift) { /* 422 */
1919                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1920                                    dest_cb + (dct_offset >> 1), wrap_c);
1921                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
1922                                    dest_cr + (dct_offset >> 1), wrap_c);
1923             }
1924         }
1925         /* pre quantization */
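        /* Cheap skip test: when the motion-compensated residual of the whole
         * MB is already small (mc_mb_var < 2 * qscale^2), any 8x8 block whose
         * SAD against its prediction is below 20 * qscale is assumed to
         * quantize to all zeros and its DCT/quantization is skipped. */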
1926         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1927                 2 * s->qscale * s->qscale) {
1928             // FIXME optimize
1929             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1930                               wrap_y, 8) < 20 * s->qscale)
1931                 skip_dct[0] = 1;
1932             if (s->dsp.sad[1](NULL, ptr_y + 8,
1933                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1934                 skip_dct[1] = 1;
1935             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1936                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1937                 skip_dct[2] = 1;
1938             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1939                               dest_y + dct_offset + 8,
1940                               wrap_y, 8) < 20 * s->qscale)
1941                 skip_dct[3] = 1;
1942             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
1943                               wrap_c, 8) < 20 * s->qscale)
1944                 skip_dct[4] = 1;
1945             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
1946                               wrap_c, 8) < 20 * s->qscale)
1947                 skip_dct[5] = 1;
1948             if (!s->chroma_y_shift) { /* 422 */
1949                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
1950                                   dest_cb + (dct_offset >> 1),
1951                                   wrap_c, 8) < 20 * s->qscale)
1952                     skip_dct[6] = 1;
1953                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
1954                                   dest_cr + (dct_offset >> 1),
1955                                   wrap_c, 8) < 20 * s->qscale)
1956                     skip_dct[7] = 1;
1957             }
1958         }
1959     }
1960
1961     if (s->quantizer_noise_shaping) {
1962         if (!skip_dct[0])
1963             get_visual_weight(weight[0], ptr_y                 , wrap_y);
1964         if (!skip_dct[1])
1965             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
1966         if (!skip_dct[2])
1967             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
1968         if (!skip_dct[3])
1969             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
1970         if (!skip_dct[4])
1971             get_visual_weight(weight[4], ptr_cb                , wrap_c);
1972         if (!skip_dct[5])
1973             get_visual_weight(weight[5], ptr_cr                , wrap_c);
1974         if (!s->chroma_y_shift) { /* 422 */
1975             if (!skip_dct[6])
1976                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
1977                                   wrap_c);
1978             if (!skip_dct[7])
1979                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
1980                                   wrap_c);
1981         }
1982         memcpy(orig[0], s->block[0], sizeof(DCTELEM) * 64 * mb_block_count);
1983     }
1984
1985     /* DCT & quantize */
1986     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
1987     {
1988         for (i = 0; i < mb_block_count; i++) {
1989             if (!skip_dct[i]) {
1990                 int overflow;
1991                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
1992                 // FIXME we could decide to change the quantizer instead of
1993                 // clipping
1994                 // JS: I don't think that would be a good idea; it could lower
1995                 //     quality instead of improving it. Only INTRADC clipping
1996                 //     deserves changes in the quantizer.
1997                 if (overflow)
1998                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
1999             } else
2000                 s->block_last_index[i] = -1;
2001         }
2002         if (s->quantizer_noise_shaping) {
2003             for (i = 0; i < mb_block_count; i++) {
2004                 if (!skip_dct[i]) {
2005                     s->block_last_index[i] =
2006                         dct_quantize_refine(s, s->block[i], weight[i],
2007                                             orig[i], i, s->qscale);
2008                 }
2009             }
2010         }
2011
2012         if (s->luma_elim_threshold && !s->mb_intra)
2013             for (i = 0; i < 4; i++)
2014                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2015         if (s->chroma_elim_threshold && !s->mb_intra)
2016             for (i = 4; i < mb_block_count; i++)
2017                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2018
2019         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2020             for (i = 0; i < mb_block_count; i++) {
2021                 if (s->block_last_index[i] == -1)
2022                     s->coded_score[i] = INT_MAX / 256;
2023             }
2024         }
2025     }
2026
2027     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2028         s->block_last_index[4] =
2029         s->block_last_index[5] = 0;
2030         s->block[4][0] =
2031         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2032     }
2033
2034     // FIXME: the non-C quantize code returns an incorrect block_last_index
2035     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2036         for (i = 0; i < mb_block_count; i++) {
2037             int j;
2038             if (s->block_last_index[i] > 0) {
2039                 for (j = 63; j > 0; j--) {
2040                     if (s->block[i][s->intra_scantable.permutated[j]])
2041                         break;
2042                 }
2043                 s->block_last_index[i] = j;
2044             }
2045         }
2046     }
2047
2048     /* huffman encode */
2049     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2050     case AV_CODEC_ID_MPEG1VIDEO:
2051     case AV_CODEC_ID_MPEG2VIDEO:
2052         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2053             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2054         break;
2055     case AV_CODEC_ID_MPEG4:
2056         if (CONFIG_MPEG4_ENCODER)
2057             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2058         break;
2059     case AV_CODEC_ID_MSMPEG4V2:
2060     case AV_CODEC_ID_MSMPEG4V3:
2061     case AV_CODEC_ID_WMV1:
2062         if (CONFIG_MSMPEG4_ENCODER)
2063             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2064         break;
2065     case AV_CODEC_ID_WMV2:
2066         if (CONFIG_WMV2_ENCODER)
2067             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2068         break;
2069     case AV_CODEC_ID_H261:
2070         if (CONFIG_H261_ENCODER)
2071             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2072         break;
2073     case AV_CODEC_ID_H263:
2074     case AV_CODEC_ID_H263P:
2075     case AV_CODEC_ID_FLV1:
2076     case AV_CODEC_ID_RV10:
2077     case AV_CODEC_ID_RV20:
2078         if (CONFIG_H263_ENCODER)
2079             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2080         break;
2081     case AV_CODEC_ID_MJPEG:
2082         if (CONFIG_MJPEG_ENCODER)
2083             ff_mjpeg_encode_mb(s, s->block);
2084         break;
2085     default:
2086         assert(0);
2087     }
2088 }
2089
2090 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2091 {
2092     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2093     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2094 }
2095
2096 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2097     int i;
2098
2099     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2100
2101     /* mpeg1 */
2102     d->mb_skip_run= s->mb_skip_run;
2103     for(i=0; i<3; i++)
2104         d->last_dc[i] = s->last_dc[i];
2105
2106     /* statistics */
2107     d->mv_bits= s->mv_bits;
2108     d->i_tex_bits= s->i_tex_bits;
2109     d->p_tex_bits= s->p_tex_bits;
2110     d->i_count= s->i_count;
2111     d->f_count= s->f_count;
2112     d->b_count= s->b_count;
2113     d->skip_count= s->skip_count;
2114     d->misc_bits= s->misc_bits;
2115     d->last_bits= 0;
2116
2117     d->mb_skipped= 0;
2118     d->qscale= s->qscale;
2119     d->dquant= s->dquant;
2120
2121     d->esc3_level_length= s->esc3_level_length;
2122 }
2123
2124 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2125     int i;
2126
2127     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2128     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2129
2130     /* mpeg1 */
2131     d->mb_skip_run= s->mb_skip_run;
2132     for(i=0; i<3; i++)
2133         d->last_dc[i] = s->last_dc[i];
2134
2135     /* statistics */
2136     d->mv_bits= s->mv_bits;
2137     d->i_tex_bits= s->i_tex_bits;
2138     d->p_tex_bits= s->p_tex_bits;
2139     d->i_count= s->i_count;
2140     d->f_count= s->f_count;
2141     d->b_count= s->b_count;
2142     d->skip_count= s->skip_count;
2143     d->misc_bits= s->misc_bits;
2144
2145     d->mb_intra= s->mb_intra;
2146     d->mb_skipped= s->mb_skipped;
2147     d->mv_type= s->mv_type;
2148     d->mv_dir= s->mv_dir;
2149     d->pb= s->pb;
2150     if(s->data_partitioning){
2151         d->pb2= s->pb2;
2152         d->tex_pb= s->tex_pb;
2153     }
2154     d->block= s->block;
2155     for(i=0; i<8; i++)
2156         d->block_last_index[i]= s->block_last_index[i];
2157     d->interlaced_dct= s->interlaced_dct;
2158     d->qscale= s->qscale;
2159
2160     d->esc3_level_length= s->esc3_level_length;
2161 }
2162
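/* Trial-encode one candidate MB mode for the RD mode decision: the saved
 * context is restored, the MB is encoded into one of two alternating scratch
 * bit buffers, and the candidate is scored by its bit count, or with
 * FF_MB_DECISION_RD by bits * lambda2 + (SSE << FF_LAMBDA_SHIFT) after
 * decoding the MB.  If the score is lower than *dmin, the candidate becomes
 * the new best: *dmin is updated, *next_block is flipped and the encoder
 * state is copied into *best. */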
2163 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2164                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2165                            int *dmin, int *next_block, int motion_x, int motion_y)
2166 {
2167     int score;
2168     uint8_t *dest_backup[3];
2169
2170     copy_context_before_encode(s, backup, type);
2171
2172     s->block= s->blocks[*next_block];
2173     s->pb= pb[*next_block];
2174     if(s->data_partitioning){
2175         s->pb2   = pb2   [*next_block];
2176         s->tex_pb= tex_pb[*next_block];
2177     }
2178
2179     if(*next_block){
2180         memcpy(dest_backup, s->dest, sizeof(s->dest));
2181         s->dest[0] = s->rd_scratchpad;
2182         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2183         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2184         assert(s->linesize >= 32); //FIXME
2185     }
2186
2187     encode_mb(s, motion_x, motion_y);
2188
2189     score= put_bits_count(&s->pb);
2190     if(s->data_partitioning){
2191         score+= put_bits_count(&s->pb2);
2192         score+= put_bits_count(&s->tex_pb);
2193     }
2194
2195     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2196         ff_MPV_decode_mb(s, s->block);
2197
2198         score *= s->lambda2;
2199         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2200     }
2201
2202     if(*next_block){
2203         memcpy(s->dest, dest_backup, sizeof(s->dest));
2204     }
2205
2206     if(score<*dmin){
2207         *dmin= score;
2208         *next_block^=1;
2209
2210         copy_context_after_encode(best, s, type);
2211     }
2212 }
2213
2214 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2215     uint32_t *sq = ff_squareTbl + 256;
2216     int acc=0;
2217     int x,y;
2218
2219     if(w==16 && h==16)
2220         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2221     else if(w==8 && h==8)
2222         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2223
2224     for(y=0; y<h; y++){
2225         for(x=0; x<w; x++){
2226             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2227         }
2228     }
2229
2230     assert(acc>=0);
2231
2232     return acc;
2233 }
2234
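/* Sum of squared (or, with FF_CMP_NSSE, noise-shaped) errors between the
 * source macroblock and its reconstruction in s->dest; macroblocks clipped by
 * the right/bottom picture border fall back to the generic sse() on the
 * clipped size. */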
2235 static int sse_mb(MpegEncContext *s){
2236     int w= 16;
2237     int h= 16;
2238
2239     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2240     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2241
2242     if(w==16 && h==16)
2243       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2244         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2245                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2246                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2247       }else{
2248         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2249                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2250                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2251       }
2252     else
2253         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2254                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2255                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2256 }
2257
2258 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2259     MpegEncContext *s= *(void**)arg;
2260
2261
2262     s->me.pre_pass=1;
2263     s->me.dia_size= s->avctx->pre_dia_size;
2264     s->first_slice_line=1;
2265     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2266         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2267             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2268         }
2269         s->first_slice_line=0;
2270     }
2271
2272     s->me.pre_pass=0;
2273
2274     return 0;
2275 }
2276
2277 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2278     MpegEncContext *s= *(void**)arg;
2279
2280     ff_check_alignment();
2281
2282     s->me.dia_size= s->avctx->dia_size;
2283     s->first_slice_line=1;
2284     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2285         s->mb_x=0; //for block init below
2286         ff_init_block_index(s);
2287         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2288             s->block_index[0]+=2;
2289             s->block_index[1]+=2;
2290             s->block_index[2]+=2;
2291             s->block_index[3]+=2;
2292
2293             /* compute motion vector & mb_type and store in context */
2294             if(s->pict_type==AV_PICTURE_TYPE_B)
2295                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2296             else
2297                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2298         }
2299         s->first_slice_line=0;
2300     }
2301     return 0;
2302 }
2303
2304 static int mb_var_thread(AVCodecContext *c, void *arg){
2305     MpegEncContext *s= *(void**)arg;
2306     int mb_x, mb_y;
2307
2308     ff_check_alignment();
2309
2310     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2311         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2312             int xx = mb_x * 16;
2313             int yy = mb_y * 16;
2314             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2315             int varc;
2316             int sum = s->dsp.pix_sum(pix, s->linesize);
2317
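            /* Spatial variance and mean of the 16x16 source block, roughly
             * varc ~ (sum_of_squares - sum*sum/256) / 256, stored for rate
             * control and adaptive quantization. */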
2318             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2319
2320             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2321             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2322             s->me.mb_var_sum_temp    += varc;
2323         }
2324     }
2325     return 0;
2326 }
2327
2328 static void write_slice_end(MpegEncContext *s){
2329     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2330         if(s->partitioned_frame){
2331             ff_mpeg4_merge_partitions(s);
2332         }
2333
2334         ff_mpeg4_stuffing(&s->pb);
2335     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2336         ff_mjpeg_encode_stuffing(&s->pb);
2337     }
2338
2339     avpriv_align_put_bits(&s->pb);
2340     flush_put_bits(&s->pb);
2341
2342     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2343         s->misc_bits+= get_bits_diff(s);
2344 }
2345
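/* Each AV_PKT_DATA_H263_MB_INFO side-data entry is 12 bytes: bit offset of
 * the MB within the packet (le32), qscale, GOB number, MB address within the
 * GOB (le16) and the motion vector predictors hmv1/vmv1; hmv2/vmv2 stay 0
 * since 4MV is not written. */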
2346 static void write_mb_info(MpegEncContext *s)
2347 {
2348     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2349     int offset = put_bits_count(&s->pb);
2350     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2351     int gobn = s->mb_y / s->gob_index;
2352     int pred_x, pred_y;
2353     if (CONFIG_H263_ENCODER)
2354         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2355     bytestream_put_le32(&ptr, offset);
2356     bytestream_put_byte(&ptr, s->qscale);
2357     bytestream_put_byte(&ptr, gobn);
2358     bytestream_put_le16(&ptr, mba);
2359     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2360     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2361     /* 4MV not implemented */
2362     bytestream_put_byte(&ptr, 0); /* hmv2 */
2363     bytestream_put_byte(&ptr, 0); /* vmv2 */
2364 }
2365
2366 static void update_mb_info(MpegEncContext *s, int startcode)
2367 {
2368     if (!s->mb_info)
2369         return;
2370     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2371         s->mb_info_size += 12;
2372         s->prev_mb_info = s->last_mb_info;
2373     }
2374     if (startcode) {
2375         s->prev_mb_info = put_bits_count(&s->pb)/8;
2376         /* This might have incremented mb_info_size above, and we return without
2377          * actually writing any info into that slot yet. But in that case,
2378          * this will be called again right after the start code has been
2379          * written, and the mb info will actually be written then. */
2380         return;
2381     }
2382
2383     s->last_mb_info = put_bits_count(&s->pb)/8;
2384     if (!s->mb_info_size)
2385         s->mb_info_size += 12;
2386     write_mb_info(s);
2387 }
2388
2389 static int encode_thread(AVCodecContext *c, void *arg){
2390     MpegEncContext *s= *(void**)arg;
2391     int mb_x, mb_y, pdif = 0;
2392     int chr_h= 16>>s->chroma_y_shift;
2393     int i, j;
2394     MpegEncContext best_s, backup_s;
2395     uint8_t bit_buf[2][MAX_MB_BYTES];
2396     uint8_t bit_buf2[2][MAX_MB_BYTES];
2397     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2398     PutBitContext pb[2], pb2[2], tex_pb[2];
2399
2400     ff_check_alignment();
2401
2402     for(i=0; i<2; i++){
2403         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2404         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2405         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2406     }
2407
2408     s->last_bits= put_bits_count(&s->pb);
2409     s->mv_bits=0;
2410     s->misc_bits=0;
2411     s->i_tex_bits=0;
2412     s->p_tex_bits=0;
2413     s->i_count=0;
2414     s->f_count=0;
2415     s->b_count=0;
2416     s->skip_count=0;
2417
2418     for(i=0; i<3; i++){
2419         /* init last dc values */
2420         /* note: quant matrix value (8) is implied here */
2421         s->last_dc[i] = 128 << s->intra_dc_precision;
2422
2423         s->current_picture.f.error[i] = 0;
2424     }
2425     s->mb_skip_run = 0;
2426     memset(s->last_mv, 0, sizeof(s->last_mv));
2427
2428     s->last_mv_dir = 0;
2429
2430     switch(s->codec_id){
2431     case AV_CODEC_ID_H263:
2432     case AV_CODEC_ID_H263P:
2433     case AV_CODEC_ID_FLV1:
2434         if (CONFIG_H263_ENCODER)
2435             s->gob_index = ff_h263_get_gob_height(s);
2436         break;
2437     case AV_CODEC_ID_MPEG4:
2438         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2439             ff_mpeg4_init_partitions(s);
2440         break;
2441     }
2442
2443     s->resync_mb_x=0;
2444     s->resync_mb_y=0;
2445     s->first_slice_line = 1;
2446     s->ptr_lastgob = s->pb.buf;
2447     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2448         s->mb_x=0;
2449         s->mb_y= mb_y;
2450
2451         ff_set_qscale(s, s->qscale);
2452         ff_init_block_index(s);
2453
2454         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2455             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2456             int mb_type= s->mb_type[xy];
2457 //            int d;
2458             int dmin= INT_MAX;
2459             int dir;
2460
2461             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2462                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2463                 return -1;
2464             }
2465             if(s->data_partitioning){
2466                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2467                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2468                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2469                     return -1;
2470                 }
2471             }
2472
2473             s->mb_x = mb_x;
2474             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2475             ff_update_block_index(s);
2476
2477             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2478                 ff_h261_reorder_mb_index(s);
2479                 xy= s->mb_y*s->mb_stride + s->mb_x;
2480                 mb_type= s->mb_type[xy];
2481             }
2482
2483             /* write gob / video packet header  */
2484             if(s->rtp_mode){
2485                 int current_packet_size, is_gob_start;
2486
2487                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2488
2489                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2490
2491                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2492
2493                 switch(s->codec_id){
2494                 case AV_CODEC_ID_H263:
2495                 case AV_CODEC_ID_H263P:
2496                     if(!s->h263_slice_structured)
2497                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2498                     break;
2499                 case AV_CODEC_ID_MPEG2VIDEO:
2500                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1; /* fall through */
2501                 case AV_CODEC_ID_MPEG1VIDEO:
2502                     if(s->mb_skip_run) is_gob_start=0;
2503                     break;
2504                 }
2505
2506                 if(is_gob_start){
2507                     if(s->start_mb_y != mb_y || mb_x!=0){
2508                         write_slice_end(s);
2509
2510                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2511                             ff_mpeg4_init_partitions(s);
2512                         }
2513                     }
2514
2515                     assert((put_bits_count(&s->pb)&7) == 0);
2516                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2517
2518                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2519                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2520                         int d= 100 / s->avctx->error_rate;
2521                         if(r % d == 0){
2522                             current_packet_size=0;
2523                             s->pb.buf_ptr= s->ptr_lastgob;
2524                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2525                         }
2526                     }
2527
2528                     if (s->avctx->rtp_callback){
2529                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2530                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2531                     }
2532                     update_mb_info(s, 1);
2533
2534                     switch(s->codec_id){
2535                     case AV_CODEC_ID_MPEG4:
2536                         if (CONFIG_MPEG4_ENCODER) {
2537                             ff_mpeg4_encode_video_packet_header(s);
2538                             ff_mpeg4_clean_buffers(s);
2539                         }
2540                     break;
2541                     case AV_CODEC_ID_MPEG1VIDEO:
2542                     case AV_CODEC_ID_MPEG2VIDEO:
2543                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2544                             ff_mpeg1_encode_slice_header(s);
2545                             ff_mpeg1_clean_buffers(s);
2546                         }
2547                     break;
2548                     case AV_CODEC_ID_H263:
2549                     case AV_CODEC_ID_H263P:
2550                         if (CONFIG_H263_ENCODER)
2551                             ff_h263_encode_gob_header(s, mb_y);
2552                     break;
2553                     }
2554
2555                     if(s->flags&CODEC_FLAG_PASS1){
2556                         int bits= put_bits_count(&s->pb);
2557                         s->misc_bits+= bits - s->last_bits;
2558                         s->last_bits= bits;
2559                     }
2560
2561                     s->ptr_lastgob += current_packet_size;
2562                     s->first_slice_line=1;
2563                     s->resync_mb_x=mb_x;
2564                     s->resync_mb_y=mb_y;
2565                 }
2566             }
2567
2568             if(  (s->resync_mb_x   == s->mb_x)
2569                && s->resync_mb_y+1 == s->mb_y){
2570                 s->first_slice_line=0;
2571             }
2572
2573             s->mb_skipped=0;
2574             s->dquant=0; //only for QP_RD
2575
2576             update_mb_info(s, 0);
2577
2578             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2579                 int next_block=0;
2580                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2581
2582                 copy_context_before_encode(&backup_s, s, -1);
2583                 backup_s.pb= s->pb;
2584                 best_s.data_partitioning= s->data_partitioning;
2585                 best_s.partitioned_frame= s->partitioned_frame;
2586                 if(s->data_partitioning){
2587                     backup_s.pb2= s->pb2;
2588                     backup_s.tex_pb= s->tex_pb;
2589                 }
2590
2591                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2592                     s->mv_dir = MV_DIR_FORWARD;
2593                     s->mv_type = MV_TYPE_16X16;
2594                     s->mb_intra= 0;
2595                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2596                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2597                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2598                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2599                 }
2600                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2601                     s->mv_dir = MV_DIR_FORWARD;
2602                     s->mv_type = MV_TYPE_FIELD;
2603                     s->mb_intra= 0;
2604                     for(i=0; i<2; i++){
2605                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2606                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2607                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2608                     }
2609                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2610                                  &dmin, &next_block, 0, 0);
2611                 }
2612                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2613                     s->mv_dir = MV_DIR_FORWARD;
2614                     s->mv_type = MV_TYPE_16X16;
2615                     s->mb_intra= 0;
2616                     s->mv[0][0][0] = 0;
2617                     s->mv[0][0][1] = 0;
2618                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2619                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2620                 }
2621                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2622                     s->mv_dir = MV_DIR_FORWARD;
2623                     s->mv_type = MV_TYPE_8X8;
2624                     s->mb_intra= 0;
2625                     for(i=0; i<4; i++){
2626                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2627                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2628                     }
2629                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2630                                  &dmin, &next_block, 0, 0);
2631                 }
2632                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2633                     s->mv_dir = MV_DIR_FORWARD;
2634                     s->mv_type = MV_TYPE_16X16;
2635                     s->mb_intra= 0;
2636                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2637                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2638                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2639                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2640                 }
2641                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2642                     s->mv_dir = MV_DIR_BACKWARD;
2643                     s->mv_type = MV_TYPE_16X16;
2644                     s->mb_intra= 0;
2645                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2646                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2647                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2648                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2649                 }
2650                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2651                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2652                     s->mv_type = MV_TYPE_16X16;
2653                     s->mb_intra= 0;
2654                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2655                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2656                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2657                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2658                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2659                                  &dmin, &next_block, 0, 0);
2660                 }
2661                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2662                     s->mv_dir = MV_DIR_FORWARD;
2663                     s->mv_type = MV_TYPE_FIELD;
2664                     s->mb_intra= 0;
2665                     for(i=0; i<2; i++){
2666                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2667                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2668                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2669                     }
2670                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2671                                  &dmin, &next_block, 0, 0);
2672                 }
2673                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2674                     s->mv_dir = MV_DIR_BACKWARD;
2675                     s->mv_type = MV_TYPE_FIELD;
2676                     s->mb_intra= 0;
2677                     for(i=0; i<2; i++){
2678                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2679                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2680                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2681                     }
2682                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2683                                  &dmin, &next_block, 0, 0);
2684                 }
2685                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2686                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2687                     s->mv_type = MV_TYPE_FIELD;
2688                     s->mb_intra= 0;
2689                     for(dir=0; dir<2; dir++){
2690                         for(i=0; i<2; i++){
2691                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2692                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2693                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2694                         }
2695                     }
2696                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2697                                  &dmin, &next_block, 0, 0);
2698                 }
2699                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2700                     s->mv_dir = 0;
2701                     s->mv_type = MV_TYPE_16X16;
2702                     s->mb_intra= 1;
2703                     s->mv[0][0][0] = 0;
2704                     s->mv[0][0][1] = 0;
2705                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2706                                  &dmin, &next_block, 0, 0);
2707                     if(s->h263_pred || s->h263_aic){
2708                         if(best_s.mb_intra)
2709                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2710                         else
2711                             ff_clean_intra_table_entries(s); //old mode?
2712                     }
2713                 }
2714
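                /* QP_RD: re-try the best 16x16 mode with the qscale offsets
                 * from dquant_tab (+-1 and +-2, only +-2 for B frames) and
                 * keep whichever quantizer gives the lowest score; intra
                 * DC/AC prediction state is saved and restored so rejected
                 * candidates do not corrupt it. */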
2715                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2716                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2717                         const int last_qp= backup_s.qscale;
2718                         int qpi, qp, dc[6];
2719                         DCTELEM ac[6][16];
2720                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2721                         static const int dquant_tab[4]={-1,1,-2,2};
2722
2723                         assert(backup_s.dquant == 0);
2724
2725                         //FIXME intra
2726                         s->mv_dir= best_s.mv_dir;
2727                         s->mv_type = MV_TYPE_16X16;
2728                         s->mb_intra= best_s.mb_intra;
2729                         s->mv[0][0][0] = best_s.mv[0][0][0];
2730                         s->mv[0][0][1] = best_s.mv[0][0][1];
2731                         s->mv[1][0][0] = best_s.mv[1][0][0];
2732                         s->mv[1][0][1] = best_s.mv[1][0][1];
2733
2734                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2735                         for(; qpi<4; qpi++){
2736                             int dquant= dquant_tab[qpi];
2737                             qp= last_qp + dquant;
2738                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2739                                 continue;
2740                             backup_s.dquant= dquant;
2741                             if(s->mb_intra && s->dc_val[0]){
2742                                 for(i=0; i<6; i++){
2743                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2744                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
2745                                 }
2746                             }
2747
2748                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2749                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2750                             if(best_s.qscale != qp){
2751                                 if(s->mb_intra && s->dc_val[0]){
2752                                     for(i=0; i<6; i++){
2753                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2754                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
2755                                     }
2756                                 }
2757                             }
2758                         }
2759                     }
2760                 }
2761                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2762                     int mx= s->b_direct_mv_table[xy][0];
2763                     int my= s->b_direct_mv_table[xy][1];
2764
2765                     backup_s.dquant = 0;
2766                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2767                     s->mb_intra= 0;
2768                     ff_mpeg4_set_direct_mv(s, mx, my);
2769                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2770                                  &dmin, &next_block, mx, my);
2771                 }
2772                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2773                     backup_s.dquant = 0;
2774                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2775                     s->mb_intra= 0;
2776                     ff_mpeg4_set_direct_mv(s, 0, 0);
2777                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2778                                  &dmin, &next_block, 0, 0);
2779                 }
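                /* SKIP_RD: if the best non-intra mode still codes coefficients,
                 * also try the same motion with the residual skipped entirely
                 * (skipdct=1), in case signalling no texture is cheaper. */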
2780                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2781                     int coded=0;
2782                     for(i=0; i<6; i++)
2783                         coded |= s->block_last_index[i];
2784                     if(coded){
2785                         int mx,my;
2786                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2787                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2788                             mx=my=0; //FIXME find the one we actually used
2789                             ff_mpeg4_set_direct_mv(s, mx, my);
2790                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2791                             mx= s->mv[1][0][0];
2792                             my= s->mv[1][0][1];
2793                         }else{
2794                             mx= s->mv[0][0][0];
2795                             my= s->mv[0][0][1];
2796                         }
2797
2798                         s->mv_dir= best_s.mv_dir;
2799                         s->mv_type = best_s.mv_type;
2800                         s->mb_intra= 0;
2801 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2802                         s->mv[0][0][1] = best_s.mv[0][0][1];
2803                         s->mv[1][0][0] = best_s.mv[1][0][0];
2804                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2805                         backup_s.dquant= 0;
2806                         s->skipdct=1;
2807                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2808                                         &dmin, &next_block, mx, my);
2809                         s->skipdct=0;
2810                     }
2811                 }
2812
2813                 s->current_picture.f.qscale_table[xy] = best_s.qscale;
2814
2815                 copy_context_after_encode(s, &best_s, -1);
2816
2817                 pb_bits_count= put_bits_count(&s->pb);
2818                 flush_put_bits(&s->pb);
2819                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2820                 s->pb= backup_s.pb;
2821
2822                 if(s->data_partitioning){
2823                     pb2_bits_count= put_bits_count(&s->pb2);
2824                     flush_put_bits(&s->pb2);
2825                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2826                     s->pb2= backup_s.pb2;
2827
2828                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2829                     flush_put_bits(&s->tex_pb);
2830                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2831                     s->tex_pb= backup_s.tex_pb;
2832                 }
2833                 s->last_bits= put_bits_count(&s->pb);
2834
2835                 if (CONFIG_H263_ENCODER &&
2836                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2837                     ff_h263_update_motion_val(s);
2838
2839                 if(next_block==0){ //FIXME 16 vs linesize16
2840                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2841                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2842                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2843                 }
2844
2845                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2846                     ff_MPV_decode_mb(s, s->block);
2847             } else {
2848                 int motion_x = 0, motion_y = 0;
2849                 s->mv_type=MV_TYPE_16X16;
2850                 // only one MB-Type possible
2851
2852                 switch(mb_type){
2853                 case CANDIDATE_MB_TYPE_INTRA:
2854                     s->mv_dir = 0;
2855                     s->mb_intra= 1;
2856                     motion_x= s->mv[0][0][0] = 0;
2857                     motion_y= s->mv[0][0][1] = 0;
2858                     break;
2859                 case CANDIDATE_MB_TYPE_INTER:
2860                     s->mv_dir = MV_DIR_FORWARD;
2861                     s->mb_intra= 0;
2862                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2863                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2864                     break;
2865                 case CANDIDATE_MB_TYPE_INTER_I:
2866                     s->mv_dir = MV_DIR_FORWARD;
2867                     s->mv_type = MV_TYPE_FIELD;
2868                     s->mb_intra= 0;
2869                     for(i=0; i<2; i++){
2870                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2871                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2872                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2873                     }
2874                     break;
2875                 case CANDIDATE_MB_TYPE_INTER4V:
2876                     s->mv_dir = MV_DIR_FORWARD;
2877                     s->mv_type = MV_TYPE_8X8;
2878                     s->mb_intra= 0;
2879                     for(i=0; i<4; i++){
2880                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2881                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2882                     }
2883                     break;
2884                 case CANDIDATE_MB_TYPE_DIRECT:
2885                     if (CONFIG_MPEG4_ENCODER) {
2886                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2887                         s->mb_intra= 0;
2888                         motion_x=s->b_direct_mv_table[xy][0];
2889                         motion_y=s->b_direct_mv_table[xy][1];
2890                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2891                     }
2892                     break;
2893                 case CANDIDATE_MB_TYPE_DIRECT0:
2894                     if (CONFIG_MPEG4_ENCODER) {
2895                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2896                         s->mb_intra= 0;
2897                         ff_mpeg4_set_direct_mv(s, 0, 0);
2898                     }
2899                     break;
2900                 case CANDIDATE_MB_TYPE_BIDIR:
2901                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2902                     s->mb_intra= 0;
2903                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2904                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2905                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2906                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2907                     break;
2908                 case CANDIDATE_MB_TYPE_BACKWARD:
2909                     s->mv_dir = MV_DIR_BACKWARD;
2910                     s->mb_intra= 0;
2911                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2912                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2913                     break;
2914                 case CANDIDATE_MB_TYPE_FORWARD:
2915                     s->mv_dir = MV_DIR_FORWARD;
2916                     s->mb_intra= 0;
2917                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2918                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2919                     break;
2920                 case CANDIDATE_MB_TYPE_FORWARD_I:
2921                     s->mv_dir = MV_DIR_FORWARD;
2922                     s->mv_type = MV_TYPE_FIELD;
2923                     s->mb_intra= 0;
2924                     for(i=0; i<2; i++){
2925                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2926                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2927                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2928                     }
2929                     break;
2930                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2931                     s->mv_dir = MV_DIR_BACKWARD;
2932                     s->mv_type = MV_TYPE_FIELD;
2933                     s->mb_intra= 0;
2934                     for(i=0; i<2; i++){
2935                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2936                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2937                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2938                     }
2939                     break;
2940                 case CANDIDATE_MB_TYPE_BIDIR_I:
2941                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2942                     s->mv_type = MV_TYPE_FIELD;
2943                     s->mb_intra= 0;
2944                     for(dir=0; dir<2; dir++){
2945                         for(i=0; i<2; i++){
2946                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2947                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2948                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2949                         }
2950                     }
2951                     break;
2952                 default:
2953                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
2954                 }
2955
2956                 encode_mb(s, motion_x, motion_y);
2957
2958                 // RAL: Update last macroblock type
2959                 s->last_mv_dir = s->mv_dir;
2960
2961                 if (CONFIG_H263_ENCODER &&
2962                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2963                     ff_h263_update_motion_val(s);
2964
2965                 ff_MPV_decode_mb(s, s->block);
2966             }
2967
2968             /* clean the MV table in I/P/S frames for direct mode in B frames */
2969             if(s->mb_intra /* && I,P,S_TYPE */){
2970                 s->p_mv_table[xy][0]=0;
2971                 s->p_mv_table[xy][1]=0;
2972             }
2973
2974             if(s->flags&CODEC_FLAG_PSNR){
2975                 int w= 16;
2976                 int h= 16;
2977
2978                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2979                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2980
2981                 s->current_picture.f.error[0] += sse(
2982                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
2983                     s->dest[0], w, h, s->linesize);
2984                 s->current_picture.f.error[1] += sse(
2985                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2986                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2987                 s->current_picture.f.error[2] += sse(
2988                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2989                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2990             }
2991             if(s->loop_filter){
2992                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
2993                     ff_h263_loop_filter(s);
2994             }
2995             av_dlog(s->avctx, "MB %d %d bits\n",
2996                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
2997         }
2998     }
2999
3000     // not beautiful, but this must be written before flushing, so it has to be here
3001     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3002         ff_msmpeg4_encode_ext_header(s);
3003
3004     write_slice_end(s);
3005
3006     /* Send the last GOB if RTP */
3007     if (s->avctx->rtp_callback) {
3008         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3009         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3010         /* Call the RTP callback to send the last GOB */
3011         emms_c();
3012         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3013     }
3014
3015     return 0;
3016 }
3017
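/* Slice threads encode into private MpegEncContexts; the MERGE() helpers below
 * add each per-thread statistic into the main context and zero the source
 * field so nothing is counted twice, and the byte-aligned per-thread
 * bitstreams are concatenated with avpriv_copy_bits(). */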
3018 #define MERGE(field) dst->field += src->field; src->field=0
3019 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3020     MERGE(me.scene_change_score);
3021     MERGE(me.mc_mb_var_sum_temp);
3022     MERGE(me.mb_var_sum_temp);
3023 }
3024
3025 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3026     int i;
3027
3028     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3029     MERGE(dct_count[1]);
3030     MERGE(mv_bits);
3031     MERGE(i_tex_bits);
3032     MERGE(p_tex_bits);
3033     MERGE(i_count);
3034     MERGE(f_count);
3035     MERGE(b_count);
3036     MERGE(skip_count);
3037     MERGE(misc_bits);
3038     MERGE(error_count);
3039     MERGE(padding_bug_score);
3040     MERGE(current_picture.f.error[0]);
3041     MERGE(current_picture.f.error[1]);
3042     MERGE(current_picture.f.error[2]);
3043
3044     if(dst->avctx->noise_reduction){
3045         for(i=0; i<64; i++){
3046             MERGE(dct_error_sum[0][i]);
3047             MERGE(dct_error_sum[1][i]);
3048         }
3049     }
3050
3051     assert(put_bits_count(&src->pb) % 8 ==0);
3052     assert(put_bits_count(&dst->pb) % 8 ==0);
3053     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3054     flush_put_bits(&dst->pb);
3055 }
3056
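/* Pick the picture-level quality: an explicitly queued next_lambda wins,
 * otherwise the rate controller is asked (unless qscale is fixed). With
 * adaptive quantization the per-MB qscale table is then smoothed by the
 * ff_clean_*_qscales() helpers for codecs whose dquant range is limited. */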
3057 static int estimate_qp(MpegEncContext *s, int dry_run){
3058     if (s->next_lambda){
3059         s->current_picture_ptr->f.quality =
3060         s->current_picture.f.quality = s->next_lambda;
3061         if(!dry_run) s->next_lambda= 0;
3062     } else if (!s->fixed_qscale) {
3063         s->current_picture_ptr->f.quality =
3064         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3065         if (s->current_picture.f.quality < 0)
3066             return -1;
3067     }
3068
3069     if(s->adaptive_quant){
3070         switch(s->codec_id){
3071         case AV_CODEC_ID_MPEG4:
3072             if (CONFIG_MPEG4_ENCODER)
3073                 ff_clean_mpeg4_qscales(s);
3074             break;
3075         case AV_CODEC_ID_H263:
3076         case AV_CODEC_ID_H263P:
3077         case AV_CODEC_ID_FLV1:
3078             if (CONFIG_H263_ENCODER)
3079                 ff_clean_h263_qscales(s);
3080             break;
3081         default:
3082             ff_init_qscale_tab(s);
3083         }
3084
3085         s->lambda= s->lambda_table[0];
3086         //FIXME broken
3087     }else
3088         s->lambda = s->current_picture.f.quality;
3089     update_qscale(s);
3090     return 0;
3091 }
3092
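/* pp_time is the distance between the two reference frames surrounding the
 * B-frames, pb_time the distance from the previous reference to the current
 * B-frame; MPEG-4 direct mode (among others) uses them to scale motion
 * vectors. */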
3093 /* must be called before writing the header */
3094 static void set_frame_distances(MpegEncContext * s){
3095     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3096     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3097
3098     if(s->pict_type==AV_PICTURE_TYPE_B){
3099         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3100         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3101     }else{
3102         s->pp_time= s->time - s->last_non_b_time;
3103         s->last_non_b_time= s->time;
3104         assert(s->picture_number==0 || s->pp_time > 0);
3105     }
3106 }
3107
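/* Top-level per-frame encoding: run motion estimation (or MB variance
 * measurement for I-frames) across the slice-thread contexts, promote the
 * frame to an I-frame on a scene cut, choose f_code/b_code and fix up vectors
 * that do not fit them, pick the qscale, write the picture header, then
 * encode the macroblocks in parallel and merge the per-thread contexts. */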
3108 static int encode_picture(MpegEncContext *s, int picture_number)
3109 {
3110     int i;
3111     int bits;
3112     int context_count = s->slice_context_count;
3113
3114     s->picture_number = picture_number;
3115
3116     /* Reset the average MB variance */
3117     s->me.mb_var_sum_temp    =
3118     s->me.mc_mb_var_sum_temp = 0;
3119
3120     /* we need to initialize some time vars before we can encode b-frames */
3121     // RAL: Condition added for MPEG1VIDEO
3122     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3123         set_frame_distances(s);
3124     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3125         ff_set_mpeg4_time(s);
3126
3127     s->me.scene_change_score=0;
3128
3129 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3130
3131     if(s->pict_type==AV_PICTURE_TYPE_I){
3132         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3133         else                        s->no_rounding=0;
3134     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3135         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3136             s->no_rounding ^= 1;
3137     }
3138
3139     if(s->flags & CODEC_FLAG_PASS2){
3140         if (estimate_qp(s,1) < 0)
3141             return -1;
3142         ff_get_2pass_fcode(s);
3143     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3144         if(s->pict_type==AV_PICTURE_TYPE_B)
3145             s->lambda= s->last_lambda_for[s->pict_type];
3146         else
3147             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3148         update_qscale(s);
3149     }
3150
3151     s->mb_intra=0; //for the rate distortion & bit compare functions
3152     for(i=1; i<context_count; i++){
3153         ff_update_duplicate_context(s->thread_context[i], s);
3154     }
3155
3156     if(ff_init_me(s)<0)
3157         return -1;
3158
3159     /* Estimate motion for every MB */
3160     if(s->pict_type != AV_PICTURE_TYPE_I){
3161         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3162         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3163         if(s->pict_type != AV_PICTURE_TYPE_B && s->avctx->me_threshold==0){
3164             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3165                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3166             }
3167         }
3168
3169         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3170     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3171         /* I-Frame */
3172         for(i=0; i<s->mb_stride*s->mb_height; i++)
3173             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3174
3175         if(!s->fixed_qscale){
3176             /* finding spatial complexity for I-frame rate control */
3177             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3178         }
3179     }
3180     for(i=1; i<context_count; i++){
3181         merge_context_after_me(s, s->thread_context[i]);
3182     }
3183     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3184     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3185     emms_c();
3186
3187     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3188         s->pict_type= AV_PICTURE_TYPE_I;
3189         for(i=0; i<s->mb_stride*s->mb_height; i++)
3190             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3191         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3192                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3193     }
3194
3195     if(!s->umvplus){
3196         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3197             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3198
3199             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3200                 int a,b;
3201                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3202                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3203                 s->f_code= FFMAX3(s->f_code, a, b);
3204             }
3205
3206             ff_fix_long_p_mvs(s);
3207             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3208             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3209                 int j;
3210                 for(i=0; i<2; i++){
3211                     for(j=0; j<2; j++)
3212                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3213                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3214                 }
3215             }
3216         }
3217
3218         if(s->pict_type==AV_PICTURE_TYPE_B){
3219             int a, b;
3220
3221             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3222             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3223             s->f_code = FFMAX(a, b);
3224
3225             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3226             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3227             s->b_code = FFMAX(a, b);
3228
3229             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3230             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3231             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3232             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3233             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3234                 int dir, j;
3235                 for(dir=0; dir<2; dir++){
3236                     for(i=0; i<2; i++){
3237                         for(j=0; j<2; j++){
3238                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3239                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3240                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3241                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3242                         }
3243                     }
3244                 }
3245             }
3246         }
3247     }
3248
3249     if (estimate_qp(s, 0) < 0)
3250         return -1;
3251
3252     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3253         s->qscale= 3; //reduce clipping problems
3254
3255     if (s->out_format == FMT_MJPEG) {
3256         /* for mjpeg, we do include qscale in the matrix */
3257         for(i=1;i<64;i++){
3258             int j= s->dsp.idct_permutation[i];
3259
3260             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3261         }
3262         s->y_dc_scale_table=
3263         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3264         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3265         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3266                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3267         s->qscale= 8;
3268     }
3269
3270     //FIXME var duplication
3271     s->current_picture_ptr->f.key_frame =
3272     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3273     s->current_picture_ptr->f.pict_type =
3274     s->current_picture.f.pict_type = s->pict_type;
3275
3276     if (s->current_picture.f.key_frame)
3277         s->picture_in_gop_number=0;
3278
3279     s->last_bits= put_bits_count(&s->pb);
3280     switch(s->out_format) {
3281     case FMT_MJPEG:
3282         if (CONFIG_MJPEG_ENCODER)
3283             ff_mjpeg_encode_picture_header(s);
3284         break;
3285     case FMT_H261:
3286         if (CONFIG_H261_ENCODER)
3287             ff_h261_encode_picture_header(s, picture_number);
3288         break;
3289     case FMT_H263:
3290         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3291             ff_wmv2_encode_picture_header(s, picture_number);
3292         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3293             ff_msmpeg4_encode_picture_header(s, picture_number);
3294         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3295             ff_mpeg4_encode_picture_header(s, picture_number);
3296         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3297             ff_rv10_encode_picture_header(s, picture_number);
3298         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3299             ff_rv20_encode_picture_header(s, picture_number);
3300         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3301             ff_flv_encode_picture_header(s, picture_number);
3302         else if (CONFIG_H263_ENCODER)
3303             ff_h263_encode_picture_header(s, picture_number);
3304         break;
3305     case FMT_MPEG1:
3306         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3307             ff_mpeg1_encode_picture_header(s, picture_number);
3308         break;
3309     case FMT_H264:
3310         break;
3311     default:
3312         assert(0);
3313     }
3314     bits= put_bits_count(&s->pb);
3315     s->header_bits= bits - s->last_bits;
3316
3317     for(i=1; i<context_count; i++){
3318         update_duplicate_context_after_me(s->thread_context[i], s);
3319     }
3320     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3321     for(i=1; i<context_count; i++){
3322         merge_context_after_encode(s, s->thread_context[i]);
3323     }
3324     emms_c();
3325     return 0;
3326 }
3327
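/* Noise shaping after the FDCT: per coefficient position, the magnitude is
 * accumulated into dct_error_sum and the coefficient is shrunk towards zero by
 * dct_offset (an offset derived elsewhere from these sums and the
 * noise_reduction setting), clamping so the shrinkage never flips the sign. */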
3328 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block){
3329     const int intra= s->mb_intra;
3330     int i;
3331
3332     s->dct_count[intra]++;
3333
3334     for(i=0; i<64; i++){
3335         int level= block[i];
3336
3337         if(level){
3338             if(level>0){
3339                 s->dct_error_sum[intra][i] += level;
3340                 level -= s->dct_offset[intra][i];
3341                 if(level<0) level=0;
3342             }else{
3343                 s->dct_error_sum[intra][i] -= level;
3344                 level += s->dct_offset[intra][i];
3345                 if(level>0) level=0;
3346             }
3347             block[i]= level;
3348         }
3349     }
3350 }
3351
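/* Rate-distortion ("trellis") quantization: for every scan position up to two
 * candidate levels are kept (the quantized magnitude and that magnitude minus
 * one), and a Viterbi-style search over run/level pairs picks the path that
 * minimizes distortion + lambda * bits using the VLC length tables; levels
 * needing an escape code are charged ac_esc_length bits. */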
3352 static int dct_quantize_trellis_c(MpegEncContext *s,
3353                                   DCTELEM *block, int n,
3354                                   int qscale, int *overflow){
3355     const int *qmat;
3356     const uint8_t *scantable= s->intra_scantable.scantable;
3357     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3358     int max=0;
3359     unsigned int threshold1, threshold2;
3360     int bias=0;
3361     int run_tab[65];
3362     int level_tab[65];
3363     int score_tab[65];
3364     int survivor[65];
3365     int survivor_count;
3366     int last_run=0;
3367     int last_level=0;
3368     int last_score= 0;
3369     int last_i;
3370     int coeff[2][64];
3371     int coeff_count[64];
3372     int qmul, qadd, start_i, last_non_zero, i, dc;
3373     const int esc_length= s->ac_esc_length;
3374     uint8_t * length;
3375     uint8_t * last_length;
3376     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3377
3378     s->dsp.fdct (block);
3379
3380     if(s->dct_error_sum)
3381         s->denoise_dct(s, block);
3382     qmul= qscale*16;
3383     qadd= ((qscale-1)|1)*8;
3384
3385     if (s->mb_intra) {
3386         int q;
3387         if (!s->h263_aic) {
3388             if (n < 4)
3389                 q = s->y_dc_scale;
3390             else
3391                 q = s->c_dc_scale;
3392             q = q << 3;
3393         } else{
3394             /* For AIC we skip quant/dequant of INTRADC */
3395             q = 1 << 3;
3396             qadd=0;
3397         }
3398
3399         /* note: block[0] is assumed to be positive */
3400         block[0] = (block[0] + (q >> 1)) / q;
3401         start_i = 1;
3402         last_non_zero = 0;
3403         qmat = s->q_intra_matrix[qscale];
3404         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3405             bias= 1<<(QMAT_SHIFT-1);
3406         length     = s->intra_ac_vlc_length;
3407         last_length= s->intra_ac_vlc_last_length;
3408     } else {
3409         start_i = 0;
3410         last_non_zero = -1;
3411         qmat = s->q_inter_matrix[qscale];
3412         length     = s->inter_ac_vlc_length;
3413         last_length= s->inter_ac_vlc_last_length;
3414     }
3415     last_i= start_i;
3416
3417     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3418     threshold2= (threshold1<<1);
3419
3420     for(i=63; i>=start_i; i--) {
3421         const int j = scantable[i];
3422         int level = block[j] * qmat[j];
3423
3424         if(((unsigned)(level+threshold1))>threshold2){
3425             last_non_zero = i;
3426             break;
3427         }
3428     }
3429
3430     for(i=start_i; i<=last_non_zero; i++) {
3431         const int j = scantable[i];
3432         int level = block[j] * qmat[j];
3433
3434 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3435 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3436         if(((unsigned)(level+threshold1))>threshold2){
3437             if(level>0){
3438                 level= (bias + level)>>QMAT_SHIFT;
3439                 coeff[0][i]= level;
3440                 coeff[1][i]= level-1;
3441 //                coeff[2][k]= level-2;
3442             }else{
3443                 level= (bias - level)>>QMAT_SHIFT;
3444                 coeff[0][i]= -level;
3445                 coeff[1][i]= -level+1;
3446 //                coeff[2][k]= -level+2;
3447             }
3448             coeff_count[i]= FFMIN(level, 2);
3449             assert(coeff_count[i]);
3450             max |=level;
3451         }else{
3452             coeff[0][i]= (level>>31)|1;
3453             coeff_count[i]= 1;
3454         }
3455     }
3456
3457     *overflow= s->max_qcoeff < max; //overflow might have happened
3458
3459     if(last_non_zero < start_i){
3460         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3461         return last_non_zero;
3462     }
3463
3464     score_tab[start_i]= 0;
3465     survivor[0]= start_i;
3466     survivor_count= 1;
3467
3468     for(i=start_i; i<=last_non_zero; i++){
3469         int level_index, j, zero_distortion;
3470         int dct_coeff= FFABS(block[ scantable[i] ]);
3471         int best_score=256*256*256*120;
3472
3473         if (s->dsp.fdct == ff_fdct_ifast)
3474             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3475         zero_distortion= dct_coeff*dct_coeff;
3476
3477         for(level_index=0; level_index < coeff_count[i]; level_index++){
3478             int distortion;
3479             int level= coeff[level_index][i];
3480             const int alevel= FFABS(level);
3481             int unquant_coeff;
3482
3483             assert(level);
3484
3485             if(s->out_format == FMT_H263){
3486                 unquant_coeff= alevel*qmul + qadd;
3487             }else{ //MPEG1
3488                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3489                 if(s->mb_intra){
3490                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3491                         unquant_coeff =   (unquant_coeff - 1) | 1;
3492                 }else{
3493                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3494                         unquant_coeff =   (unquant_coeff - 1) | 1;
3495                 }
3496                 unquant_coeff<<= 3;
3497             }
3498
3499             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3500             level+=64;
3501             if((level&(~127)) == 0){
3502                 for(j=survivor_count-1; j>=0; j--){
3503                     int run= i - survivor[j];
3504                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3505                     score += score_tab[i-run];
3506
3507                     if(score < best_score){
3508                         best_score= score;
3509                         run_tab[i+1]= run;
3510                         level_tab[i+1]= level-64;
3511                     }
3512                 }
3513
3514                 if(s->out_format == FMT_H263){
3515                     for(j=survivor_count-1; j>=0; j--){
3516                         int run= i - survivor[j];
3517                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3518                         score += score_tab[i-run];
3519                         if(score < last_score){
3520                             last_score= score;
3521                             last_run= run;
3522                             last_level= level-64;
3523                             last_i= i+1;
3524                         }
3525                     }
3526                 }
3527             }else{
3528                 distortion += esc_length*lambda;
3529                 for(j=survivor_count-1; j>=0; j--){
3530                     int run= i - survivor[j];
3531                     int score= distortion + score_tab[i-run];
3532
3533                     if(score < best_score){
3534                         best_score= score;
3535                         run_tab[i+1]= run;
3536                         level_tab[i+1]= level-64;
3537                     }
3538                 }
3539
3540                 if(s->out_format == FMT_H263){
3541                   for(j=survivor_count-1; j>=0; j--){
3542                         int run= i - survivor[j];
3543                         int score= distortion + score_tab[i-run];
3544                         if(score < last_score){
3545                             last_score= score;
3546                             last_run= run;
3547                             last_level= level-64;
3548                             last_i= i+1;
3549                         }
3550                     }
3551                 }
3552             }
3553         }
3554
3555         score_tab[i+1]= best_score;
3556
3557                 //Note: there is a VLC code in MPEG-4 which is 1 bit shorter than another one with a shorter run and the same level
3558         if(last_non_zero <= 27){
3559             for(; survivor_count; survivor_count--){
3560                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3561                     break;
3562             }
3563         }else{
3564             for(; survivor_count; survivor_count--){
3565                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3566                     break;
3567             }
3568         }
3569
3570         survivor[ survivor_count++ ]= i+1;
3571     }
3572
3573     if(s->out_format != FMT_H263){
3574         last_score= 256*256*256*120;
3575         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3576             int score= score_tab[i];
3577             if(i) score += lambda*2; //FIXME be more exact?
3578
3579             if(score < last_score){
3580                 last_score= score;
3581                 last_i= i;
3582                 last_level= level_tab[i];
3583                 last_run= run_tab[i];
3584             }
3585         }
3586     }
3587
3588     s->coded_score[n] = last_score;
3589
3590     dc= FFABS(block[0]);
3591     last_non_zero= last_i - 1;
3592     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3593
3594     if(last_non_zero < start_i)
3595         return last_non_zero;
3596
3597     if(last_non_zero == 0 && start_i == 0){
3598         int best_level= 0;
3599         int best_score= dc * dc;
3600
3601         for(i=0; i<coeff_count[0]; i++){
3602             int level= coeff[i][0];
3603             int alevel= FFABS(level);
3604             int unquant_coeff, score, distortion;
3605
3606             if(s->out_format == FMT_H263){
3607                     unquant_coeff= (alevel*qmul + qadd)>>3;
3608             }else{ //MPEG1
3609                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3610                     unquant_coeff =   (unquant_coeff - 1) | 1;
3611             }
3612             unquant_coeff = (unquant_coeff + 4) >> 3;
3613             unquant_coeff<<= 3 + 3;
3614
3615             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3616             level+=64;
3617             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3618             else                    score= distortion + esc_length*lambda;
3619
3620             if(score < best_score){
3621                 best_score= score;
3622                 best_level= level - 64;
3623             }
3624         }
3625         block[0]= best_level;
3626         s->coded_score[n] = best_score - dc*dc;
3627         if(best_level == 0) return -1;
3628         else                return last_non_zero;
3629     }
3630
3631     i= last_i;
3632     assert(last_level);
3633
3634     block[ perm_scantable[last_non_zero] ]= last_level;
3635     i -= last_run + 1;
3636
3637     for(; i>start_i; i -= run_tab[i] + 1){
3638         block[ perm_scantable[i-1] ]= level_tab[i];
3639     }
3640
3641     return last_non_zero;
3642 }
3643
3644 //#define REFINE_STATS 1
3645 static int16_t basis[64][64];
3646
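/* basis[] holds the 64 8x8 IDCT basis images, scaled by 0.25*(1<<BASIS_SHIFT)
 * and stored in IDCT-permuted order, so that a change of one transform
 * coefficient can be applied to (or scored against) the spatial-domain
 * residual via dsp.add_8x8basis()/try_8x8basis(). */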
3647 static void build_basis(uint8_t *perm){
3648     int i, j, x, y;
3649     emms_c();
3650     for(i=0; i<8; i++){
3651         for(j=0; j<8; j++){
3652             for(y=0; y<8; y++){
3653                 for(x=0; x<8; x++){
3654                     double s= 0.25*(1<<BASIS_SHIFT);
3655                     int index= 8*i + j;
3656                     int perm_index= perm[index];
3657                     if(i==0) s*= sqrt(0.5);
3658                     if(j==0) s*= sqrt(0.5);
3659                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3660                 }
3661             }
3662         }
3663     }
3664 }
3665
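/* Greedy post-quantization refinement: starting from the residual between the
 * reconstruction and the original block (rem[]), repeatedly try changing each
 * coefficient by +-1, score distortion with try_8x8basis() plus the VLC bit
 * cost change times lambda, and apply the single best change per iteration
 * until no change improves the score. */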
3666 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3667                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
3668                         int n, int qscale){
3669     int16_t rem[64];
3670     LOCAL_ALIGNED_16(DCTELEM, d1, [64]);
3671     const uint8_t *scantable= s->intra_scantable.scantable;
3672     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3673 //    unsigned int threshold1, threshold2;
3674 //    int bias=0;
3675     int run_tab[65];
3676     int prev_run=0;
3677     int prev_level=0;
3678     int qmul, qadd, start_i, last_non_zero, i, dc;
3679     uint8_t * length;
3680     uint8_t * last_length;
3681     int lambda;
3682     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3683 #ifdef REFINE_STATS
3684 static int count=0;
3685 static int after_last=0;
3686 static int to_zero=0;
3687 static int from_zero=0;
3688 static int raise=0;
3689 static int lower=0;
3690 static int messed_sign=0;
3691 #endif
3692
3693     if(basis[0][0] == 0)
3694         build_basis(s->dsp.idct_permutation);
3695
3696     qmul= qscale*2;
3697     qadd= (qscale-1)|1;
3698     if (s->mb_intra) {
3699         if (!s->h263_aic) {
3700             if (n < 4)
3701                 q = s->y_dc_scale;
3702             else
3703                 q = s->c_dc_scale;
3704         } else{
3705             /* For AIC we skip quant/dequant of INTRADC */
3706             q = 1;
3707             qadd=0;
3708         }
3709         q <<= RECON_SHIFT-3;
3710         /* note: block[0] is assumed to be positive */
3711         dc= block[0]*q;
3712 //        block[0] = (block[0] + (q >> 1)) / q;
3713         start_i = 1;
3714 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3715 //            bias= 1<<(QMAT_SHIFT-1);
3716         length     = s->intra_ac_vlc_length;
3717         last_length= s->intra_ac_vlc_last_length;
3718     } else {
3719         dc= 0;
3720         start_i = 0;
3721         length     = s->inter_ac_vlc_length;
3722         last_length= s->inter_ac_vlc_last_length;
3723     }
3724     last_non_zero = s->block_last_index[n];
3725
3726 #ifdef REFINE_STATS
3727 {START_TIMER
3728 #endif
3729     dc += (1<<(RECON_SHIFT-1));
3730     for(i=0; i<64; i++){
3731         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME use orig directly instead of copying to rem[]
3732     }
3733 #ifdef REFINE_STATS
3734 STOP_TIMER("memset rem[]")}
3735 #endif
3736     sum=0;
3737     for(i=0; i<64; i++){
3738         int one= 36;
3739         int qns=4;
3740         int w;
3741
3742         w= FFABS(weight[i]) + qns*one;
3743         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3744
3745         weight[i] = w;
3746 //        w=weight[i] = (63*qns + (w/2)) / w;
3747
3748         assert(w>0);
3749         assert(w<(1<<6));
3750         sum += w*w;
3751     }
3752     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3753 #ifdef REFINE_STATS
3754 {START_TIMER
3755 #endif
3756     run=0;
3757     rle_index=0;
3758     for(i=start_i; i<=last_non_zero; i++){
3759         int j= perm_scantable[i];
3760         const int level= block[j];
3761         int coeff;
3762
3763         if(level){
3764             if(level<0) coeff= qmul*level - qadd;
3765             else        coeff= qmul*level + qadd;
3766             run_tab[rle_index++]=run;
3767             run=0;
3768
3769             s->dsp.add_8x8basis(rem, basis[j], coeff);
3770         }else{
3771             run++;
3772         }
3773     }
3774 #ifdef REFINE_STATS
3775 if(last_non_zero>0){
3776 STOP_TIMER("init rem[]")
3777 }
3778 }
3779
3780 {START_TIMER
3781 #endif
3782     for(;;){
3783         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3784         int best_coeff=0;
3785         int best_change=0;
3786         int run2, best_unquant_change=0, analyze_gradient;
3787 #ifdef REFINE_STATS
3788 {START_TIMER
3789 #endif
3790         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3791
3792         if(analyze_gradient){
3793 #ifdef REFINE_STATS
3794 {START_TIMER
3795 #endif
3796             for(i=0; i<64; i++){
3797                 int w= weight[i];
3798
3799                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3800             }
3801 #ifdef REFINE_STATS
3802 STOP_TIMER("rem*w*w")}
3803 {START_TIMER
3804 #endif
3805             s->dsp.fdct(d1);
3806 #ifdef REFINE_STATS
3807 STOP_TIMER("dct")}
3808 #endif
3809         }
3810
3811         if(start_i){
3812             const int level= block[0];
3813             int change, old_coeff;
3814
3815             assert(s->mb_intra);
3816
3817             old_coeff= q*level;
3818
3819             for(change=-1; change<=1; change+=2){
3820                 int new_level= level + change;
3821                 int score, new_coeff;
3822
3823                 new_coeff= q*new_level;
3824                 if(new_coeff >= 2048 || new_coeff < 0)
3825                     continue;
3826
3827                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3828                 if(score<best_score){
3829                     best_score= score;
3830                     best_coeff= 0;
3831                     best_change= change;
3832                     best_unquant_change= new_coeff - old_coeff;
3833                 }
3834             }
3835         }
3836
3837         run=0;
3838         rle_index=0;
3839         run2= run_tab[rle_index++];
3840         prev_level=0;
3841         prev_run=0;
3842
3843         for(i=start_i; i<64; i++){
3844             int j= perm_scantable[i];
3845             const int level= block[j];
3846             int change, old_coeff;
3847
3848             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3849                 break;
3850
3851             if(level){
3852                 if(level<0) old_coeff= qmul*level - qadd;
3853                 else        old_coeff= qmul*level + qadd;
3854                 run2= run_tab[rle_index++]; //FIXME: may be past the last entry
3855             }else{
3856                 old_coeff=0;
3857                 run2--;
3858                 assert(run2>=0 || i >= last_non_zero );
3859             }
3860
3861             for(change=-1; change<=1; change+=2){
3862                 int new_level= level + change;
3863                 int score, new_coeff, unquant_change;
3864
3865                 score=0;
3866                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3867                    continue;
3868
3869                 if(new_level){
3870                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3871                     else            new_coeff= qmul*new_level + qadd;
3872                     if(new_coeff >= 2048 || new_coeff <= -2048)
3873                         continue;
3874                     //FIXME check for overflow
3875
3876                     if(level){
3877                         if(level < 63 && level > -63){
3878                             if(i < last_non_zero)
3879                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3880                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3881                             else
3882                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3883                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3884                         }
3885                     }else{
3886                         assert(FFABS(new_level)==1);
3887
3888                         if(analyze_gradient){
3889                             int g= d1[ scantable[i] ];
3890                             if(g && (g^new_level) >= 0)
3891                                 continue;
3892                         }
3893
3894                         if(i < last_non_zero){
3895                             int next_i= i + run2 + 1;
3896                             int next_level= block[ perm_scantable[next_i] ] + 64;
3897
3898                             if(next_level&(~127))
3899                                 next_level= 0;
3900
3901                             if(next_i < last_non_zero)
3902                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3903                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3904                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3905                             else
3906                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3907                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3908                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3909                         }else{
3910                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3911                             if(prev_level){
3912                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3913                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3914                             }
3915                         }
3916                     }
3917                 }else{
3918                     new_coeff=0;
3919                     assert(FFABS(level)==1);
3920
3921                     if(i < last_non_zero){
3922                         int next_i= i + run2 + 1;
3923                         int next_level= block[ perm_scantable[next_i] ] + 64;
3924
3925                         if(next_level&(~127))
3926                             next_level= 0;
3927
3928                         if(next_i < last_non_zero)
3929                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3930                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
3931                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3932                         else
3933                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3934                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3935                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3936                     }else{
3937                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
3938                         if(prev_level){
3939                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3940                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3941                         }
3942                     }
3943                 }
3944
3945                 score *= lambda;
3946
3947                 unquant_change= new_coeff - old_coeff;
3948                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
3949
3950                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
3951                 if(score<best_score){
3952                     best_score= score;
3953                     best_coeff= i;
3954                     best_change= change;
3955                     best_unquant_change= unquant_change;
3956                 }
3957             }
3958             if(level){
3959                 prev_level= level + 64;
3960                 if(prev_level&(~127))
3961                     prev_level= 0;
3962                 prev_run= run;
3963                 run=0;
3964             }else{
3965                 run++;
3966             }
3967         }
3968 #ifdef REFINE_STATS
3969 STOP_TIMER("iterative step")}
3970 #endif
3971
3972         if(best_change){
3973             int j= perm_scantable[ best_coeff ];
3974
3975             block[j] += best_change;
3976
3977             if(best_coeff > last_non_zero){
3978                 last_non_zero= best_coeff;
3979                 assert(block[j]);
3980 #ifdef REFINE_STATS
3981 after_last++;
3982 #endif
3983             }else{
3984 #ifdef REFINE_STATS
3985 if(block[j]){
3986     if(block[j] - best_change){
3987         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
3988             raise++;
3989         }else{
3990             lower++;
3991         }
3992     }else{
3993         from_zero++;
3994     }
3995 }else{
3996     to_zero++;
3997 }
3998 #endif
3999                 for(; last_non_zero>=start_i; last_non_zero--){
4000                     if(block[perm_scantable[last_non_zero]])
4001                         break;
4002                 }
4003             }
4004 #ifdef REFINE_STATS
4005 count++;
4006 if(256*256*256*64 % count == 0){
4007     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4008 }
4009 #endif
4010             run=0;
4011             rle_index=0;
4012             for(i=start_i; i<=last_non_zero; i++){
4013                 int j= perm_scantable[i];
4014                 const int level= block[j];
4015
4016                  if(level){
4017                      run_tab[rle_index++]=run;
4018                      run=0;
4019                  }else{
4020                      run++;
4021                  }
4022             }
4023
4024             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4025         }else{
4026             break;
4027         }
4028     }
4029 #ifdef REFINE_STATS
4030 if(last_non_zero>0){
4031 STOP_TIMER("iterative search")
4032 }
4033 }
4034 #endif
4035
4036     return last_non_zero;
4037 }
4038
4039 int ff_dct_quantize_c(MpegEncContext *s,
4040                         DCTELEM *block, int n,
4041                         int qscale, int *overflow)
4042 {
4043     int i, j, level, last_non_zero, q, start_i;
4044     const int *qmat;
4045     const uint8_t *scantable= s->intra_scantable.scantable;
4046     int bias;
4047     int max=0;
4048     unsigned int threshold1, threshold2;
4049
4050     s->dsp.fdct (block);
4051
4052     if(s->dct_error_sum)
4053         s->denoise_dct(s, block);
4054
4055     if (s->mb_intra) {
4056         if (!s->h263_aic) {
4057             if (n < 4)
4058                 q = s->y_dc_scale;
4059             else
4060                 q = s->c_dc_scale;
4061             q = q << 3;
4062         } else
4063             /* For AIC we skip quant/dequant of INTRADC */
4064             q = 1 << 3;
4065
4066         /* note: block[0] is assumed to be positive */
4067         block[0] = (block[0] + (q >> 1)) / q;
4068         start_i = 1;
4069         last_non_zero = 0;
4070         qmat = s->q_intra_matrix[qscale];
4071         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4072     } else {
4073         start_i = 0;
4074         last_non_zero = -1;
4075         qmat = s->q_inter_matrix[qscale];
4076         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4077     }
4078     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4079     threshold2= (threshold1<<1);
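    /* The unsigned compare below tests whether |level| > threshold1, i.e. the
     * value lies outside the quantizer dead zone, with a single branch: if
     * level < -threshold1 the sum level+threshold1 wraps to a huge unsigned
     * value and thus also exceeds threshold2 == 2*threshold1. */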
4080     for(i=63;i>=start_i;i--) {
4081         j = scantable[i];
4082         level = block[j] * qmat[j];
4083
4084         if(((unsigned)(level+threshold1))>threshold2){
4085             last_non_zero = i;
4086             break;
4087         }else{
4088             block[j]=0;
4089         }
4090     }
4091     for(i=start_i; i<=last_non_zero; i++) {
4092         j = scantable[i];
4093         level = block[j] * qmat[j];
4094
4095 //        if(   bias+level >= (1<<QMAT_SHIFT)
4096 //           || bias-level >= (1<<QMAT_SHIFT)){
4097         if(((unsigned)(level+threshold1))>threshold2){
4098             if(level>0){
4099                 level= (bias + level)>>QMAT_SHIFT;
4100                 block[j]= level;
4101             }else{
4102                 level= (bias - level)>>QMAT_SHIFT;
4103                 block[j]= -level;
4104             }
4105             max |=level;
4106         }else{
4107             block[j]=0;
4108         }
4109     }
4110     *overflow= s->max_qcoeff < max; //overflow might have happened
4111
4112     /* permute the coefficients into the layout the IDCT expects; only the !=0 elements need permuting */
4113     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4114         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4115
4116     return last_non_zero;
4117 }
4118
4119 #define OFFSET(x) offsetof(MpegEncContext, x)
4120 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4121 static const AVOption h263_options[] = {
4122     { "obmc",         "Use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4123     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4124     { "mb_info",      "Emit macroblock info for RFC 2190 packetization; the parameter value is the maximum payload size.", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4125     FF_MPV_COMMON_OPTS
4126     { NULL },
4127 };
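/* These private options are applied when the encoder is opened.  A minimal
 * sketch (illustrative only) of enabling OBMC through an options dictionary:
 *
 *     AVDictionary *opts = NULL;
 *     av_dict_set(&opts, "obmc", "1", 0);
 *     avcodec_open2(avctx, avcodec_find_encoder(AV_CODEC_ID_H263), &opts);
 *     av_dict_free(&opts);
 */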
4128
4129 static const AVClass h263_class = {
4130     .class_name = "H.263 encoder",
4131     .item_name  = av_default_item_name,
4132     .option     = h263_options,
4133     .version    = LIBAVUTIL_VERSION_INT,
4134 };
4135
4136 AVCodec ff_h263_encoder = {
4137     .name           = "h263",
4138     .type           = AVMEDIA_TYPE_VIDEO,
4139     .id             = AV_CODEC_ID_H263,
4140     .priv_data_size = sizeof(MpegEncContext),
4141     .init           = ff_MPV_encode_init,
4142     .encode2        = ff_MPV_encode_picture,
4143     .close          = ff_MPV_encode_end,
4144     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4145     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4146     .priv_class     = &h263_class,
4147 };
4148
4149 static const AVOption h263p_options[] = {
4150     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4151     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4152     { "obmc",       "Use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4153     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4154     FF_MPV_COMMON_OPTS
4155     { NULL },
4156 };
4157 static const AVClass h263p_class = {
4158     .class_name = "H.263p encoder",
4159     .item_name  = av_default_item_name,
4160     .option     = h263p_options,
4161     .version    = LIBAVUTIL_VERSION_INT,
4162 };
4163
4164 AVCodec ff_h263p_encoder = {
4165     .name           = "h263p",
4166     .type           = AVMEDIA_TYPE_VIDEO,
4167     .id             = AV_CODEC_ID_H263P,
4168     .priv_data_size = sizeof(MpegEncContext),
4169     .init           = ff_MPV_encode_init,
4170     .encode2        = ff_MPV_encode_picture,
4171     .close          = ff_MPV_encode_end,
4172     .capabilities   = CODEC_CAP_SLICE_THREADS,
4173     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4174     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4175     .priv_class     = &h263p_class,
4176 };
4177
4178 FF_MPV_GENERIC_CLASS(msmpeg4v2)
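/* FF_MPV_GENERIC_CLASS(name) (see mpegvideo.h) expands to roughly a static
 * AVClass named name ## _class wired to the shared ff_mpv_generic_options;
 * that is what the .priv_class pointers of the encoders below refer to. */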
4179
4180 AVCodec ff_msmpeg4v2_encoder = {
4181     .name           = "msmpeg4v2",
4182     .type           = AVMEDIA_TYPE_VIDEO,
4183     .id             = AV_CODEC_ID_MSMPEG4V2,
4184     .priv_data_size = sizeof(MpegEncContext),
4185     .init           = ff_MPV_encode_init,
4186     .encode2        = ff_MPV_encode_picture,
4187     .close          = ff_MPV_encode_end,
4188     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4189     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4190     .priv_class     = &msmpeg4v2_class,
4191 };
4192
4193 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4194
4195 AVCodec ff_msmpeg4v3_encoder = {
4196     .name           = "msmpeg4",
4197     .type           = AVMEDIA_TYPE_VIDEO,
4198     .id             = AV_CODEC_ID_MSMPEG4V3,
4199     .priv_data_size = sizeof(MpegEncContext),
4200     .init           = ff_MPV_encode_init,
4201     .encode2        = ff_MPV_encode_picture,
4202     .close          = ff_MPV_encode_end,
4203     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4204     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4205     .priv_class     = &msmpeg4v3_class,
4206 };
4207
4208 FF_MPV_GENERIC_CLASS(wmv1)
4209
4210 AVCodec ff_wmv1_encoder = {
4211     .name           = "wmv1",
4212     .type           = AVMEDIA_TYPE_VIDEO,
4213     .id             = AV_CODEC_ID_WMV1,
4214     .priv_data_size = sizeof(MpegEncContext),
4215     .init           = ff_MPV_encode_init,
4216     .encode2        = ff_MPV_encode_picture,
4217     .close          = ff_MPV_encode_end,
4218     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4219     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4220     .priv_class     = &wmv1_class,
4221 };