1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/intmath.h"
31 #include "libavutil/mathematics.h"
32 #include "libavutil/pixdesc.h"
33 #include "libavutil/opt.h"
34 #include "avcodec.h"
35 #include "dsputil.h"
36 #include "mpegvideo.h"
37 #include "h263.h"
38 #include "mathops.h"
39 #include "mjpegenc.h"
40 #include "msmpeg4.h"
41 #include "faandct.h"
42 #include "thread.h"
43 #include "aandcttab.h"
44 #include "flv.h"
45 #include "mpeg4video.h"
46 #include "internal.h"
47 #include "bytestream.h"
48 #include <limits.h>
49
50 //#undef NDEBUG
51 //#include <assert.h>
52
53 static int encode_picture(MpegEncContext *s, int picture_number);
54 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
55 static int sse_mb(MpegEncContext *s);
56 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block);
57 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
58
59 /* enable all paranoid tests for rounding, overflows, etc... */
60 //#define PARANOID
61
62 //#define DEBUG
63
64 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
65 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
66
67 const AVOption ff_mpv_generic_options[] = {
68     FF_MPV_COMMON_OPTS
69     { NULL },
70 };
71
72 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
73                        uint16_t (*qmat16)[2][64],
74                        const uint16_t *quant_matrix,
75                        int bias, int qmin, int qmax, int intra)
76 {
77     int qscale;
78     int shift = 0;
79
80     for (qscale = qmin; qscale <= qmax; qscale++) {
81         int i;
82         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
83             dsp->fdct == ff_jpeg_fdct_islow_10 ||
84             dsp->fdct == ff_faandct) {
85             for (i = 0; i < 64; i++) {
86                 const int j = dsp->idct_permutation[i];
87                 /* 16 <= qscale * quant_matrix[i] <= 7905, so the reciprocal
88                  * (1 << QMAT_SHIFT) / (qscale * quant_matrix[i]) computed
89                  * below fits comfortably in 32 bits. (No AAN scale factor
90                  * is involved here; the ff_aanscales bound in the comment
91                  * below applies only to the ff_fdct_ifast branch.) */
92
93                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
94                                         (qscale * quant_matrix[j]));
95             }
96         } else if (dsp->fdct == ff_fdct_ifast) {
97             for (i = 0; i < 64; i++) {
98                 const int j = dsp->idct_permutation[i];
99                 /* 16 <= qscale * quant_matrix[i] <= 7905
100                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
101                  *             19952 <=              x  <= 249205026
102                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
103                  *           3444240 >= (1 << 36) / (x) >= 275 */
104
105                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
106                                         (ff_aanscales[i] * qscale *
107                                          quant_matrix[j]));
108             }
109         } else {
110             for (i = 0; i < 64; i++) {
111                 const int j = dsp->idct_permutation[i];
112                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
113                  * Assume x = qscale * quant_matrix[i]
114                  * So             16 <=              x  <= 7905
115                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
116                  * so          32768 >= (1 << 19) / (x) >= 67 */
117                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
118                                         (qscale * quant_matrix[j]));
119                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
120                 //                    (qscale * quant_matrix[i]);
121                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
122                                        (qscale * quant_matrix[j]);
123
124                 if (qmat16[qscale][0][i] == 0 ||
125                     qmat16[qscale][0][i] == 128 * 256)
126                     qmat16[qscale][0][i] = 128 * 256 - 1;
127                 qmat16[qscale][1][i] =
128                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
129                                 qmat16[qscale][0][i]);
130             }
131         }
132
133         for (i = intra; i < 64; i++) {
134             int64_t max = 8191;
135             if (dsp->fdct == ff_fdct_ifast) {
136                 max = (8191LL * ff_aanscales[i]) >> 14;
137             }
138             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
139                 shift++;
140             }
141         }
142     }
143     if (shift) {
144         av_log(NULL, AV_LOG_INFO,
145                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
146                QMAT_SHIFT - shift);
147     }
148 }
149
150 static inline void update_qscale(MpegEncContext *s)
151 {
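    /* Map lambda back to a quantizer: 139 / 2^(FF_LAMBDA_SHIFT + 7) is
     * roughly 1 / FF_QP2LAMBDA, so this is lambda / FF_QP2LAMBDA with
     * rounding, clipped to the user-supplied [qmin, qmax] range. */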
152     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
153                 (FF_LAMBDA_SHIFT + 7);
154     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
155
156     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
157                  FF_LAMBDA_SHIFT;
158 }
159
160 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
161 {
162     int i;
163
164     if (matrix) {
165         put_bits(pb, 1, 1);
166         for (i = 0; i < 64; i++) {
167             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
168         }
169     } else
170         put_bits(pb, 1, 0);
171 }
172
173 /**
174  * init s->current_picture.qscale_table from s->lambda_table
175  */
176 void ff_init_qscale_tab(MpegEncContext *s)
177 {
178     int8_t * const qscale_table = s->current_picture.f.qscale_table;
179     int i;
180
181     for (i = 0; i < s->mb_num; i++) {
182         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
183         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
184         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
185                                                   s->avctx->qmax);
186     }
187 }
188
189 static void copy_picture_attributes(MpegEncContext *s,
190                                     AVFrame *dst,
191                                     AVFrame *src)
192 {
193     int i;
194
195     dst->pict_type              = src->pict_type;
196     dst->quality                = src->quality;
197     dst->coded_picture_number   = src->coded_picture_number;
198     dst->display_picture_number = src->display_picture_number;
199     //dst->reference              = src->reference;
200     dst->pts                    = src->pts;
201     dst->interlaced_frame       = src->interlaced_frame;
202     dst->top_field_first        = src->top_field_first;
203
204     if (s->avctx->me_threshold) {
205         if (!src->motion_val[0])
206             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
207         if (!src->mb_type)
208             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
209         if (!src->ref_index[0])
210             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
211         if (src->motion_subsample_log2 != dst->motion_subsample_log2)
212             av_log(s->avctx, AV_LOG_ERROR,
213                    "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
214                    src->motion_subsample_log2, dst->motion_subsample_log2);
215
216         memcpy(dst->mb_type, src->mb_type,
217                s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
218
219         for (i = 0; i < 2; i++) {
220             int stride = ((16 * s->mb_width ) >>
221                           src->motion_subsample_log2) + 1;
222             int height = ((16 * s->mb_height) >> src->motion_subsample_log2);
223
224             if (src->motion_val[i] &&
225                 src->motion_val[i] != dst->motion_val[i]) {
226                 memcpy(dst->motion_val[i], src->motion_val[i],
227                        2 * stride * height * sizeof(int16_t));
228             }
229             if (src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]) {
230                 memcpy(dst->ref_index[i], src->ref_index[i],
231                        s->mb_stride * 4 * s->mb_height * sizeof(int8_t));
232             }
233         }
234     }
235 }
236
237 static void update_duplicate_context_after_me(MpegEncContext *dst,
238                                               MpegEncContext *src)
239 {
240 #define COPY(a) dst->a= src->a
241     COPY(pict_type);
242     COPY(current_picture);
243     COPY(f_code);
244     COPY(b_code);
245     COPY(qscale);
246     COPY(lambda);
247     COPY(lambda2);
248     COPY(picture_in_gop_number);
249     COPY(gop_picture_number);
250     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
251     COPY(progressive_frame);    // FIXME don't set in encode_header
252     COPY(partitioned_frame);    // FIXME don't set in encode_header
253 #undef COPY
254 }
255
256 /**
257  * Set the given MpegEncContext to defaults for encoding.
258  * The changed fields will not depend upon the prior state of the MpegEncContext.
259  */
260 static void MPV_encode_defaults(MpegEncContext *s)
261 {
262     int i;
263     ff_MPV_common_defaults(s);
264
265     for (i = -16; i < 16; i++) {
266         default_fcode_tab[i + MAX_MV] = 1;
267     }
268     s->me.mv_penalty = default_mv_penalty;
269     s->fcode_tab     = default_fcode_tab;
270 }
271
272 /* init video encoder */
273 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
274 {
275     MpegEncContext *s = avctx->priv_data;
276     int i;
277     int chroma_h_shift, chroma_v_shift;
278
279     MPV_encode_defaults(s);
280
281     switch (avctx->codec_id) {
282     case AV_CODEC_ID_MPEG2VIDEO:
283         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
284             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
285             av_log(avctx, AV_LOG_ERROR,
286                    "only YUV420 and YUV422 are supported\n");
287             return -1;
288         }
289         break;
290     case AV_CODEC_ID_LJPEG:
291         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
292             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
293             avctx->pix_fmt != AV_PIX_FMT_YUVJ444P &&
294             avctx->pix_fmt != AV_PIX_FMT_BGRA     &&
295             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
296               avctx->pix_fmt != AV_PIX_FMT_YUV422P &&
297               avctx->pix_fmt != AV_PIX_FMT_YUV444P) ||
298              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
299             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
300             return -1;
301         }
302         break;
303     case AV_CODEC_ID_MJPEG:
304         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
305             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
306             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
307               avctx->pix_fmt != AV_PIX_FMT_YUV422P) ||
308              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
309             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
310             return -1;
311         }
312         break;
313     default:
314         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
315             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
316             return -1;
317         }
318     }
319
320     switch (avctx->pix_fmt) {
321     case AV_PIX_FMT_YUVJ422P:
322     case AV_PIX_FMT_YUV422P:
323         s->chroma_format = CHROMA_422;
324         break;
325     case AV_PIX_FMT_YUVJ420P:
326     case AV_PIX_FMT_YUV420P:
327     default:
328         s->chroma_format = CHROMA_420;
329         break;
330     }
331
332     s->bit_rate = avctx->bit_rate;
333     s->width    = avctx->width;
334     s->height   = avctx->height;
335     if (avctx->gop_size > 600 &&
336         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
337         av_log(avctx, AV_LOG_ERROR,
338                "Warning keyframe interval too large! reducing it ...\n");
339         avctx->gop_size = 600;
340     }
341     s->gop_size     = avctx->gop_size;
342     s->avctx        = avctx;
343     s->flags        = avctx->flags;
344     s->flags2       = avctx->flags2;
345     s->max_b_frames = avctx->max_b_frames;
346     s->codec_id     = avctx->codec->id;
347 #if FF_API_MPV_GLOBAL_OPTS
348     if (avctx->luma_elim_threshold)
349         s->luma_elim_threshold   = avctx->luma_elim_threshold;
350     if (avctx->chroma_elim_threshold)
351         s->chroma_elim_threshold = avctx->chroma_elim_threshold;
352 #endif
353     s->strict_std_compliance = avctx->strict_std_compliance;
354     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
355     s->mpeg_quant         = avctx->mpeg_quant;
356     s->rtp_mode           = !!avctx->rtp_payload_size;
357     s->intra_dc_precision = avctx->intra_dc_precision;
358     s->user_specified_pts = AV_NOPTS_VALUE;
359
360     if (s->gop_size <= 1) {
361         s->intra_only = 1;
362         s->gop_size   = 12;
363     } else {
364         s->intra_only = 0;
365     }
366
367     s->me_method = avctx->me_method;
368
369     /* Fixed QSCALE */
370     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
371
372 #if FF_API_MPV_GLOBAL_OPTS
373     if (s->flags & CODEC_FLAG_QP_RD)
374         s->mpv_flags |= FF_MPV_FLAG_QP_RD;
375 #endif
376
377     s->adaptive_quant = (s->avctx->lumi_masking ||
378                          s->avctx->dark_masking ||
379                          s->avctx->temporal_cplx_masking ||
380                          s->avctx->spatial_cplx_masking  ||
381                          s->avctx->p_masking      ||
382                          s->avctx->border_masking ||
383                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
384                         !s->fixed_qscale;
385
386     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
387
388     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
389         av_log(avctx, AV_LOG_ERROR,
390                "a vbv buffer size is needed, "
391                "for encoding with a maximum bitrate\n");
392         return -1;
393     }
394
395     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
396         av_log(avctx, AV_LOG_INFO,
397                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
398     }
399
400     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
401         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
402         return -1;
403     }
404
405     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
406         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
407         return -1;
408     }
409
410     if (avctx->rc_max_rate &&
411         avctx->rc_max_rate == avctx->bit_rate &&
412         avctx->rc_max_rate != avctx->rc_min_rate) {
413         av_log(avctx, AV_LOG_INFO,
414                "impossible bitrate constraints, this will fail\n");
415     }
416
417     if (avctx->rc_buffer_size &&
418         avctx->bit_rate * (int64_t)avctx->time_base.num >
419             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
420         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
421         return -1;
422     }
423
424     if (!s->fixed_qscale &&
425         avctx->bit_rate * av_q2d(avctx->time_base) >
426             avctx->bit_rate_tolerance) {
427         av_log(avctx, AV_LOG_ERROR,
428                "bitrate tolerance too small for bitrate\n");
429         return -1;
430     }
431
432     if (s->avctx->rc_max_rate &&
433         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
434         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
435          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
436         90000LL * (avctx->rc_buffer_size - 1) >
437             s->avctx->rc_max_rate * 0xFFFFLL) {
438         av_log(avctx, AV_LOG_INFO,
439                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
440                "specified vbv buffer is too large for the given bitrate!\n");
441     }
442
443     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
444         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
445         s->codec_id != AV_CODEC_ID_FLV1) {
446         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
447         return -1;
448     }
449
450     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
451         av_log(avctx, AV_LOG_ERROR,
452                "OBMC is only supported with simple mb decision\n");
453         return -1;
454     }
455
456     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
457         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
458         return -1;
459     }
460
461     if (s->max_b_frames                    &&
462         s->codec_id != AV_CODEC_ID_MPEG4      &&
463         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
464         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
465         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
466         return -1;
467     }
468
469     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
470          s->codec_id == AV_CODEC_ID_H263  ||
471          s->codec_id == AV_CODEC_ID_H263P) &&
472         (avctx->sample_aspect_ratio.num > 255 ||
473          avctx->sample_aspect_ratio.den > 255)) {
474         av_log(avctx, AV_LOG_ERROR,
475                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
476                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
477         return -1;
478     }
479
480     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
481         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
482         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
483         return -1;
484     }
485
486     // FIXME mpeg2 uses that too
487     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
488         av_log(avctx, AV_LOG_ERROR,
489                "mpeg2 style quantization not supported by codec\n");
490         return -1;
491     }
492
493 #if FF_API_MPV_GLOBAL_OPTS
494     if (s->flags & CODEC_FLAG_CBP_RD)
495         s->mpv_flags |= FF_MPV_FLAG_CBP_RD;
496 #endif
497
498     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
499         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
500         return -1;
501     }
502
503     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
504         s->avctx->mb_decision != FF_MB_DECISION_RD) {
505         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
506         return -1;
507     }
508
509     if (s->avctx->scenechange_threshold < 1000000000 &&
510         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
511         av_log(avctx, AV_LOG_ERROR,
512                "closed gop with scene change detection are not supported yet, "
513                "set threshold to 1000000000\n");
514         return -1;
515     }
516
517     if (s->flags & CODEC_FLAG_LOW_DELAY) {
518         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
519             av_log(avctx, AV_LOG_ERROR,
520                   "low delay forcing is only available for mpeg2\n");
521             return -1;
522         }
523         if (s->max_b_frames != 0) {
524             av_log(avctx, AV_LOG_ERROR,
525                    "b frames cannot be used with low delay\n");
526             return -1;
527         }
528     }
529
530     if (s->q_scale_type == 1) {
531         if (avctx->qmax > 12) {
532             av_log(avctx, AV_LOG_ERROR,
533                    "non linear quant only supports qmax <= 12 currently\n");
534             return -1;
535         }
536     }
537
538     if (s->avctx->thread_count > 1         &&
539         s->codec_id != AV_CODEC_ID_MPEG4      &&
540         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
541         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
542         (s->codec_id != AV_CODEC_ID_H263P)) {
543         av_log(avctx, AV_LOG_ERROR,
544                "multi threaded encoding not supported by codec\n");
545         return -1;
546     }
547
548     if (s->avctx->thread_count < 1) {
549         av_log(avctx, AV_LOG_ERROR,
550                "automatic thread number detection not supported by codec,"
551                "patch welcome\n");
552         return -1;
553     }
554
555     if (s->avctx->thread_count > 1)
556         s->rtp_mode = 1;
557
558     if (!avctx->time_base.den || !avctx->time_base.num) {
559         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
560         return -1;
561     }
562
563     i = (INT_MAX / 2 + 128) >> 8;
564     if (avctx->me_threshold >= i) {
565         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n",
566                i - 1);
567         return -1;
568     }
569     if (avctx->mb_threshold >= i) {
570         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
571                i - 1);
572         return -1;
573     }
574
575     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
576         av_log(avctx, AV_LOG_INFO,
577                "notice: b_frame_strategy only affects the first pass\n");
578         avctx->b_frame_strategy = 0;
579     }
580
581     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
582     if (i > 1) {
583         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
584         avctx->time_base.den /= i;
585         avctx->time_base.num /= i;
586         //return -1;
587     }
588
589     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
590         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
591         // (a + x * 3 / 8) / x
592         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
593         s->inter_quant_bias = 0;
594     } else {
595         s->intra_quant_bias = 0;
596         // (a - x / 4) / x
597         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
598     }
599
600     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
601         s->intra_quant_bias = avctx->intra_quant_bias;
602     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
603         s->inter_quant_bias = avctx->inter_quant_bias;
604
605     av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
606                                      &chroma_v_shift);
607
608     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
609         s->avctx->time_base.den > (1 << 16) - 1) {
610         av_log(avctx, AV_LOG_ERROR,
611                "timebase %d/%d not supported by MPEG 4 standard, "
612                "the maximum admitted value for the timebase denominator "
613                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
614                (1 << 16) - 1);
615         return -1;
616     }
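    /* Number of bits needed to code values 0..time_base.den - 1, i.e. the
     * width of the MPEG-4 vop_time_increment field. */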
617     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
618
619 #if FF_API_MPV_GLOBAL_OPTS
620     if (avctx->flags2 & CODEC_FLAG2_SKIP_RD)
621         s->mpv_flags |= FF_MPV_FLAG_SKIP_RD;
622     if (avctx->flags2 & CODEC_FLAG2_STRICT_GOP)
623         s->mpv_flags |= FF_MPV_FLAG_STRICT_GOP;
624     if (avctx->quantizer_noise_shaping)
625         s->quantizer_noise_shaping = avctx->quantizer_noise_shaping;
626 #endif
627
628     switch (avctx->codec->id) {
629     case AV_CODEC_ID_MPEG1VIDEO:
630         s->out_format = FMT_MPEG1;
631         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
632         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
633         break;
634     case AV_CODEC_ID_MPEG2VIDEO:
635         s->out_format = FMT_MPEG1;
636         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
637         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
638         s->rtp_mode   = 1;
639         break;
640     case AV_CODEC_ID_LJPEG:
641     case AV_CODEC_ID_MJPEG:
642         s->out_format = FMT_MJPEG;
643         s->intra_only = 1; /* force intra only for jpeg */
644         if (avctx->codec->id == AV_CODEC_ID_LJPEG &&
645             avctx->pix_fmt   == AV_PIX_FMT_BGRA) {
646             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
647             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
648             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
649         } else {
650             s->mjpeg_vsample[0] = 2;
651             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
652             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
653             s->mjpeg_hsample[0] = 2;
654             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
655             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
656         }
657         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
658             ff_mjpeg_encode_init(s) < 0)
659             return -1;
660         avctx->delay = 0;
661         s->low_delay = 1;
662         break;
663     case AV_CODEC_ID_H261:
664         if (!CONFIG_H261_ENCODER)
665             return -1;
666         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
667             av_log(avctx, AV_LOG_ERROR,
668                    "The specified picture size of %dx%d is not valid for the "
669                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
670                     s->width, s->height);
671             return -1;
672         }
673         s->out_format = FMT_H261;
674         avctx->delay  = 0;
675         s->low_delay  = 1;
676         break;
677     case AV_CODEC_ID_H263:
678         if (!CONFIG_H263_ENCODER)
679             return -1;
680         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
681                              s->width, s->height) == 8) {
682             av_log(avctx, AV_LOG_INFO,
683                    "The specified picture size of %dx%d is not valid for "
684                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
685                    "352x288, 704x576, and 1408x1152."
686                    "Try H.263+.\n", s->width, s->height);
687             return -1;
688         }
689         s->out_format = FMT_H263;
690         avctx->delay  = 0;
691         s->low_delay  = 1;
692         break;
693     case AV_CODEC_ID_H263P:
694         s->out_format = FMT_H263;
695         s->h263_plus  = 1;
696         /* Fx */
697         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
698         s->modified_quant  = s->h263_aic;
699         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
700         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
701
702         /* /Fx */
703         /* These are just to be sure */
704         avctx->delay = 0;
705         s->low_delay = 1;
706         break;
707     case AV_CODEC_ID_FLV1:
708         s->out_format      = FMT_H263;
709         s->h263_flv        = 2; /* format = 1; 11-bit codes */
710         s->unrestricted_mv = 1;
711         s->rtp_mode  = 0; /* don't allow GOB */
712         avctx->delay = 0;
713         s->low_delay = 1;
714         break;
715     case AV_CODEC_ID_RV10:
716         s->out_format = FMT_H263;
717         avctx->delay  = 0;
718         s->low_delay  = 1;
719         break;
720     case AV_CODEC_ID_RV20:
721         s->out_format      = FMT_H263;
722         avctx->delay       = 0;
723         s->low_delay       = 1;
724         s->modified_quant  = 1;
725         s->h263_aic        = 1;
726         s->h263_plus       = 1;
727         s->loop_filter     = 1;
728         s->unrestricted_mv = 0;
729         break;
730     case AV_CODEC_ID_MPEG4:
731         s->out_format      = FMT_H263;
732         s->h263_pred       = 1;
733         s->unrestricted_mv = 1;
734         s->low_delay       = s->max_b_frames ? 0 : 1;
735         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
736         break;
737     case AV_CODEC_ID_MSMPEG4V2:
738         s->out_format      = FMT_H263;
739         s->h263_pred       = 1;
740         s->unrestricted_mv = 1;
741         s->msmpeg4_version = 2;
742         avctx->delay       = 0;
743         s->low_delay       = 1;
744         break;
745     case AV_CODEC_ID_MSMPEG4V3:
746         s->out_format        = FMT_H263;
747         s->h263_pred         = 1;
748         s->unrestricted_mv   = 1;
749         s->msmpeg4_version   = 3;
750         s->flipflop_rounding = 1;
751         avctx->delay         = 0;
752         s->low_delay         = 1;
753         break;
754     case AV_CODEC_ID_WMV1:
755         s->out_format        = FMT_H263;
756         s->h263_pred         = 1;
757         s->unrestricted_mv   = 1;
758         s->msmpeg4_version   = 4;
759         s->flipflop_rounding = 1;
760         avctx->delay         = 0;
761         s->low_delay         = 1;
762         break;
763     case AV_CODEC_ID_WMV2:
764         s->out_format        = FMT_H263;
765         s->h263_pred         = 1;
766         s->unrestricted_mv   = 1;
767         s->msmpeg4_version   = 5;
768         s->flipflop_rounding = 1;
769         avctx->delay         = 0;
770         s->low_delay         = 1;
771         break;
772     default:
773         return -1;
774     }
775
776     avctx->has_b_frames = !s->low_delay;
777
778     s->encoding = 1;
779
780     s->progressive_frame    =
781     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
782                                                 CODEC_FLAG_INTERLACED_ME) ||
783                                 s->alternate_scan);
784
785     /* init */
786     if (ff_MPV_common_init(s) < 0)
787         return -1;
788
789     if (ARCH_X86)
790         ff_MPV_encode_init_x86(s);
791
792     if (!s->dct_quantize)
793         s->dct_quantize = ff_dct_quantize_c;
794     if (!s->denoise_dct)
795         s->denoise_dct  = denoise_dct_c;
796     s->fast_dct_quantize = s->dct_quantize;
797     if (avctx->trellis)
798         s->dct_quantize  = dct_quantize_trellis_c;
799
800     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
801         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
802
803     s->quant_precision = 5;
804
805     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
806     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
807
808     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
809         ff_h261_encode_init(s);
810     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
811         ff_h263_encode_init(s);
812     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
813         ff_msmpeg4_encode_init(s);
814     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
815         && s->out_format == FMT_MPEG1)
816         ff_mpeg1_encode_init(s);
817
818     /* init q matrix */
819     for (i = 0; i < 64; i++) {
820         int j = s->dsp.idct_permutation[i];
821         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
822             s->mpeg_quant) {
823             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
824             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
825         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
826             s->intra_matrix[j] =
827             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
828         } else {
829             /* mpeg1/2 */
830             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
831             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
832         }
833         if (s->avctx->intra_matrix)
834             s->intra_matrix[j] = s->avctx->intra_matrix[i];
835         if (s->avctx->inter_matrix)
836             s->inter_matrix[j] = s->avctx->inter_matrix[i];
837     }
838
839     /* precompute matrix */
840     /* for mjpeg, we do include qscale in the matrix */
841     if (s->out_format != FMT_MJPEG) {
842         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
843                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
844                           31, 1);
845         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
846                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
847                           31, 0);
848     }
849
850     if (ff_rate_control_init(s) < 0)
851         return -1;
852
853     return 0;
854 }
855
856 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
857 {
858     MpegEncContext *s = avctx->priv_data;
859
860     ff_rate_control_uninit(s);
861
862     ff_MPV_common_end(s);
863     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
864         s->out_format == FMT_MJPEG)
865         ff_mjpeg_encode_close(s);
866
867     av_freep(&avctx->extradata);
868
869     return 0;
870 }
871
872 static int get_sae(uint8_t *src, int ref, int stride)
873 {
874     int x,y;
875     int acc = 0;
876
877     for (y = 0; y < 16; y++) {
878         for (x = 0; x < 16; x++) {
879             acc += FFABS(src[x + y * stride] - ref);
880         }
881     }
882
883     return acc;
884 }
885
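/* Count the 16x16 blocks for which intra coding looks cheaper than inter
 * coding: a block is counted when its SAD against the reference exceeds the
 * sum of absolute differences from its own mean by more than 500. Used by
 * b_frame_strategy 1 to score candidate B-frames. */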
886 static int get_intra_count(MpegEncContext *s, uint8_t *src,
887                            uint8_t *ref, int stride)
888 {
889     int x, y, w, h;
890     int acc = 0;
891
892     w = s->width  & ~15;
893     h = s->height & ~15;
894
895     for (y = 0; y < h; y += 16) {
896         for (x = 0; x < w; x += 16) {
897             int offset = x + y * stride;
898             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
899                                      16);
900             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
901             int sae  = get_sae(src + offset, mean, stride);
902
903             acc += sae + 500 < sad;
904         }
905     }
906     return acc;
907 }
908
909
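/* Queue one input frame for encoding. The caller's buffer is referenced
 * directly when its linesizes match and either CODEC_FLAG_INPUT_PRESERVED is
 * set or there is no reordering delay; otherwise the picture is copied into
 * an internal buffer. The frame is then stored at the end of the
 * s->input_picture[] reordering window (index encoding_delay). */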
910 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg)
911 {
912     AVFrame *pic = NULL;
913     int64_t pts;
914     int i;
915     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
916                                                  (s->low_delay ? 0 : 1);
917     int direct = 1;
918
919     if (pic_arg) {
920         pts = pic_arg->pts;
921         pic_arg->display_picture_number = s->input_picture_number++;
922
923         if (pts != AV_NOPTS_VALUE) {
924             if (s->user_specified_pts != AV_NOPTS_VALUE) {
925                 int64_t time = pts;
926                 int64_t last = s->user_specified_pts;
927
928                 if (time <= last) {
929                     av_log(s->avctx, AV_LOG_ERROR,
930                            "Error, Invalid timestamp=%"PRId64", "
931                            "last=%"PRId64"\n", pts, s->user_specified_pts);
932                     return -1;
933                 }
934
935                 if (!s->low_delay && pic_arg->display_picture_number == 1)
936                     s->dts_delta = time - last;
937             }
938             s->user_specified_pts = pts;
939         } else {
940             if (s->user_specified_pts != AV_NOPTS_VALUE) {
941                 s->user_specified_pts =
942                 pts = s->user_specified_pts + 1;
943                 av_log(s->avctx, AV_LOG_INFO,
944                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
945                        pts);
946             } else {
947                 pts = pic_arg->display_picture_number;
948             }
949         }
950     }
951
952   if (pic_arg) {
953     if (encoding_delay && !(s->flags & CODEC_FLAG_INPUT_PRESERVED))
954         direct = 0;
955     if (pic_arg->linesize[0] != s->linesize)
956         direct = 0;
957     if (pic_arg->linesize[1] != s->uvlinesize)
958         direct = 0;
959     if (pic_arg->linesize[2] != s->uvlinesize)
960         direct = 0;
961
962     av_dlog(s->avctx, "%d %d %d %d\n", pic_arg->linesize[0],
963             pic_arg->linesize[1], s->linesize, s->uvlinesize);
964
965     if (direct) {
966         i = ff_find_unused_picture(s, 1);
967         if (i < 0)
968             return i;
969
970         pic = &s->picture[i].f;
971         pic->reference = 3;
972
973         for (i = 0; i < 4; i++) {
974             pic->data[i]     = pic_arg->data[i];
975             pic->linesize[i] = pic_arg->linesize[i];
976         }
977         if (ff_alloc_picture(s, (Picture *) pic, 1) < 0) {
978             return -1;
979         }
980     } else {
981         i = ff_find_unused_picture(s, 0);
982         if (i < 0)
983             return i;
984
985         pic = &s->picture[i].f;
986         pic->reference = 3;
987
988         if (ff_alloc_picture(s, (Picture *) pic, 0) < 0) {
989             return -1;
990         }
991
992         if (pic->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
993             pic->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
994             pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
995             // empty
996         } else {
997             int h_chroma_shift, v_chroma_shift;
998             av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
999                                              &h_chroma_shift,
1000                                              &v_chroma_shift);
1001
1002             for (i = 0; i < 3; i++) {
1003                 int src_stride = pic_arg->linesize[i];
1004                 int dst_stride = i ? s->uvlinesize : s->linesize;
1005                 int h_shift = i ? h_chroma_shift : 0;
1006                 int v_shift = i ? v_chroma_shift : 0;
1007                 int w = s->width  >> h_shift;
1008                 int h = s->height >> v_shift;
1009                 uint8_t *src = pic_arg->data[i];
1010                 uint8_t *dst = pic->data[i];
1011
1012                 if (!s->avctx->rc_buffer_size)
1013                     dst += INPLACE_OFFSET;
1014
1015                 if (src_stride == dst_stride)
1016                     memcpy(dst, src, src_stride * h);
1017                 else {
1018                     while (h--) {
1019                         memcpy(dst, src, w);
1020                         dst += dst_stride;
1021                         src += src_stride;
1022                     }
1023                 }
1024             }
1025         }
1026     }
1027     copy_picture_attributes(s, pic, pic_arg);
1028     pic->pts = pts; // we set this here to avoid modifying pic_arg
1029   }
1030
1031     /* shift buffer entries */
1032     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1033         s->input_picture[i - 1] = s->input_picture[i];
1034
1035     s->input_picture[encoding_delay] = (Picture*) pic;
1036
1037     return 0;
1038 }
1039
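/* Decide whether the current frame may be dropped entirely: compare it
 * against the last reference with frame_skip_cmp on 8x8 blocks, accumulate
 * the per-block scores according to frame_skip_exp, and report a skip when
 * the total stays below frame_skip_threshold or below a lambda-scaled
 * frame_skip_factor limit. */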
1040 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1041 {
1042     int x, y, plane;
1043     int score = 0;
1044     int64_t score64 = 0;
1045
1046     for (plane = 0; plane < 3; plane++) {
1047         const int stride = p->f.linesize[plane];
1048         const int bw = plane ? 1 : 2;
1049         for (y = 0; y < s->mb_height * bw; y++) {
1050             for (x = 0; x < s->mb_width * bw; x++) {
1051                 int off = p->f.type == FF_BUFFER_TYPE_SHARED ? 0 : 16;
1052                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1053                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1054                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1055
1056                 switch (s->avctx->frame_skip_exp) {
1057                 case 0: score    =  FFMAX(score, v);          break;
1058                 case 1: score   += FFABS(v);                  break;
1059                 case 2: score   += v * v;                     break;
1060                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1061                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1062                 }
1063             }
1064         }
1065     }
1066
1067     if (score)
1068         score64 = score;
1069
1070     if (score64 < s->avctx->frame_skip_threshold)
1071         return 1;
1072     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1073         return 1;
1074     return 0;
1075 }
1076
1077 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1078 {
1079     AVPacket pkt = { 0 };
1080     int ret, got_output;
1081
1082     av_init_packet(&pkt);
1083     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1084     if (ret < 0)
1085         return ret;
1086
1087     ret = pkt.size;
1088     av_free_packet(&pkt);
1089     return ret;
1090 }
1091
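/* b_frame_strategy 2: encode downscaled (by brd_scale) copies of the queued
 * input frames with a throw-away helper encoder, trying every B-frame run
 * length from 0 to max_b_frames, and return the length with the lowest
 * combined rate-distortion cost (coded bits weighted by lambda2 plus the
 * reconstruction error reported via CODEC_FLAG_PSNR). */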
1092 static int estimate_best_b_count(MpegEncContext *s)
1093 {
1094     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1095     AVCodecContext *c = avcodec_alloc_context3(NULL);
1096     AVFrame input[FF_MAX_B_FRAMES + 2];
1097     const int scale = s->avctx->brd_scale;
1098     int i, j, out_size, p_lambda, b_lambda, lambda2;
1099     int64_t best_rd  = INT64_MAX;
1100     int best_b_count = -1;
1101
1102     assert(scale >= 0 && scale <= 3);
1103
1104     //emms_c();
1105     //s->next_picture_ptr->quality;
1106     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1107     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1108     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1109     if (!b_lambda) // FIXME we should do this somewhere else
1110         b_lambda = p_lambda;
1111     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1112                FF_LAMBDA_SHIFT;
1113
1114     c->width        = s->width  >> scale;
1115     c->height       = s->height >> scale;
1116     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1117                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1118     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1119     c->mb_decision  = s->avctx->mb_decision;
1120     c->me_cmp       = s->avctx->me_cmp;
1121     c->mb_cmp       = s->avctx->mb_cmp;
1122     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1123     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1124     c->time_base    = s->avctx->time_base;
1125     c->max_b_frames = s->max_b_frames;
1126
1127     if (avcodec_open2(c, codec, NULL) < 0)
1128         return -1;
1129
1130     for (i = 0; i < s->max_b_frames + 2; i++) {
1131         int ysize = c->width * c->height;
1132         int csize = (c->width / 2) * (c->height / 2);
1133         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1134                                                 s->next_picture_ptr;
1135
1136         avcodec_get_frame_defaults(&input[i]);
1137         input[i].data[0]     = av_malloc(ysize + 2 * csize);
1138         input[i].data[1]     = input[i].data[0] + ysize;
1139         input[i].data[2]     = input[i].data[1] + csize;
1140         input[i].linesize[0] = c->width;
1141         input[i].linesize[1] =
1142         input[i].linesize[2] = c->width / 2;
1143
1144         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1145             pre_input = *pre_input_ptr;
1146
1147             if (pre_input.f.type != FF_BUFFER_TYPE_SHARED && i) {
1148                 pre_input.f.data[0] += INPLACE_OFFSET;
1149                 pre_input.f.data[1] += INPLACE_OFFSET;
1150                 pre_input.f.data[2] += INPLACE_OFFSET;
1151             }
1152
1153             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
1154                                  pre_input.f.data[0], pre_input.f.linesize[0],
1155                                  c->width,      c->height);
1156             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
1157                                  pre_input.f.data[1], pre_input.f.linesize[1],
1158                                  c->width >> 1, c->height >> 1);
1159             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
1160                                  pre_input.f.data[2], pre_input.f.linesize[2],
1161                                  c->width >> 1, c->height >> 1);
1162         }
1163     }
1164
1165     for (j = 0; j < s->max_b_frames + 1; j++) {
1166         int64_t rd = 0;
1167
1168         if (!s->input_picture[j])
1169             break;
1170
1171         c->error[0] = c->error[1] = c->error[2] = 0;
1172
1173         input[0].pict_type = AV_PICTURE_TYPE_I;
1174         input[0].quality   = 1 * FF_QP2LAMBDA;
1175
1176         out_size = encode_frame(c, &input[0]);
1177
1178         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1179
1180         for (i = 0; i < s->max_b_frames + 1; i++) {
1181             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1182
1183             input[i + 1].pict_type = is_p ?
1184                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1185             input[i + 1].quality   = is_p ? p_lambda : b_lambda;
1186
1187             out_size = encode_frame(c, &input[i + 1]);
1188
1189             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1190         }
1191
1192         /* get the delayed frames */
1193         while (out_size) {
1194             out_size = encode_frame(c, NULL);
1195             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1196         }
1197
1198         rd += c->error[0] + c->error[1] + c->error[2];
1199
1200         if (rd < best_rd) {
1201             best_rd = rd;
1202             best_b_count = j;
1203         }
1204     }
1205
1206     avcodec_close(c);
1207     av_freep(&c);
1208
1209     for (i = 0; i < s->max_b_frames + 2; i++) {
1210         av_freep(&input[i].data[0]);
1211     }
1212
1213     return best_b_count;
1214 }
1215
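/* Pick the next picture to code: decide its type (I/P/B) and how many
 * B-frames precede the next reference, honouring the GOP size, closed-GOP,
 * two-pass and frame-skip constraints, then move the chosen frames in coded
 * order into s->reordered_input_picture[] and set up s->new_picture /
 * s->current_picture_ptr. */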
1216 static int select_input_picture(MpegEncContext *s)
1217 {
1218     int i;
1219
1220     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1221         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1222     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1223
1224     /* set next picture type & ordering */
1225     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1226         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1227             s->next_picture_ptr == NULL || s->intra_only) {
1228             s->reordered_input_picture[0] = s->input_picture[0];
1229             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1230             s->reordered_input_picture[0]->f.coded_picture_number =
1231                 s->coded_picture_number++;
1232         } else {
1233             int b_frames;
1234
1235             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1236                 if (s->picture_in_gop_number < s->gop_size &&
1237                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1238                     // FIXME check that the gop check above is +-1 correct
1239                     if (s->input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED) {
1240                         for (i = 0; i < 4; i++)
1241                             s->input_picture[0]->f.data[i] = NULL;
1242                         s->input_picture[0]->f.type = 0;
1243                     } else {
1244                         assert(s->input_picture[0]->f.type == FF_BUFFER_TYPE_USER ||
1245                                s->input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL);
1246
1247                         s->avctx->release_buffer(s->avctx,
1248                                                  &s->input_picture[0]->f);
1249                     }
1250
1251                     emms_c();
1252                     ff_vbv_update(s, 0);
1253
1254                     goto no_output_pic;
1255                 }
1256             }
1257
1258             if (s->flags & CODEC_FLAG_PASS2) {
1259                 for (i = 0; i < s->max_b_frames + 1; i++) {
1260                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1261
1262                     if (pict_num >= s->rc_context.num_entries)
1263                         break;
1264                     if (!s->input_picture[i]) {
1265                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1266                         break;
1267                     }
1268
1269                     s->input_picture[i]->f.pict_type =
1270                         s->rc_context.entry[pict_num].new_pict_type;
1271                 }
1272             }
1273
1274             if (s->avctx->b_frame_strategy == 0) {
1275                 b_frames = s->max_b_frames;
1276                 while (b_frames && !s->input_picture[b_frames])
1277                     b_frames--;
1278             } else if (s->avctx->b_frame_strategy == 1) {
1279                 for (i = 1; i < s->max_b_frames + 1; i++) {
1280                     if (s->input_picture[i] &&
1281                         s->input_picture[i]->b_frame_score == 0) {
1282                         s->input_picture[i]->b_frame_score =
1283                             get_intra_count(s,
1284                                             s->input_picture[i    ]->f.data[0],
1285                                             s->input_picture[i - 1]->f.data[0],
1286                                             s->linesize) + 1;
1287                     }
1288                 }
1289                 for (i = 0; i < s->max_b_frames + 1; i++) {
1290                     if (s->input_picture[i] == NULL ||
1291                         s->input_picture[i]->b_frame_score - 1 >
1292                             s->mb_num / s->avctx->b_sensitivity)
1293                         break;
1294                 }
1295
1296                 b_frames = FFMAX(0, i - 1);
1297
1298                 /* reset scores */
1299                 for (i = 0; i < b_frames + 1; i++) {
1300                     s->input_picture[i]->b_frame_score = 0;
1301                 }
1302             } else if (s->avctx->b_frame_strategy == 2) {
1303                 b_frames = estimate_best_b_count(s);
1304             } else {
1305                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1306                 b_frames = 0;
1307             }
1308
1309             emms_c();
1310
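            /* If the user forced a non-B picture type somewhere in the queued
             * run, that picture becomes the next reference and the B-frame
             * run ends just before it. */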
1311             for (i = b_frames - 1; i >= 0; i--) {
1312                 int type = s->input_picture[i]->f.pict_type;
1313                 if (type && type != AV_PICTURE_TYPE_B)
1314                     b_frames = i;
1315             }
1316             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1317                 b_frames == s->max_b_frames) {
1318                 av_log(s->avctx, AV_LOG_ERROR,
1319                        "warning, too many b frames in a row\n");
1320             }
1321
1322             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1323                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1324                     s->gop_size > s->picture_in_gop_number) {
1325                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1326                 } else {
1327                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1328                         b_frames = 0;
1329                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1330                 }
1331             }
1332
1333             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1334                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1335                 b_frames--;
1336
1337             s->reordered_input_picture[0] = s->input_picture[b_frames];
1338             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1339                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1340             s->reordered_input_picture[0]->f.coded_picture_number =
1341                 s->coded_picture_number++;
1342             for (i = 0; i < b_frames; i++) {
1343                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1344                 s->reordered_input_picture[i + 1]->f.pict_type =
1345                     AV_PICTURE_TYPE_B;
1346                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1347                     s->coded_picture_number++;
1348             }
1349         }
1350     }
1351 no_output_pic:
1352     if (s->reordered_input_picture[0]) {
1353         s->reordered_input_picture[0]->f.reference =
1354            s->reordered_input_picture[0]->f.pict_type !=
1355                AV_PICTURE_TYPE_B ? 3 : 0;
1356
1357         ff_copy_picture(&s->new_picture, s->reordered_input_picture[0]);
1358
1359         if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED ||
1360             s->avctx->rc_buffer_size) {
1361             // input is a shared pix, so we can't modify it -> allocate a new
1362             // one and ensure that the shared one is reusable
1363
1364             Picture *pic;
1365             int i = ff_find_unused_picture(s, 0);
1366             if (i < 0)
1367                 return i;
1368             pic = &s->picture[i];
1369
1370             pic->f.reference = s->reordered_input_picture[0]->f.reference;
1371             if (ff_alloc_picture(s, pic, 0) < 0) {
1372                 return -1;
1373             }
1374
1375             /* mark us unused / free shared pic */
1376             if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL)
1377                 s->avctx->release_buffer(s->avctx,
1378                                          &s->reordered_input_picture[0]->f);
1379             for (i = 0; i < 4; i++)
1380                 s->reordered_input_picture[0]->f.data[i] = NULL;
1381             s->reordered_input_picture[0]->f.type = 0;
1382
1383             copy_picture_attributes(s, &pic->f,
1384                                     &s->reordered_input_picture[0]->f);
1385
1386             s->current_picture_ptr = pic;
1387         } else {
1388             // input is not a shared pix -> reuse buffer for current_pix
1389
1390             assert(s->reordered_input_picture[0]->f.type ==
1391                        FF_BUFFER_TYPE_USER ||
1392                    s->reordered_input_picture[0]->f.type ==
1393                        FF_BUFFER_TYPE_INTERNAL);
1394
1395             s->current_picture_ptr = s->reordered_input_picture[0];
1396             for (i = 0; i < 4; i++) {
1397                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1398             }
1399         }
1400         ff_copy_picture(&s->current_picture, s->current_picture_ptr);
1401
1402         s->picture_number = s->new_picture.f.display_picture_number;
1403     } else {
1404         memset(&s->new_picture, 0, sizeof(Picture));
1405     }
1406     return 0;
1407 }
1408
1409 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1410                           const AVFrame *pic_arg, int *got_packet)
1411 {
1412     MpegEncContext *s = avctx->priv_data;
1413     int i, stuffing_count, ret;
1414     int context_count = s->slice_context_count;
1415
1416     s->picture_in_gop_number++;
1417
1418     if (load_input_picture(s, pic_arg) < 0)
1419         return -1;
1420
1421     if (select_input_picture(s) < 0) {
1422         return -1;
1423     }
1424
1425     /* output? */
1426     if (s->new_picture.f.data[0]) {
1427         if (!pkt->data &&
1428             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1429             return ret;
1430         if (s->mb_info) {
1431             s->mb_info_ptr = av_packet_new_side_data(pkt,
1432                                  AV_PKT_DATA_H263_MB_INFO,
1433                                  s->mb_width*s->mb_height*12);
1434             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1435         }
1436
1437         for (i = 0; i < context_count; i++) {
1438             int start_y = s->thread_context[i]->start_mb_y;
1439             int   end_y = s->thread_context[i]->  end_mb_y;
1440             int h       = s->mb_height;
1441             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1442             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1443
1444             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1445         }
1446
1447         s->pict_type = s->new_picture.f.pict_type;
1448         //emms_c();
1449         ff_MPV_frame_start(s, avctx);
1450 vbv_retry:
1451         if (encode_picture(s, s->picture_number) < 0)
1452             return -1;
1453
1454         avctx->header_bits = s->header_bits;
1455         avctx->mv_bits     = s->mv_bits;
1456         avctx->misc_bits   = s->misc_bits;
1457         avctx->i_tex_bits  = s->i_tex_bits;
1458         avctx->p_tex_bits  = s->p_tex_bits;
1459         avctx->i_count     = s->i_count;
1460         // FIXME f/b_count in avctx
1461         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1462         avctx->skip_count  = s->skip_count;
1463
1464         ff_MPV_frame_end(s);
1465
1466         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1467             ff_mjpeg_encode_picture_trailer(s);
1468
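             /* VBV compliance: if the coded frame would overflow the rate-control
              * buffer, raise lambda (and the per-MB lambda table when adaptive
              * quantization is enabled), rewind the bit writers and re-encode the
              * picture from the vbv_retry label above. */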
1469         if (avctx->rc_buffer_size) {
1470             RateControlContext *rcc = &s->rc_context;
1471             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1472
1473             if (put_bits_count(&s->pb) > max_size &&
1474                 s->lambda < s->avctx->lmax) {
1475                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1476                                        (s->qscale + 1) / s->qscale);
1477                 if (s->adaptive_quant) {
1478                     int i;
1479                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1480                         s->lambda_table[i] =
1481                             FFMAX(s->lambda_table[i] + 1,
1482                                   s->lambda_table[i] * (s->qscale + 1) /
1483                                   s->qscale);
1484                 }
1485                 s->mb_skipped = 0;        // done in MPV_frame_start()
1486                 // the no_rounding toggle was done in encode_picture(), so undo it before retrying
1487                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1488                     if (s->flipflop_rounding          ||
1489                         s->codec_id == AV_CODEC_ID_H263P ||
1490                         s->codec_id == AV_CODEC_ID_MPEG4)
1491                         s->no_rounding ^= 1;
1492                 }
1493                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1494                     s->time_base       = s->last_time_base;
1495                     s->last_non_b_time = s->time - s->pp_time;
1496                 }
1497                 for (i = 0; i < context_count; i++) {
1498                     PutBitContext *pb = &s->thread_context[i]->pb;
1499                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1500                 }
1501                 goto vbv_retry;
1502             }
1503
1504             assert(s->avctx->rc_max_rate);
1505         }
1506
1507         if (s->flags & CODEC_FLAG_PASS1)
1508             ff_write_pass1_stats(s);
1509
1510         for (i = 0; i < 4; i++) {
1511             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1512             avctx->error[i] += s->current_picture_ptr->f.error[i];
1513         }
1514
1515         if (s->flags & CODEC_FLAG_PASS1)
1516             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1517                    avctx->i_tex_bits + avctx->p_tex_bits ==
1518                        put_bits_count(&s->pb));
1519         flush_put_bits(&s->pb);
1520         s->frame_bits  = put_bits_count(&s->pb);
1521
1522         stuffing_count = ff_vbv_update(s, s->frame_bits);
1523         if (stuffing_count) {
1524             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1525                     stuffing_count + 50) {
1526                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1527                 return -1;
1528             }
1529
1530             switch (s->codec_id) {
1531             case AV_CODEC_ID_MPEG1VIDEO:
1532             case AV_CODEC_ID_MPEG2VIDEO:
1533                 while (stuffing_count--) {
1534                     put_bits(&s->pb, 8, 0);
1535                 }
1536             break;
1537             case AV_CODEC_ID_MPEG4:
1538                 put_bits(&s->pb, 16, 0);
1539                 put_bits(&s->pb, 16, 0x1C3);
1540                 stuffing_count -= 4;
1541                 while (stuffing_count--) {
1542                     put_bits(&s->pb, 8, 0xFF);
1543                 }
1544             break;
1545             default:
1546                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1547             }
1548             flush_put_bits(&s->pb);
1549             s->frame_bits  = put_bits_count(&s->pb);
1550         }
1551
1552         /* update mpeg1/2 vbv_delay for CBR */
1553         if (s->avctx->rc_max_rate                          &&
1554             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1555             s->out_format == FMT_MPEG1                     &&
1556             90000LL * (avctx->rc_buffer_size - 1) <=
1557                 s->avctx->rc_max_rate * 0xFFFFLL) {
1558             int vbv_delay, min_delay;
1559             double inbits  = s->avctx->rc_max_rate *
1560                              av_q2d(s->avctx->time_base);
1561             int    minbits = s->frame_bits - 8 *
1562                              (s->vbv_delay_ptr - s->pb.buf - 1);
1563             double bits    = s->rc_context.buffer_index + minbits - inbits;
1564
1565             if (bits < 0)
1566                 av_log(s->avctx, AV_LOG_ERROR,
1567                        "Internal error, negative bits\n");
1568
1569             assert(s->repeat_first_field == 0);
1570
1571             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1572             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1573                         s->avctx->rc_max_rate;
1574
1575             vbv_delay = FFMAX(vbv_delay, min_delay);
1576
1577             assert(vbv_delay < 0xFFFF);
1578
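                 /* patch the 16-bit vbv_delay field of the already written picture
                  * header in place: it spans the low 3 bits of vbv_delay_ptr[0],
                  * all of vbv_delay_ptr[1] and the top 5 bits of vbv_delay_ptr[2].
                  * avctx->vbv_delay is reported in 27 MHz units, hence the * 300. */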
1579             s->vbv_delay_ptr[0] &= 0xF8;
1580             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1581             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1582             s->vbv_delay_ptr[2] &= 0x07;
1583             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1584             avctx->vbv_delay     = vbv_delay * 300;
1585         }
1586         s->total_bits     += s->frame_bits;
1587         avctx->frame_bits  = s->frame_bits;
1588
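             /* with B-frames (!low_delay) dts has to lag behind pts to account for
              * the reordering delay; without B-frames dts simply equals pts */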
1589         pkt->pts = s->current_picture.f.pts;
1590         if (!s->low_delay) {
1591             if (!s->current_picture.f.coded_picture_number)
1592                 pkt->dts = pkt->pts - s->dts_delta;
1593             else
1594                 pkt->dts = s->reordered_pts;
1595             s->reordered_pts = s->input_picture[0]->f.pts;
1596         } else
1597             pkt->dts = pkt->pts;
1598         if (s->current_picture.f.key_frame)
1599             pkt->flags |= AV_PKT_FLAG_KEY;
1600         if (s->mb_info)
1601             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1602     } else {
1603         s->frame_bits = 0;
1604     }
1605     assert((s->frame_bits & 7) == 0);
1606
1607     pkt->size = s->frame_bits / 8;
1608     *got_packet = !!pkt->size;
1609     return 0;
1610 }
1611
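     /* Zero an entire block if it only contains a few scattered +/-1 coefficients
      * whose position-weighted score stays below the given threshold; a negative
      * threshold additionally allows the DC coefficient to be eliminated. */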
1612 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1613                                                 int n, int threshold)
1614 {
1615     static const char tab[64] = {
1616         3, 2, 2, 1, 1, 1, 1, 1,
1617         1, 1, 1, 1, 1, 1, 1, 1,
1618         1, 1, 1, 1, 1, 1, 1, 1,
1619         0, 0, 0, 0, 0, 0, 0, 0,
1620         0, 0, 0, 0, 0, 0, 0, 0,
1621         0, 0, 0, 0, 0, 0, 0, 0,
1622         0, 0, 0, 0, 0, 0, 0, 0,
1623         0, 0, 0, 0, 0, 0, 0, 0
1624     };
1625     int score = 0;
1626     int run = 0;
1627     int i;
1628     DCTELEM *block = s->block[n];
1629     const int last_index = s->block_last_index[n];
1630     int skip_dc;
1631
1632     if (threshold < 0) {
1633         skip_dc = 0;
1634         threshold = -threshold;
1635     } else
1636         skip_dc = 1;
1637
1638     /* Are all the coefficients we could set to zero already zero? */
1639     if (last_index <= skip_dc - 1)
1640         return;
1641
1642     for (i = 0; i <= last_index; i++) {
1643         const int j = s->intra_scantable.permutated[i];
1644         const int level = FFABS(block[j]);
1645         if (level == 1) {
1646             if (skip_dc && i == 0)
1647                 continue;
1648             score += tab[run];
1649             run = 0;
1650         } else if (level > 1) {
1651             return;
1652         } else {
1653             run++;
1654         }
1655     }
1656     if (score >= threshold)
1657         return;
1658     for (i = skip_dc; i <= last_index; i++) {
1659         const int j = s->intra_scantable.permutated[i];
1660         block[j] = 0;
1661     }
1662     if (block[0])
1663         s->block_last_index[n] = 0;
1664     else
1665         s->block_last_index[n] = -1;
1666 }
1667
1668 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block,
1669                                int last_index)
1670 {
1671     int i;
1672     const int maxlevel = s->max_qcoeff;
1673     const int minlevel = s->min_qcoeff;
1674     int overflow = 0;
1675
1676     if (s->mb_intra) {
1677         i = 1; // skip clipping of intra dc
1678     } else
1679         i = 0;
1680
1681     for (; i <= last_index; i++) {
1682         const int j = s->intra_scantable.permutated[i];
1683         int level = block[j];
1684
1685         if (level > maxlevel) {
1686             level = maxlevel;
1687             overflow++;
1688         } else if (level < minlevel) {
1689             level = minlevel;
1690             overflow++;
1691         }
1692
1693         block[j] = level;
1694     }
1695
1696     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1697         av_log(s->avctx, AV_LOG_INFO,
1698                "warning, clipping %d dct coefficients to %d..%d\n",
1699                overflow, minlevel, maxlevel);
1700 }
1701
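     /* Compute per-pixel activity weights for noise shaping: each weight is
      * 36 times the standard deviation of the pixel's 3x3 neighbourhood (clipped
      * at the block borders), so busier areas receive larger weights. */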
1702 static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
1703 {
1704     int x, y;
1705     // FIXME optimize
1706     for (y = 0; y < 8; y++) {
1707         for (x = 0; x < 8; x++) {
1708             int x2, y2;
1709             int sum = 0;
1710             int sqr = 0;
1711             int count = 0;
1712
1713             for (y2 = FFMAX(y - 1, 0); y2 < FFMIN(8, y + 2); y2++) {
1714                 for (x2= FFMAX(x - 1, 0); x2 < FFMIN(8, x + 2); x2++) {
1715                     int v = ptr[x2 + y2 * stride];
1716                     sum += v;
1717                     sqr += v * v;
1718                     count++;
1719                 }
1720             }
1721             weight[x + 8 * y]= (36 * ff_sqrt(count * sqr - sum * sum)) / count;
1722         }
1723     }
1724 }
1725
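     /* Core per-macroblock encoder: fetch the source pixels (and, for inter MBs,
      * the motion-compensated prediction), decide between frame and field DCT,
      * transform and quantize each 8x8 block, then emit the codec-specific
      * bitstream for the macroblock. */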
1726 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1727                                                 int motion_x, int motion_y,
1728                                                 int mb_block_height,
1729                                                 int mb_block_count)
1730 {
1731     int16_t weight[8][64];
1732     DCTELEM orig[8][64];
1733     const int mb_x = s->mb_x;
1734     const int mb_y = s->mb_y;
1735     int i;
1736     int skip_dct[8];
1737     int dct_offset = s->linesize * 8; // default for progressive frames
1738     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1739     int wrap_y, wrap_c;
1740
1741     for (i = 0; i < mb_block_count; i++)
1742         skip_dct[i] = s->skipdct;
1743
1744     if (s->adaptive_quant) {
1745         const int last_qp = s->qscale;
1746         const int mb_xy = mb_x + mb_y * s->mb_stride;
1747
1748         s->lambda = s->lambda_table[mb_xy];
1749         update_qscale(s);
1750
1751         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1752             s->qscale = s->current_picture_ptr->f.qscale_table[mb_xy];
1753             s->dquant = s->qscale - last_qp;
1754
1755             if (s->out_format == FMT_H263) {
1756                 s->dquant = av_clip(s->dquant, -2, 2);
1757
1758                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1759                     if (!s->mb_intra) {
1760                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1761                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1762                                 s->dquant = 0;
1763                         }
1764                         if (s->mv_type == MV_TYPE_8X8)
1765                             s->dquant = 0;
1766                     }
1767                 }
1768             }
1769         }
1770         ff_set_qscale(s, last_qp + s->dquant);
1771     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1772         ff_set_qscale(s, s->qscale + s->dquant);
1773
1774     wrap_y = s->linesize;
1775     wrap_c = s->uvlinesize;
1776     ptr_y  = s->new_picture.f.data[0] +
1777              (mb_y * 16 * wrap_y)              + mb_x * 16;
1778     ptr_cb = s->new_picture.f.data[1] +
1779              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1780     ptr_cr = s->new_picture.f.data[2] +
1781              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1782
1783     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1784         uint8_t *ebuf = s->edge_emu_buffer + 32;
1785         s->vdsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
1786                                  mb_y * 16, s->width, s->height);
1787         ptr_y = ebuf;
1788         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, 8,
1789                                  mb_block_height, mb_x * 8, mb_y * 8,
1790                                  s->width >> 1, s->height >> 1);
1791         ptr_cb = ebuf + 18 * wrap_y;
1792         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr, wrap_c, 8,
1793                                  mb_block_height, mb_x * 8, mb_y * 8,
1794                                  s->width >> 1, s->height >> 1);
1795         ptr_cr = ebuf + 18 * wrap_y + 8;
1796     }
1797
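         /* For both intra and inter MBs: when interlaced DCT is allowed, compare
          * the ildct_cmp cost of coding the two 8-line halves progressively against
          * coding them as separate fields, and switch to field DCT (doubling the
          * line stride) when the field arrangement is cheaper. */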
1798     if (s->mb_intra) {
1799         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1800             int progressive_score, interlaced_score;
1801
1802             s->interlaced_dct = 0;
1803             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1804                                                     NULL, wrap_y, 8) +
1805                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1806                                                     NULL, wrap_y, 8) - 400;
1807
1808             if (progressive_score > 0) {
1809                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1810                                                        NULL, wrap_y * 2, 8) +
1811                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1812                                                        NULL, wrap_y * 2, 8);
1813                 if (progressive_score > interlaced_score) {
1814                     s->interlaced_dct = 1;
1815
1816                     dct_offset = wrap_y;
1817                     wrap_y <<= 1;
1818                     if (s->chroma_format == CHROMA_422)
1819                         wrap_c <<= 1;
1820                 }
1821             }
1822         }
1823
1824         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1825         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1826         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1827         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1828
1829         if (s->flags & CODEC_FLAG_GRAY) {
1830             skip_dct[4] = 1;
1831             skip_dct[5] = 1;
1832         } else {
1833             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1834             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1835             if (!s->chroma_y_shift) { /* 422 */
1836                 s->dsp.get_pixels(s->block[6],
1837                                   ptr_cb + (dct_offset >> 1), wrap_c);
1838                 s->dsp.get_pixels(s->block[7],
1839                                   ptr_cr + (dct_offset >> 1), wrap_c);
1840             }
1841         }
1842     } else {
1843         op_pixels_func (*op_pix)[4];
1844         qpel_mc_func (*op_qpix)[16];
1845         uint8_t *dest_y, *dest_cb, *dest_cr;
1846
1847         dest_y  = s->dest[0];
1848         dest_cb = s->dest[1];
1849         dest_cr = s->dest[2];
1850
1851         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1852             op_pix  = s->dsp.put_pixels_tab;
1853             op_qpix = s->dsp.put_qpel_pixels_tab;
1854         } else {
1855             op_pix  = s->dsp.put_no_rnd_pixels_tab;
1856             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1857         }
1858
1859         if (s->mv_dir & MV_DIR_FORWARD) {
1860             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
1861                           s->last_picture.f.data,
1862                           op_pix, op_qpix);
1863             op_pix  = s->dsp.avg_pixels_tab;
1864             op_qpix = s->dsp.avg_qpel_pixels_tab;
1865         }
1866         if (s->mv_dir & MV_DIR_BACKWARD) {
1867             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
1868                           s->next_picture.f.data,
1869                           op_pix, op_qpix);
1870         }
1871
1872         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1873             int progressive_score, interlaced_score;
1874
1875             s->interlaced_dct = 0;
1876             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1877                                                     ptr_y,              wrap_y,
1878                                                     8) +
1879                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1880                                                     ptr_y + wrap_y * 8, wrap_y,
1881                                                     8) - 400;
1882
1883             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1884                 progressive_score -= 400;
1885
1886             if (progressive_score > 0) {
1887                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1888                                                        ptr_y,
1889                                                        wrap_y * 2, 8) +
1890                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1891                                                        ptr_y + wrap_y,
1892                                                        wrap_y * 2, 8);
1893
1894                 if (progressive_score > interlaced_score) {
1895                     s->interlaced_dct = 1;
1896
1897                     dct_offset = wrap_y;
1898                     wrap_y <<= 1;
1899                     if (s->chroma_format == CHROMA_422)
1900                         wrap_c <<= 1;
1901                 }
1902             }
1903         }
1904
1905         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1906         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1907         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1908                            dest_y + dct_offset, wrap_y);
1909         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1910                            dest_y + dct_offset + 8, wrap_y);
1911
1912         if (s->flags & CODEC_FLAG_GRAY) {
1913             skip_dct[4] = 1;
1914             skip_dct[5] = 1;
1915         } else {
1916             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1917             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1918             if (!s->chroma_y_shift) { /* 422 */
1919                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1920                                    dest_cb + (dct_offset >> 1), wrap_c);
1921                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
1922                                    dest_cr + (dct_offset >> 1), wrap_c);
1923             }
1924         }
1925         /* pre quantization */
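             /* if this MB has little motion-compensated variance, skip the DCT of
              * any 8x8 block whose SAD against the prediction is below 20*qscale;
              * such blocks would quantize to (almost) nothing anyway */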
1926         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1927                 2 * s->qscale * s->qscale) {
1928             // FIXME optimize
1929             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1930                               wrap_y, 8) < 20 * s->qscale)
1931                 skip_dct[0] = 1;
1932             if (s->dsp.sad[1](NULL, ptr_y + 8,
1933                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1934                 skip_dct[1] = 1;
1935             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1936                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1937                 skip_dct[2] = 1;
1938             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1939                               dest_y + dct_offset + 8,
1940                               wrap_y, 8) < 20 * s->qscale)
1941                 skip_dct[3] = 1;
1942             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
1943                               wrap_c, 8) < 20 * s->qscale)
1944                 skip_dct[4] = 1;
1945             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
1946                               wrap_c, 8) < 20 * s->qscale)
1947                 skip_dct[5] = 1;
1948             if (!s->chroma_y_shift) { /* 422 */
1949                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
1950                                   dest_cb + (dct_offset >> 1),
1951                                   wrap_c, 8) < 20 * s->qscale)
1952                     skip_dct[6] = 1;
1953                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
1954                                   dest_cr + (dct_offset >> 1),
1955                                   wrap_c, 8) < 20 * s->qscale)
1956                     skip_dct[7] = 1;
1957             }
1958         }
1959     }
1960
1961     if (s->quantizer_noise_shaping) {
1962         if (!skip_dct[0])
1963             get_visual_weight(weight[0], ptr_y                 , wrap_y);
1964         if (!skip_dct[1])
1965             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
1966         if (!skip_dct[2])
1967             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
1968         if (!skip_dct[3])
1969             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
1970         if (!skip_dct[4])
1971             get_visual_weight(weight[4], ptr_cb                , wrap_c);
1972         if (!skip_dct[5])
1973             get_visual_weight(weight[5], ptr_cr                , wrap_c);
1974         if (!s->chroma_y_shift) { /* 422 */
1975             if (!skip_dct[6])
1976                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
1977                                   wrap_c);
1978             if (!skip_dct[7])
1979                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
1980                                   wrap_c);
1981         }
1982         memcpy(orig[0], s->block[0], sizeof(DCTELEM) * 64 * mb_block_count);
1983     }
1984
1985     /* DCT & quantize */
1986     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
1987     {
1988         for (i = 0; i < mb_block_count; i++) {
1989             if (!skip_dct[i]) {
1990                 int overflow;
1991                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
1992                 // FIXME we could decide to change the quantizer instead of
1993                 // clipping
1994                 // JS: I don't think that would be a good idea, it could lower
1995                 //     quality instead of improving it. Just INTRADC clipping
1996                 //     deserves changes in the quantizer
1997                 if (overflow)
1998                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
1999             } else
2000                 s->block_last_index[i] = -1;
2001         }
2002         if (s->quantizer_noise_shaping) {
2003             for (i = 0; i < mb_block_count; i++) {
2004                 if (!skip_dct[i]) {
2005                     s->block_last_index[i] =
2006                         dct_quantize_refine(s, s->block[i], weight[i],
2007                                             orig[i], i, s->qscale);
2008                 }
2009             }
2010         }
2011
2012         if (s->luma_elim_threshold && !s->mb_intra)
2013             for (i = 0; i < 4; i++)
2014                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2015         if (s->chroma_elim_threshold && !s->mb_intra)
2016             for (i = 4; i < mb_block_count; i++)
2017                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2018
2019         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2020             for (i = 0; i < mb_block_count; i++) {
2021                 if (s->block_last_index[i] == -1)
2022                     s->coded_score[i] = INT_MAX / 256;
2023             }
2024         }
2025     }
2026
2027     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2028         s->block_last_index[4] =
2029         s->block_last_index[5] = 0;
2030         s->block[4][0] =
2031         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2032     }
2033
2034     // FIXME: the non-C quantize code returns an incorrect block_last_index
2035     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2036         for (i = 0; i < mb_block_count; i++) {
2037             int j;
2038             if (s->block_last_index[i] > 0) {
2039                 for (j = 63; j > 0; j--) {
2040                     if (s->block[i][s->intra_scantable.permutated[j]])
2041                         break;
2042                 }
2043                 s->block_last_index[i] = j;
2044             }
2045         }
2046     }
2047
2048     /* huffman encode */
2049     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2050     case AV_CODEC_ID_MPEG1VIDEO:
2051     case AV_CODEC_ID_MPEG2VIDEO:
2052         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2053             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2054         break;
2055     case AV_CODEC_ID_MPEG4:
2056         if (CONFIG_MPEG4_ENCODER)
2057             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2058         break;
2059     case AV_CODEC_ID_MSMPEG4V2:
2060     case AV_CODEC_ID_MSMPEG4V3:
2061     case AV_CODEC_ID_WMV1:
2062         if (CONFIG_MSMPEG4_ENCODER)
2063             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2064         break;
2065     case AV_CODEC_ID_WMV2:
2066         if (CONFIG_WMV2_ENCODER)
2067             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2068         break;
2069     case AV_CODEC_ID_H261:
2070         if (CONFIG_H261_ENCODER)
2071             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2072         break;
2073     case AV_CODEC_ID_H263:
2074     case AV_CODEC_ID_H263P:
2075     case AV_CODEC_ID_FLV1:
2076     case AV_CODEC_ID_RV10:
2077     case AV_CODEC_ID_RV20:
2078         if (CONFIG_H263_ENCODER)
2079             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2080         break;
2081     case AV_CODEC_ID_MJPEG:
2082         if (CONFIG_MJPEG_ENCODER)
2083             ff_mjpeg_encode_mb(s, s->block);
2084         break;
2085     default:
2086         assert(0);
2087     }
2088 }
2089
2090 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2091 {
2092     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2093     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2094 }
2095
2096 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2097     int i;
2098
2099     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2100
2101     /* mpeg1 */
2102     d->mb_skip_run= s->mb_skip_run;
2103     for(i=0; i<3; i++)
2104         d->last_dc[i] = s->last_dc[i];
2105
2106     /* statistics */
2107     d->mv_bits= s->mv_bits;
2108     d->i_tex_bits= s->i_tex_bits;
2109     d->p_tex_bits= s->p_tex_bits;
2110     d->i_count= s->i_count;
2111     d->f_count= s->f_count;
2112     d->b_count= s->b_count;
2113     d->skip_count= s->skip_count;
2114     d->misc_bits= s->misc_bits;
2115     d->last_bits= 0;
2116
2117     d->mb_skipped= 0;
2118     d->qscale= s->qscale;
2119     d->dquant= s->dquant;
2120
2121     d->esc3_level_length= s->esc3_level_length;
2122 }
2123
2124 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2125     int i;
2126
2127     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2128     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2129
2130     /* mpeg1 */
2131     d->mb_skip_run= s->mb_skip_run;
2132     for(i=0; i<3; i++)
2133         d->last_dc[i] = s->last_dc[i];
2134
2135     /* statistics */
2136     d->mv_bits= s->mv_bits;
2137     d->i_tex_bits= s->i_tex_bits;
2138     d->p_tex_bits= s->p_tex_bits;
2139     d->i_count= s->i_count;
2140     d->f_count= s->f_count;
2141     d->b_count= s->b_count;
2142     d->skip_count= s->skip_count;
2143     d->misc_bits= s->misc_bits;
2144
2145     d->mb_intra= s->mb_intra;
2146     d->mb_skipped= s->mb_skipped;
2147     d->mv_type= s->mv_type;
2148     d->mv_dir= s->mv_dir;
2149     d->pb= s->pb;
2150     if(s->data_partitioning){
2151         d->pb2= s->pb2;
2152         d->tex_pb= s->tex_pb;
2153     }
2154     d->block= s->block;
2155     for(i=0; i<8; i++)
2156         d->block_last_index[i]= s->block_last_index[i];
2157     d->interlaced_dct= s->interlaced_dct;
2158     d->qscale= s->qscale;
2159
2160     d->esc3_level_length= s->esc3_level_length;
2161 }
2162
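     /* Encode one macroblock candidate (given prediction type and motion vectors)
      * into a spare bit buffer and, for full RD mode, a spare pixel buffer; the
      * result is kept only if its rate(-distortion) score beats *dmin. */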
2163 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2164                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2165                            int *dmin, int *next_block, int motion_x, int motion_y)
2166 {
2167     int score;
2168     uint8_t *dest_backup[3];
2169
2170     copy_context_before_encode(s, backup, type);
2171
2172     s->block= s->blocks[*next_block];
2173     s->pb= pb[*next_block];
2174     if(s->data_partitioning){
2175         s->pb2   = pb2   [*next_block];
2176         s->tex_pb= tex_pb[*next_block];
2177     }
2178
2179     if(*next_block){
2180         memcpy(dest_backup, s->dest, sizeof(s->dest));
2181         s->dest[0] = s->rd_scratchpad;
2182         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2183         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2184         assert(s->linesize >= 32); //FIXME
2185     }
2186
2187     encode_mb(s, motion_x, motion_y);
2188
2189     score= put_bits_count(&s->pb);
2190     if(s->data_partitioning){
2191         score+= put_bits_count(&s->pb2);
2192         score+= put_bits_count(&s->tex_pb);
2193     }
2194
2195     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2196         ff_MPV_decode_mb(s, s->block);
2197
2198         score *= s->lambda2;
2199         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2200     }
2201
2202     if(*next_block){
2203         memcpy(s->dest, dest_backup, sizeof(s->dest));
2204     }
2205
2206     if(score<*dmin){
2207         *dmin= score;
2208         *next_block^=1;
2209
2210         copy_context_after_encode(best, s, type);
2211     }
2212 }
2213
2214 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2215     uint32_t *sq = ff_squareTbl + 256;
2216     int acc=0;
2217     int x,y;
2218
2219     if(w==16 && h==16)
2220         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2221     else if(w==8 && h==8)
2222         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2223
2224     for(y=0; y<h; y++){
2225         for(x=0; x<w; x++){
2226             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2227         }
2228     }
2229
2230     assert(acc>=0);
2231
2232     return acc;
2233 }
2234
2235 static int sse_mb(MpegEncContext *s){
2236     int w= 16;
2237     int h= 16;
2238
2239     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2240     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2241
2242     if(w==16 && h==16)
2243       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2244         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2245                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2246                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2247       }else{
2248         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2249                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2250                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2251       }
2252     else
2253         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2254                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2255                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2256 }
2257
2258 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2259     MpegEncContext *s= *(void**)arg;
2260
2261
2262     s->me.pre_pass=1;
2263     s->me.dia_size= s->avctx->pre_dia_size;
2264     s->first_slice_line=1;
2265     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2266         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2267             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2268         }
2269         s->first_slice_line=0;
2270     }
2271
2272     s->me.pre_pass=0;
2273
2274     return 0;
2275 }
2276
2277 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2278     MpegEncContext *s= *(void**)arg;
2279
2280     ff_check_alignment();
2281
2282     s->me.dia_size= s->avctx->dia_size;
2283     s->first_slice_line=1;
2284     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2285         s->mb_x=0; //for block init below
2286         ff_init_block_index(s);
2287         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2288             s->block_index[0]+=2;
2289             s->block_index[1]+=2;
2290             s->block_index[2]+=2;
2291             s->block_index[3]+=2;
2292
2293             /* compute motion vector & mb_type and store in context */
2294             if(s->pict_type==AV_PICTURE_TYPE_B)
2295                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2296             else
2297                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2298         }
2299         s->first_slice_line=0;
2300     }
2301     return 0;
2302 }
2303
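     /* Compute the spatial variance and mean of every 16x16 luma block in this
      * slice; the accumulated variance is later used by the rate control and for
      * adaptive quantization. */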
2304 static int mb_var_thread(AVCodecContext *c, void *arg){
2305     MpegEncContext *s= *(void**)arg;
2306     int mb_x, mb_y;
2307
2308     ff_check_alignment();
2309
2310     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2311         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2312             int xx = mb_x * 16;
2313             int yy = mb_y * 16;
2314             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2315             int varc;
2316             int sum = s->dsp.pix_sum(pix, s->linesize);
2317
2318             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2319
2320             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2321             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2322             s->me.mb_var_sum_temp    += varc;
2323         }
2324     }
2325     return 0;
2326 }
2327
2328 static void write_slice_end(MpegEncContext *s){
2329     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2330         if(s->partitioned_frame){
2331             ff_mpeg4_merge_partitions(s);
2332         }
2333
2334         ff_mpeg4_stuffing(&s->pb);
2335     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2336         ff_mjpeg_encode_stuffing(&s->pb);
2337     }
2338
2339     avpriv_align_put_bits(&s->pb);
2340     flush_put_bits(&s->pb);
2341
2342     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2343         s->misc_bits+= get_bits_diff(s);
2344 }
2345
2346 static void write_mb_info(MpegEncContext *s)
2347 {
2348     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2349     int offset = put_bits_count(&s->pb);
2350     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2351     int gobn = s->mb_y / s->gob_index;
2352     int pred_x, pred_y;
2353     if (CONFIG_H263_ENCODER)
2354         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2355     bytestream_put_le32(&ptr, offset);
2356     bytestream_put_byte(&ptr, s->qscale);
2357     bytestream_put_byte(&ptr, gobn);
2358     bytestream_put_le16(&ptr, mba);
2359     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2360     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2361     /* 4MV not implemented */
2362     bytestream_put_byte(&ptr, 0); /* hmv2 */
2363     bytestream_put_byte(&ptr, 0); /* vmv2 */
2364 }
2365
2366 static void update_mb_info(MpegEncContext *s, int startcode)
2367 {
2368     if (!s->mb_info)
2369         return;
2370     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2371         s->mb_info_size += 12;
2372         s->prev_mb_info = s->last_mb_info;
2373     }
2374     if (startcode) {
2375         s->prev_mb_info = put_bits_count(&s->pb)/8;
2376         /* This might have incremented mb_info_size above, and we return without
2377          * actually writing any info into that slot yet. But in that case,
2378          * this will be called again after the start code has been written,
2379          * and the mb info will actually be written then. */
2380         return;
2381     }
2382
2383     s->last_mb_info = put_bits_count(&s->pb)/8;
2384     if (!s->mb_info_size)
2385         s->mb_info_size += 12;
2386     write_mb_info(s);
2387 }
2388
2389 static int encode_thread(AVCodecContext *c, void *arg){
2390     MpegEncContext *s= *(void**)arg;
2391     int mb_x, mb_y, pdif = 0;
2392     int chr_h= 16>>s->chroma_y_shift;
2393     int i, j;
2394     MpegEncContext best_s, backup_s;
2395     uint8_t bit_buf[2][MAX_MB_BYTES];
2396     uint8_t bit_buf2[2][MAX_MB_BYTES];
2397     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2398     PutBitContext pb[2], pb2[2], tex_pb[2];
2399
2400     ff_check_alignment();
2401
2402     for(i=0; i<2; i++){
2403         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2404         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2405         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2406     }
2407
2408     s->last_bits= put_bits_count(&s->pb);
2409     s->mv_bits=0;
2410     s->misc_bits=0;
2411     s->i_tex_bits=0;
2412     s->p_tex_bits=0;
2413     s->i_count=0;
2414     s->f_count=0;
2415     s->b_count=0;
2416     s->skip_count=0;
2417
2418     for(i=0; i<3; i++){
2419         /* init last dc values */
2420         /* note: quant matrix value (8) is implied here */
2421         s->last_dc[i] = 128 << s->intra_dc_precision;
2422
2423         s->current_picture.f.error[i] = 0;
2424     }
2425     s->mb_skip_run = 0;
2426     memset(s->last_mv, 0, sizeof(s->last_mv));
2427
2428     s->last_mv_dir = 0;
2429
2430     switch(s->codec_id){
2431     case AV_CODEC_ID_H263:
2432     case AV_CODEC_ID_H263P:
2433     case AV_CODEC_ID_FLV1:
2434         if (CONFIG_H263_ENCODER)
2435             s->gob_index = ff_h263_get_gob_height(s);
2436         break;
2437     case AV_CODEC_ID_MPEG4:
2438         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2439             ff_mpeg4_init_partitions(s);
2440         break;
2441     }
2442
2443     s->resync_mb_x=0;
2444     s->resync_mb_y=0;
2445     s->first_slice_line = 1;
2446     s->ptr_lastgob = s->pb.buf;
2447     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2448         s->mb_x=0;
2449         s->mb_y= mb_y;
2450
2451         ff_set_qscale(s, s->qscale);
2452         ff_init_block_index(s);
2453
2454         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2455             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2456             int mb_type= s->mb_type[xy];
2457 //            int d;
2458             int dmin= INT_MAX;
2459             int dir;
2460
2461             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2462                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2463                 return -1;
2464             }
2465             if(s->data_partitioning){
2466                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2467                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2468                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2469                     return -1;
2470                 }
2471             }
2472
2473             s->mb_x = mb_x;
2474             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2475             ff_update_block_index(s);
2476
2477             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2478                 ff_h261_reorder_mb_index(s);
2479                 xy= s->mb_y*s->mb_stride + s->mb_x;
2480                 mb_type= s->mb_type[xy];
2481             }
2482
2483             /* write gob / video packet header  */
2484             if(s->rtp_mode){
2485                 int current_packet_size, is_gob_start;
2486
2487                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2488
2489                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2490
2491                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2492
2493                 switch(s->codec_id){
2494                 case AV_CODEC_ID_H263:
2495                 case AV_CODEC_ID_H263P:
2496                     if(!s->h263_slice_structured)
2497                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2498                     break;
2499                 case AV_CODEC_ID_MPEG2VIDEO:
2500                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2501                 case AV_CODEC_ID_MPEG1VIDEO:
2502                     if(s->mb_skip_run) is_gob_start=0;
2503                     break;
2504                 }
2505
2506                 if(is_gob_start){
2507                     if(s->start_mb_y != mb_y || mb_x!=0){
2508                         write_slice_end(s);
2509
2510                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2511                             ff_mpeg4_init_partitions(s);
2512                         }
2513                     }
2514
2515                     assert((put_bits_count(&s->pb)&7) == 0);
2516                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2517
2518                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2519                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2520                         int d= 100 / s->avctx->error_rate;
2521                         if(r % d == 0){
2522                             current_packet_size=0;
2523                             s->pb.buf_ptr= s->ptr_lastgob;
2524                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2525                         }
2526                     }
2527
2528                     if (s->avctx->rtp_callback){
2529                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2530                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2531                     }
2532                     update_mb_info(s, 1);
2533
2534                     switch(s->codec_id){
2535                     case AV_CODEC_ID_MPEG4:
2536                         if (CONFIG_MPEG4_ENCODER) {
2537                             ff_mpeg4_encode_video_packet_header(s);
2538                             ff_mpeg4_clean_buffers(s);
2539                         }
2540                     break;
2541                     case AV_CODEC_ID_MPEG1VIDEO:
2542                     case AV_CODEC_ID_MPEG2VIDEO:
2543                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2544                             ff_mpeg1_encode_slice_header(s);
2545                             ff_mpeg1_clean_buffers(s);
2546                         }
2547                     break;
2548                     case AV_CODEC_ID_H263:
2549                     case AV_CODEC_ID_H263P:
2550                         if (CONFIG_H263_ENCODER)
2551                             ff_h263_encode_gob_header(s, mb_y);
2552                     break;
2553                     }
2554
2555                     if(s->flags&CODEC_FLAG_PASS1){
2556                         int bits= put_bits_count(&s->pb);
2557                         s->misc_bits+= bits - s->last_bits;
2558                         s->last_bits= bits;
2559                     }
2560
2561                     s->ptr_lastgob += current_packet_size;
2562                     s->first_slice_line=1;
2563                     s->resync_mb_x=mb_x;
2564                     s->resync_mb_y=mb_y;
2565                 }
2566             }
2567
2568             if(  (s->resync_mb_x   == s->mb_x)
2569                && s->resync_mb_y+1 == s->mb_y){
2570                 s->first_slice_line=0;
2571             }
2572
2573             s->mb_skipped=0;
2574             s->dquant=0; //only for QP_RD
2575
2576             update_mb_info(s, 0);
2577
2578             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2579                 int next_block=0;
2580                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2581
2582                 copy_context_before_encode(&backup_s, s, -1);
2583                 backup_s.pb= s->pb;
2584                 best_s.data_partitioning= s->data_partitioning;
2585                 best_s.partitioned_frame= s->partitioned_frame;
2586                 if(s->data_partitioning){
2587                     backup_s.pb2= s->pb2;
2588                     backup_s.tex_pb= s->tex_pb;
2589                 }
2590
2591                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2592                     s->mv_dir = MV_DIR_FORWARD;
2593                     s->mv_type = MV_TYPE_16X16;
2594                     s->mb_intra= 0;
2595                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2596                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2597                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2598                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2599                 }
2600                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2601                     s->mv_dir = MV_DIR_FORWARD;
2602                     s->mv_type = MV_TYPE_FIELD;
2603                     s->mb_intra= 0;
2604                     for(i=0; i<2; i++){
2605                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2606                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2607                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2608                     }
2609                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2610                                  &dmin, &next_block, 0, 0);
2611                 }
2612                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2613                     s->mv_dir = MV_DIR_FORWARD;
2614                     s->mv_type = MV_TYPE_16X16;
2615                     s->mb_intra= 0;
2616                     s->mv[0][0][0] = 0;
2617                     s->mv[0][0][1] = 0;
2618                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2619                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2620                 }
2621                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2622                     s->mv_dir = MV_DIR_FORWARD;
2623                     s->mv_type = MV_TYPE_8X8;
2624                     s->mb_intra= 0;
2625                     for(i=0; i<4; i++){
2626                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2627                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2628                     }
2629                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2630                                  &dmin, &next_block, 0, 0);
2631                 }
2632                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2633                     s->mv_dir = MV_DIR_FORWARD;
2634                     s->mv_type = MV_TYPE_16X16;
2635                     s->mb_intra= 0;
2636                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2637                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2638                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2639                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2640                 }
2641                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2642                     s->mv_dir = MV_DIR_BACKWARD;
2643                     s->mv_type = MV_TYPE_16X16;
2644                     s->mb_intra= 0;
2645                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2646                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2647                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2648                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2649                 }
2650                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2651                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2652                     s->mv_type = MV_TYPE_16X16;
2653                     s->mb_intra= 0;
2654                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2655                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2656                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2657                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2658                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2659                                  &dmin, &next_block, 0, 0);
2660                 }
2661                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2662                     s->mv_dir = MV_DIR_FORWARD;
2663                     s->mv_type = MV_TYPE_FIELD;
2664                     s->mb_intra= 0;
2665                     for(i=0; i<2; i++){
2666                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2667                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2668                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2669                     }
2670                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2671                                  &dmin, &next_block, 0, 0);
2672                 }
2673                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2674                     s->mv_dir = MV_DIR_BACKWARD;
2675                     s->mv_type = MV_TYPE_FIELD;
2676                     s->mb_intra= 0;
2677                     for(i=0; i<2; i++){
2678                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2679                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2680                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2681                     }
2682                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2683                                  &dmin, &next_block, 0, 0);
2684                 }
2685                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2686                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2687                     s->mv_type = MV_TYPE_FIELD;
2688                     s->mb_intra= 0;
2689                     for(dir=0; dir<2; dir++){
2690                         for(i=0; i<2; i++){
2691                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2692                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2693                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2694                         }
2695                     }
2696                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2697                                  &dmin, &next_block, 0, 0);
2698                 }
2699                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2700                     s->mv_dir = 0;
2701                     s->mv_type = MV_TYPE_16X16;
2702                     s->mb_intra= 1;
2703                     s->mv[0][0][0] = 0;
2704                     s->mv[0][0][1] = 0;
2705                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2706                                  &dmin, &next_block, 0, 0);
2707                     if(s->h263_pred || s->h263_aic){
2708                         if(best_s.mb_intra)
2709                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2710                         else
2711                             ff_clean_intra_table_entries(s); //old mode?
2712                     }
2713                 }
2714
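                     /* QP_RD: for the best 16x16 mode, also try re-encoding the MB
                      * with qscale offsets of +/-1 and +/-2 (+/-2 only for B-frames)
                      * and keep whichever variant scores best; the DC/AC prediction
                      * state is saved and restored around attempts that are not kept. */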
2715                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2716                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2717                         const int last_qp= backup_s.qscale;
2718                         int qpi, qp, dc[6];
2719                         DCTELEM ac[6][16];
2720                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2721                         static const int dquant_tab[4]={-1,1,-2,2};
2722
2723                         assert(backup_s.dquant == 0);
2724
2725                         //FIXME intra
2726                         s->mv_dir= best_s.mv_dir;
2727                         s->mv_type = MV_TYPE_16X16;
2728                         s->mb_intra= best_s.mb_intra;
2729                         s->mv[0][0][0] = best_s.mv[0][0][0];
2730                         s->mv[0][0][1] = best_s.mv[0][0][1];
2731                         s->mv[1][0][0] = best_s.mv[1][0][0];
2732                         s->mv[1][0][1] = best_s.mv[1][0][1];
2733
2734                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2735                         for(; qpi<4; qpi++){
2736                             int dquant= dquant_tab[qpi];
2737                             qp= last_qp + dquant;
2738                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2739                                 continue;
2740                             backup_s.dquant= dquant;
2741                             if(s->mb_intra && s->dc_val[0]){
2742                                 for(i=0; i<6; i++){
2743                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2744                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
2745                                 }
2746                             }
2747
2748                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2749                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2750                             if(best_s.qscale != qp){
2751                                 if(s->mb_intra && s->dc_val[0]){
2752                                     for(i=0; i<6; i++){
2753                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2754                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
2755                                     }
2756                                 }
2757                             }
2758                         }
2759                     }
2760                 }
2761                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2762                     int mx= s->b_direct_mv_table[xy][0];
2763                     int my= s->b_direct_mv_table[xy][1];
2764
2765                     backup_s.dquant = 0;
2766                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2767                     s->mb_intra= 0;
2768                     ff_mpeg4_set_direct_mv(s, mx, my);
2769                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2770                                  &dmin, &next_block, mx, my);
2771                 }
2772                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2773                     backup_s.dquant = 0;
2774                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2775                     s->mb_intra= 0;
2776                     ff_mpeg4_set_direct_mv(s, 0, 0);
2777                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2778                                  &dmin, &next_block, 0, 0);
2779                 }
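                /* SKIP_RD: if the best inter mode produced coded coefficients,
                 * also try it with the DCT skipped (no residual) and keep that
                 * version if it is cheaper. */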
2780                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2781                     int coded=0;
2782                     for(i=0; i<6; i++)
2783                         coded |= s->block_last_index[i];
2784                     if(coded){
2785                         int mx,my;
2786                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2787                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2788                             mx=my=0; //FIXME find the one we actually used
2789                             ff_mpeg4_set_direct_mv(s, mx, my);
2790                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2791                             mx= s->mv[1][0][0];
2792                             my= s->mv[1][0][1];
2793                         }else{
2794                             mx= s->mv[0][0][0];
2795                             my= s->mv[0][0][1];
2796                         }
2797
2798                         s->mv_dir= best_s.mv_dir;
2799                         s->mv_type = best_s.mv_type;
2800                         s->mb_intra= 0;
2801 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2802                         s->mv[0][0][1] = best_s.mv[0][0][1];
2803                         s->mv[1][0][0] = best_s.mv[1][0][0];
2804                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2805                         backup_s.dquant= 0;
2806                         s->skipdct=1;
2807                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2808                                         &dmin, &next_block, mx, my);
2809                         s->skipdct=0;
2810                     }
2811                 }
2812
2813                 s->current_picture.f.qscale_table[xy] = best_s.qscale;
2814
2815                 copy_context_after_encode(s, &best_s, -1);
2816
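                /* The winning candidate's bits are in the spare bit buffer;
                 * append them to the real bitstream(s) and continue writing there. */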
2817                 pb_bits_count= put_bits_count(&s->pb);
2818                 flush_put_bits(&s->pb);
2819                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2820                 s->pb= backup_s.pb;
2821
2822                 if(s->data_partitioning){
2823                     pb2_bits_count= put_bits_count(&s->pb2);
2824                     flush_put_bits(&s->pb2);
2825                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2826                     s->pb2= backup_s.pb2;
2827
2828                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2829                     flush_put_bits(&s->tex_pb);
2830                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2831                     s->tex_pb= backup_s.tex_pb;
2832                 }
2833                 s->last_bits= put_bits_count(&s->pb);
2834
2835                 if (CONFIG_H263_ENCODER &&
2836                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2837                     ff_h263_update_motion_val(s);
2838
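                /* next_block==0 means the best candidate was reconstructed into
                 * the RD scratchpad; copy its pixels back into the picture. */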
2839                 if(next_block==0){ //FIXME 16 vs linesize16
2840                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2841                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2842                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2843                 }
2844
2845                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2846                     ff_MPV_decode_mb(s, s->block);
2847             } else {
2848                 int motion_x = 0, motion_y = 0;
2849                 s->mv_type=MV_TYPE_16X16;
2850                 // only one MB-Type possible
2851
2852                 switch(mb_type){
2853                 case CANDIDATE_MB_TYPE_INTRA:
2854                     s->mv_dir = 0;
2855                     s->mb_intra= 1;
2856                     motion_x= s->mv[0][0][0] = 0;
2857                     motion_y= s->mv[0][0][1] = 0;
2858                     break;
2859                 case CANDIDATE_MB_TYPE_INTER:
2860                     s->mv_dir = MV_DIR_FORWARD;
2861                     s->mb_intra= 0;
2862                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2863                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2864                     break;
2865                 case CANDIDATE_MB_TYPE_INTER_I:
2866                     s->mv_dir = MV_DIR_FORWARD;
2867                     s->mv_type = MV_TYPE_FIELD;
2868                     s->mb_intra= 0;
2869                     for(i=0; i<2; i++){
2870                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2871                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2872                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2873                     }
2874                     break;
2875                 case CANDIDATE_MB_TYPE_INTER4V:
2876                     s->mv_dir = MV_DIR_FORWARD;
2877                     s->mv_type = MV_TYPE_8X8;
2878                     s->mb_intra= 0;
2879                     for(i=0; i<4; i++){
2880                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2881                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2882                     }
2883                     break;
2884                 case CANDIDATE_MB_TYPE_DIRECT:
2885                     if (CONFIG_MPEG4_ENCODER) {
2886                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2887                         s->mb_intra= 0;
2888                         motion_x=s->b_direct_mv_table[xy][0];
2889                         motion_y=s->b_direct_mv_table[xy][1];
2890                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2891                     }
2892                     break;
2893                 case CANDIDATE_MB_TYPE_DIRECT0:
2894                     if (CONFIG_MPEG4_ENCODER) {
2895                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2896                         s->mb_intra= 0;
2897                         ff_mpeg4_set_direct_mv(s, 0, 0);
2898                     }
2899                     break;
2900                 case CANDIDATE_MB_TYPE_BIDIR:
2901                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2902                     s->mb_intra= 0;
2903                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2904                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2905                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2906                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2907                     break;
2908                 case CANDIDATE_MB_TYPE_BACKWARD:
2909                     s->mv_dir = MV_DIR_BACKWARD;
2910                     s->mb_intra= 0;
2911                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2912                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2913                     break;
2914                 case CANDIDATE_MB_TYPE_FORWARD:
2915                     s->mv_dir = MV_DIR_FORWARD;
2916                     s->mb_intra= 0;
2917                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2918                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2919                     break;
2920                 case CANDIDATE_MB_TYPE_FORWARD_I:
2921                     s->mv_dir = MV_DIR_FORWARD;
2922                     s->mv_type = MV_TYPE_FIELD;
2923                     s->mb_intra= 0;
2924                     for(i=0; i<2; i++){
2925                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2926                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2927                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2928                     }
2929                     break;
2930                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2931                     s->mv_dir = MV_DIR_BACKWARD;
2932                     s->mv_type = MV_TYPE_FIELD;
2933                     s->mb_intra= 0;
2934                     for(i=0; i<2; i++){
2935                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2936                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2937                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2938                     }
2939                     break;
2940                 case CANDIDATE_MB_TYPE_BIDIR_I:
2941                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2942                     s->mv_type = MV_TYPE_FIELD;
2943                     s->mb_intra= 0;
2944                     for(dir=0; dir<2; dir++){
2945                         for(i=0; i<2; i++){
2946                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2947                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2948                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2949                         }
2950                     }
2951                     break;
2952                 default:
2953                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
2954                 }
2955
2956                 encode_mb(s, motion_x, motion_y);
2957
2958                 // RAL: Update last macroblock type
2959                 s->last_mv_dir = s->mv_dir;
2960
2961                 if (CONFIG_H263_ENCODER &&
2962                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2963                     ff_h263_update_motion_val(s);
2964
2965                 ff_MPV_decode_mb(s, s->block);
2966             }
2967
2968             /* Clear the MV table entry for intra MBs (in I/P/S frames); direct mode in B-frames reads these MVs */
2969             if(s->mb_intra /* && I,P,S_TYPE */){
2970                 s->p_mv_table[xy][0]=0;
2971                 s->p_mv_table[xy][1]=0;
2972             }
2973
2974             if(s->flags&CODEC_FLAG_PSNR){
2975                 int w= 16;
2976                 int h= 16;
2977
2978                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2979                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2980
2981                 s->current_picture.f.error[0] += sse(
2982                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
2983                     s->dest[0], w, h, s->linesize);
2984                 s->current_picture.f.error[1] += sse(
2985                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2986                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2987                 s->current_picture.f.error[2] += sse(
2988                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2989                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2990             }
2991             if(s->loop_filter){
2992                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
2993                     ff_h263_loop_filter(s);
2994             }
2995             av_dlog(s->avctx, "MB %d %d bits\n",
2996                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
2997         }
2998     }
2999
3000     // Not pretty, but the extension header must be written before flushing, so it has to be here
3001     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3002         ff_msmpeg4_encode_ext_header(s);
3003
3004     write_slice_end(s);
3005
3006     /* Send the last GOB if RTP */
3007     if (s->avctx->rtp_callback) {
3008         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3009         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3010         /* Call the RTP callback to send the last GOB */
3011         emms_c();
3012         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3013     }
3014
3015     return 0;
3016 }
3017
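/* MERGE() adds a per-slice-context field into the main context and zeroes it
 * in the source, so statistics from slice threads end up in one place. */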
3018 #define MERGE(field) dst->field += src->field; src->field=0
3019 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3020     MERGE(me.scene_change_score);
3021     MERGE(me.mc_mb_var_sum_temp);
3022     MERGE(me.mb_var_sum_temp);
3023 }
3024
3025 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3026     int i;
3027
3028     MERGE(dct_count[0]); // note: the other DCT variables are not part of the context
3029     MERGE(dct_count[1]);
3030     MERGE(mv_bits);
3031     MERGE(i_tex_bits);
3032     MERGE(p_tex_bits);
3033     MERGE(i_count);
3034     MERGE(f_count);
3035     MERGE(b_count);
3036     MERGE(skip_count);
3037     MERGE(misc_bits);
3038     MERGE(error_count);
3039     MERGE(padding_bug_score);
3040     MERGE(current_picture.f.error[0]);
3041     MERGE(current_picture.f.error[1]);
3042     MERGE(current_picture.f.error[2]);
3043
3044     if(dst->avctx->noise_reduction){
3045         for(i=0; i<64; i++){
3046             MERGE(dct_error_sum[0][i]);
3047             MERGE(dct_error_sum[1][i]);
3048         }
3049     }
3050
3051     assert(put_bits_count(&src->pb) % 8 ==0);
3052     assert(put_bits_count(&dst->pb) % 8 ==0);
3053     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3054     flush_put_bits(&dst->pb);
3055 }
3056
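/* Pick the frame-level quality: a pending next_lambda, the rate controller's
 * estimate, or the fixed qscale; with adaptive quantization the per-MB qscale
 * table is also cleaned up for codecs that restrict dquant. */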
3057 static int estimate_qp(MpegEncContext *s, int dry_run){
3058     if (s->next_lambda){
3059         s->current_picture_ptr->f.quality =
3060         s->current_picture.f.quality = s->next_lambda;
3061         if(!dry_run) s->next_lambda= 0;
3062     } else if (!s->fixed_qscale) {
3063         s->current_picture_ptr->f.quality =
3064         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3065         if (s->current_picture.f.quality < 0)
3066             return -1;
3067     }
3068
3069     if(s->adaptive_quant){
3070         switch(s->codec_id){
3071         case AV_CODEC_ID_MPEG4:
3072             if (CONFIG_MPEG4_ENCODER)
3073                 ff_clean_mpeg4_qscales(s);
3074             break;
3075         case AV_CODEC_ID_H263:
3076         case AV_CODEC_ID_H263P:
3077         case AV_CODEC_ID_FLV1:
3078             if (CONFIG_H263_ENCODER)
3079                 ff_clean_h263_qscales(s);
3080             break;
3081         default:
3082             ff_init_qscale_tab(s);
3083         }
3084
3085         s->lambda= s->lambda_table[0];
3086         //FIXME broken
3087     }else
3088         s->lambda = s->current_picture.f.quality;
3089     update_qscale(s);
3090     return 0;
3091 }
3092
3093 /* must be called before writing the header */
3094 static void set_frame_distances(MpegEncContext * s){
3095     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3096     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3097
3098     if(s->pict_type==AV_PICTURE_TYPE_B){
3099         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3100         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3101     }else{
3102         s->pp_time= s->time - s->last_non_b_time;
3103         s->last_non_b_time= s->time;
3104         assert(s->picture_number==0 || s->pp_time > 0);
3105     }
3106 }
3107
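/* Encode one picture: run motion estimation, possibly switch to an I-frame on
 * a scene change, choose f_code/b_code, set up quantization, write the picture
 * header and encode all macroblocks (in slice threads if enabled). */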
3108 static int encode_picture(MpegEncContext *s, int picture_number)
3109 {
3110     int i, ret;
3111     int bits;
3112     int context_count = s->slice_context_count;
3113
3114     s->picture_number = picture_number;
3115
3116     /* Reset the average MB variance */
3117     s->me.mb_var_sum_temp    =
3118     s->me.mc_mb_var_sum_temp = 0;
3119
3120     /* we need to initialize some time vars before we can encode b-frames */
3121     // RAL: Condition added for MPEG1VIDEO
3122     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3123         set_frame_distances(s);
3124     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3125         ff_set_mpeg4_time(s);
3126
3127     s->me.scene_change_score=0;
3128
3129 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3130
3131     if(s->pict_type==AV_PICTURE_TYPE_I){
3132         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3133         else                        s->no_rounding=0;
3134     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3135         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3136             s->no_rounding ^= 1;
3137     }
3138
3139     if(s->flags & CODEC_FLAG_PASS2){
3140         if (estimate_qp(s,1) < 0)
3141             return -1;
3142         ff_get_2pass_fcode(s);
3143     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3144         if(s->pict_type==AV_PICTURE_TYPE_B)
3145             s->lambda= s->last_lambda_for[s->pict_type];
3146         else
3147             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3148         update_qscale(s);
3149     }
3150
3151     s->mb_intra=0; //for the rate distortion & bit compare functions
3152     for(i=1; i<context_count; i++){
3153         ret = ff_update_duplicate_context(s->thread_context[i], s);
3154         if (ret < 0)
3155             return ret;
3156     }
3157
3158     if(ff_init_me(s)<0)
3159         return -1;
3160
3161     /* Estimate motion for every MB */
3162     if(s->pict_type != AV_PICTURE_TYPE_I){
3163         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3164         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3165         if(s->pict_type != AV_PICTURE_TYPE_B && s->avctx->me_threshold==0){
3166             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3167                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3168             }
3169         }
3170
3171         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3172     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3173         /* I-Frame */
3174         for(i=0; i<s->mb_stride*s->mb_height; i++)
3175             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3176
3177         if(!s->fixed_qscale){
3178             /* finding spatial complexity for I-frame rate control */
3179             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3180         }
3181     }
3182     for(i=1; i<context_count; i++){
3183         merge_context_after_me(s, s->thread_context[i]);
3184     }
3185     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3186     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3187     emms_c();
3188
3189     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3190         s->pict_type= AV_PICTURE_TYPE_I;
3191         for(i=0; i<s->mb_stride*s->mb_height; i++)
3192             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3193         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3194                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3195     }
3196
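    /* Choose f_code/b_code from the estimated motion vectors and adjust
     * vectors that exceed the representable range. */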
3197     if(!s->umvplus){
3198         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3199             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3200
3201             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3202                 int a,b;
3203                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3204                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3205                 s->f_code= FFMAX3(s->f_code, a, b);
3206             }
3207
3208             ff_fix_long_p_mvs(s);
3209             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3210             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3211                 int j;
3212                 for(i=0; i<2; i++){
3213                     for(j=0; j<2; j++)
3214                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3215                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3216                 }
3217             }
3218         }
3219
3220         if(s->pict_type==AV_PICTURE_TYPE_B){
3221             int a, b;
3222
3223             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3224             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3225             s->f_code = FFMAX(a, b);
3226
3227             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3228             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3229             s->b_code = FFMAX(a, b);
3230
3231             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3232             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3233             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3234             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3235             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3236                 int dir, j;
3237                 for(dir=0; dir<2; dir++){
3238                     for(i=0; i<2; i++){
3239                         for(j=0; j<2; j++){
3240                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3241                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3242                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3243                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3244                         }
3245                     }
3246                 }
3247             }
3248         }
3249     }
3250
3251     if (estimate_qp(s, 0) < 0)
3252         return -1;
3253
3254     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3255         s->qscale= 3; //reduce clipping problems
3256
3257     if (s->out_format == FMT_MJPEG) {
3258         /* for mjpeg, we do include qscale in the matrix */
3259         for(i=1;i<64;i++){
3260             int j= s->dsp.idct_permutation[i];
3261
3262             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3263         }
3264         s->y_dc_scale_table=
3265         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3266         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3267         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3268                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3269         s->qscale= 8;
3270     }
3271
3272     //FIXME var duplication
3273     s->current_picture_ptr->f.key_frame =
3274     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3275     s->current_picture_ptr->f.pict_type =
3276     s->current_picture.f.pict_type = s->pict_type;
3277
3278     if (s->current_picture.f.key_frame)
3279         s->picture_in_gop_number=0;
3280
3281     s->last_bits= put_bits_count(&s->pb);
3282     switch(s->out_format) {
3283     case FMT_MJPEG:
3284         if (CONFIG_MJPEG_ENCODER)
3285             ff_mjpeg_encode_picture_header(s);
3286         break;
3287     case FMT_H261:
3288         if (CONFIG_H261_ENCODER)
3289             ff_h261_encode_picture_header(s, picture_number);
3290         break;
3291     case FMT_H263:
3292         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3293             ff_wmv2_encode_picture_header(s, picture_number);
3294         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3295             ff_msmpeg4_encode_picture_header(s, picture_number);
3296         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3297             ff_mpeg4_encode_picture_header(s, picture_number);
3298         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3299             ff_rv10_encode_picture_header(s, picture_number);
3300         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3301             ff_rv20_encode_picture_header(s, picture_number);
3302         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3303             ff_flv_encode_picture_header(s, picture_number);
3304         else if (CONFIG_H263_ENCODER)
3305             ff_h263_encode_picture_header(s, picture_number);
3306         break;
3307     case FMT_MPEG1:
3308         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3309             ff_mpeg1_encode_picture_header(s, picture_number);
3310         break;
3311     case FMT_H264:
3312         break;
3313     default:
3314         assert(0);
3315     }
3316     bits= put_bits_count(&s->pb);
3317     s->header_bits= bits - s->last_bits;
3318
3319     for(i=1; i<context_count; i++){
3320         update_duplicate_context_after_me(s->thread_context[i], s);
3321     }
3322     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3323     for(i=1; i<context_count; i++){
3324         merge_context_after_encode(s, s->thread_context[i]);
3325     }
3326     emms_c();
3327     return 0;
3328 }
3329
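/* Noise reduction: track the average magnitude of each DCT coefficient and
 * shrink coefficients towards zero by the running offset, never past zero. */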
3330 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block){
3331     const int intra= s->mb_intra;
3332     int i;
3333
3334     s->dct_count[intra]++;
3335
3336     for(i=0; i<64; i++){
3337         int level= block[i];
3338
3339         if(level){
3340             if(level>0){
3341                 s->dct_error_sum[intra][i] += level;
3342                 level -= s->dct_offset[intra][i];
3343                 if(level<0) level=0;
3344             }else{
3345                 s->dct_error_sum[intra][i] -= level;
3346                 level += s->dct_offset[intra][i];
3347                 if(level>0) level=0;
3348             }
3349             block[i]= level;
3350         }
3351     }
3352 }
3353
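/* Trellis quantization: for every scan position keep the cheapest
 * (distortion + lambda * bits) path over the candidate levels, then trace the
 * surviving path back into block[]. */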
3354 static int dct_quantize_trellis_c(MpegEncContext *s,
3355                                   DCTELEM *block, int n,
3356                                   int qscale, int *overflow){
3357     const int *qmat;
3358     const uint8_t *scantable= s->intra_scantable.scantable;
3359     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3360     int max=0;
3361     unsigned int threshold1, threshold2;
3362     int bias=0;
3363     int run_tab[65];
3364     int level_tab[65];
3365     int score_tab[65];
3366     int survivor[65];
3367     int survivor_count;
3368     int last_run=0;
3369     int last_level=0;
3370     int last_score= 0;
3371     int last_i;
3372     int coeff[2][64];
3373     int coeff_count[64];
3374     int qmul, qadd, start_i, last_non_zero, i, dc;
3375     const int esc_length= s->ac_esc_length;
3376     uint8_t * length;
3377     uint8_t * last_length;
3378     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3379
3380     s->dsp.fdct (block);
3381
3382     if(s->dct_error_sum)
3383         s->denoise_dct(s, block);
3384     qmul= qscale*16;
3385     qadd= ((qscale-1)|1)*8;
3386
3387     if (s->mb_intra) {
3388         int q;
3389         if (!s->h263_aic) {
3390             if (n < 4)
3391                 q = s->y_dc_scale;
3392             else
3393                 q = s->c_dc_scale;
3394             q = q << 3;
3395         } else{
3396             /* For AIC we skip quant/dequant of INTRADC */
3397             q = 1 << 3;
3398             qadd=0;
3399         }
3400
3401         /* note: block[0] is assumed to be positive */
3402         block[0] = (block[0] + (q >> 1)) / q;
3403         start_i = 1;
3404         last_non_zero = 0;
3405         qmat = s->q_intra_matrix[qscale];
3406         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3407             bias= 1<<(QMAT_SHIFT-1);
3408         length     = s->intra_ac_vlc_length;
3409         last_length= s->intra_ac_vlc_last_length;
3410     } else {
3411         start_i = 0;
3412         last_non_zero = -1;
3413         qmat = s->q_inter_matrix[qscale];
3414         length     = s->inter_ac_vlc_length;
3415         last_length= s->inter_ac_vlc_last_length;
3416     }
3417     last_i= start_i;
3418
3419     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3420     threshold2= (threshold1<<1);
3421
3422     for(i=63; i>=start_i; i--) {
3423         const int j = scantable[i];
3424         int level = block[j] * qmat[j];
3425
3426         if(((unsigned)(level+threshold1))>threshold2){
3427             last_non_zero = i;
3428             break;
3429         }
3430     }
3431
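    /* Quantize each coefficient and record up to two candidate levels
     * (level and level-1) for the trellis search below. */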
3432     for(i=start_i; i<=last_non_zero; i++) {
3433         const int j = scantable[i];
3434         int level = block[j] * qmat[j];
3435
3436 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3437 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3438         if(((unsigned)(level+threshold1))>threshold2){
3439             if(level>0){
3440                 level= (bias + level)>>QMAT_SHIFT;
3441                 coeff[0][i]= level;
3442                 coeff[1][i]= level-1;
3443 //                coeff[2][k]= level-2;
3444             }else{
3445                 level= (bias - level)>>QMAT_SHIFT;
3446                 coeff[0][i]= -level;
3447                 coeff[1][i]= -level+1;
3448 //                coeff[2][k]= -level+2;
3449             }
3450             coeff_count[i]= FFMIN(level, 2);
3451             assert(coeff_count[i]);
3452             max |=level;
3453         }else{
3454             coeff[0][i]= (level>>31)|1;
3455             coeff_count[i]= 1;
3456         }
3457     }
3458
3459     *overflow= s->max_qcoeff < max; //overflow might have happened
3460
3461     if(last_non_zero < start_i){
3462         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3463         return last_non_zero;
3464     }
3465
3466     score_tab[start_i]= 0;
3467     survivor[0]= start_i;
3468     survivor_count= 1;
3469
3470     for(i=start_i; i<=last_non_zero; i++){
3471         int level_index, j, zero_distortion;
3472         int dct_coeff= FFABS(block[ scantable[i] ]);
3473         int best_score=256*256*256*120;
3474
3475         if (s->dsp.fdct == ff_fdct_ifast)
3476             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3477         zero_distortion= dct_coeff*dct_coeff;
3478
3479         for(level_index=0; level_index < coeff_count[i]; level_index++){
3480             int distortion;
3481             int level= coeff[level_index][i];
3482             const int alevel= FFABS(level);
3483             int unquant_coeff;
3484
3485             assert(level);
3486
3487             if(s->out_format == FMT_H263){
3488                 unquant_coeff= alevel*qmul + qadd;
3489             }else{ //MPEG1
3490                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3491                 if(s->mb_intra){
3492                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3493                         unquant_coeff =   (unquant_coeff - 1) | 1;
3494                 }else{
3495                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3496                         unquant_coeff =   (unquant_coeff - 1) | 1;
3497                 }
3498                 unquant_coeff<<= 3;
3499             }
3500
3501             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3502             level+=64;
3503             if((level&(~127)) == 0){
3504                 for(j=survivor_count-1; j>=0; j--){
3505                     int run= i - survivor[j];
3506                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3507                     score += score_tab[i-run];
3508
3509                     if(score < best_score){
3510                         best_score= score;
3511                         run_tab[i+1]= run;
3512                         level_tab[i+1]= level-64;
3513                     }
3514                 }
3515
3516                 if(s->out_format == FMT_H263){
3517                     for(j=survivor_count-1; j>=0; j--){
3518                         int run= i - survivor[j];
3519                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3520                         score += score_tab[i-run];
3521                         if(score < last_score){
3522                             last_score= score;
3523                             last_run= run;
3524                             last_level= level-64;
3525                             last_i= i+1;
3526                         }
3527                     }
3528                 }
3529             }else{
3530                 distortion += esc_length*lambda;
3531                 for(j=survivor_count-1; j>=0; j--){
3532                     int run= i - survivor[j];
3533                     int score= distortion + score_tab[i-run];
3534
3535                     if(score < best_score){
3536                         best_score= score;
3537                         run_tab[i+1]= run;
3538                         level_tab[i+1]= level-64;
3539                     }
3540                 }
3541
3542                 if(s->out_format == FMT_H263){
3543                   for(j=survivor_count-1; j>=0; j--){
3544                         int run= i - survivor[j];
3545                         int score= distortion + score_tab[i-run];
3546                         if(score < last_score){
3547                             last_score= score;
3548                             last_run= run;
3549                             last_level= level-64;
3550                             last_i= i+1;
3551                         }
3552                     }
3553                 }
3554             }
3555         }
3556
3557         score_tab[i+1]= best_score;
3558
3559         //Note: MPEG-4 has a VLC code that is 1 bit shorter than another one with a shorter run and the same level
3560         if(last_non_zero <= 27){
3561             for(; survivor_count; survivor_count--){
3562                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3563                     break;
3564             }
3565         }else{
3566             for(; survivor_count; survivor_count--){
3567                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3568                     break;
3569             }
3570         }
3571
3572         survivor[ survivor_count++ ]= i+1;
3573     }
3574
3575     if(s->out_format != FMT_H263){
3576         last_score= 256*256*256*120;
3577         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3578             int score= score_tab[i];
3579             if(i) score += lambda*2; //FIXME: be more exact?
3580
3581             if(score < last_score){
3582                 last_score= score;
3583                 last_i= i;
3584                 last_level= level_tab[i];
3585                 last_run= run_tab[i];
3586             }
3587         }
3588     }
3589
3590     s->coded_score[n] = last_score;
3591
3592     dc= FFABS(block[0]);
3593     last_non_zero= last_i - 1;
3594     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3595
3596     if(last_non_zero < start_i)
3597         return last_non_zero;
3598
3599     if(last_non_zero == 0 && start_i == 0){
3600         int best_level= 0;
3601         int best_score= dc * dc;
3602
3603         for(i=0; i<coeff_count[0]; i++){
3604             int level= coeff[i][0];
3605             int alevel= FFABS(level);
3606             int unquant_coeff, score, distortion;
3607
3608             if(s->out_format == FMT_H263){
3609                     unquant_coeff= (alevel*qmul + qadd)>>3;
3610             }else{ //MPEG1
3611                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3612                     unquant_coeff =   (unquant_coeff - 1) | 1;
3613             }
3614             unquant_coeff = (unquant_coeff + 4) >> 3;
3615             unquant_coeff<<= 3 + 3;
3616
3617             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3618             level+=64;
3619             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3620             else                    score= distortion + esc_length*lambda;
3621
3622             if(score < best_score){
3623                 best_score= score;
3624                 best_level= level - 64;
3625             }
3626         }
3627         block[0]= best_level;
3628         s->coded_score[n] = best_score - dc*dc;
3629         if(best_level == 0) return -1;
3630         else                return last_non_zero;
3631     }
3632
3633     i= last_i;
3634     assert(last_level);
3635
3636     block[ perm_scantable[last_non_zero] ]= last_level;
3637     i -= last_run + 1;
3638
3639     for(; i>start_i; i -= run_tab[i] + 1){
3640         block[ perm_scantable[i-1] ]= level_tab[i];
3641     }
3642
3643     return last_non_zero;
3644 }
3645
3646 //#define REFINE_STATS 1
3647 static int16_t basis[64][64];
3648
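/* Precompute the 64 8x8 DCT basis functions, in IDCT permutation order and
 * scaled by BASIS_SHIFT, for the refinement search below. */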
3649 static void build_basis(uint8_t *perm){
3650     int i, j, x, y;
3651     emms_c();
3652     for(i=0; i<8; i++){
3653         for(j=0; j<8; j++){
3654             for(y=0; y<8; y++){
3655                 for(x=0; x<8; x++){
3656                     double s= 0.25*(1<<BASIS_SHIFT);
3657                     int index= 8*i + j;
3658                     int perm_index= perm[index];
3659                     if(i==0) s*= sqrt(0.5);
3660                     if(j==0) s*= sqrt(0.5);
3661                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3662                 }
3663             }
3664         }
3665     }
3666 }
3667
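/* Quantization refinement (noise shaping): greedily change single quantized
 * coefficients by +-1 as long as the weighted reconstruction error plus the
 * lambda-scaled bit cost keeps decreasing. */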
3668 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3669                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
3670                         int n, int qscale){
3671     int16_t rem[64];
3672     LOCAL_ALIGNED_16(DCTELEM, d1, [64]);
3673     const uint8_t *scantable= s->intra_scantable.scantable;
3674     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3675 //    unsigned int threshold1, threshold2;
3676 //    int bias=0;
3677     int run_tab[65];
3678     int prev_run=0;
3679     int prev_level=0;
3680     int qmul, qadd, start_i, last_non_zero, i, dc;
3681     uint8_t * length;
3682     uint8_t * last_length;
3683     int lambda;
3684     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3685 #ifdef REFINE_STATS
3686 static int count=0;
3687 static int after_last=0;
3688 static int to_zero=0;
3689 static int from_zero=0;
3690 static int raise=0;
3691 static int lower=0;
3692 static int messed_sign=0;
3693 #endif
3694
3695     if(basis[0][0] == 0)
3696         build_basis(s->dsp.idct_permutation);
3697
3698     qmul= qscale*2;
3699     qadd= (qscale-1)|1;
3700     if (s->mb_intra) {
3701         if (!s->h263_aic) {
3702             if (n < 4)
3703                 q = s->y_dc_scale;
3704             else
3705                 q = s->c_dc_scale;
3706         } else{
3707             /* For AIC we skip quant/dequant of INTRADC */
3708             q = 1;
3709             qadd=0;
3710         }
3711         q <<= RECON_SHIFT-3;
3712         /* note: block[0] is assumed to be positive */
3713         dc= block[0]*q;
3714 //        block[0] = (block[0] + (q >> 1)) / q;
3715         start_i = 1;
3716 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3717 //            bias= 1<<(QMAT_SHIFT-1);
3718         length     = s->intra_ac_vlc_length;
3719         last_length= s->intra_ac_vlc_last_length;
3720     } else {
3721         dc= 0;
3722         start_i = 0;
3723         length     = s->inter_ac_vlc_length;
3724         last_length= s->inter_ac_vlc_last_length;
3725     }
3726     last_non_zero = s->block_last_index[n];
3727
3728 #ifdef REFINE_STATS
3729 {START_TIMER
3730 #endif
3731     dc += (1<<(RECON_SHIFT-1));
3732     for(i=0; i<64; i++){
3733         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME use orig directly instead of copying to rem[]
3734     }
3735 #ifdef REFINE_STATS
3736 STOP_TIMER("memset rem[]")}
3737 #endif
3738     sum=0;
3739     for(i=0; i<64; i++){
3740         int one= 36;
3741         int qns=4;
3742         int w;
3743
3744         w= FFABS(weight[i]) + qns*one;
3745         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3746
3747         weight[i] = w;
3748 //        w=weight[i] = (63*qns + (w/2)) / w;
3749
3750         assert(w>0);
3751         assert(w<(1<<6));
3752         sum += w*w;
3753     }
3754     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3755 #ifdef REFINE_STATS
3756 {START_TIMER
3757 #endif
3758     run=0;
3759     rle_index=0;
3760     for(i=start_i; i<=last_non_zero; i++){
3761         int j= perm_scantable[i];
3762         const int level= block[j];
3763         int coeff;
3764
3765         if(level){
3766             if(level<0) coeff= qmul*level - qadd;
3767             else        coeff= qmul*level + qadd;
3768             run_tab[rle_index++]=run;
3769             run=0;
3770
3771             s->dsp.add_8x8basis(rem, basis[j], coeff);
3772         }else{
3773             run++;
3774         }
3775     }
3776 #ifdef REFINE_STATS
3777 if(last_non_zero>0){
3778 STOP_TIMER("init rem[]")
3779 }
3780 }
3781
3782 {START_TIMER
3783 #endif
3784     for(;;){
3785         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3786         int best_coeff=0;
3787         int best_change=0;
3788         int run2, best_unquant_change=0, analyze_gradient;
3789 #ifdef REFINE_STATS
3790 {START_TIMER
3791 #endif
3792         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3793
3794         if(analyze_gradient){
3795 #ifdef REFINE_STATS
3796 {START_TIMER
3797 #endif
3798             for(i=0; i<64; i++){
3799                 int w= weight[i];
3800
3801                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3802             }
3803 #ifdef REFINE_STATS
3804 STOP_TIMER("rem*w*w")}
3805 {START_TIMER
3806 #endif
3807             s->dsp.fdct(d1);
3808 #ifdef REFINE_STATS
3809 STOP_TIMER("dct")}
3810 #endif
3811         }
3812
3813         if(start_i){
3814             const int level= block[0];
3815             int change, old_coeff;
3816
3817             assert(s->mb_intra);
3818
3819             old_coeff= q*level;
3820
3821             for(change=-1; change<=1; change+=2){
3822                 int new_level= level + change;
3823                 int score, new_coeff;
3824
3825                 new_coeff= q*new_level;
3826                 if(new_coeff >= 2048 || new_coeff < 0)
3827                     continue;
3828
3829                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3830                 if(score<best_score){
3831                     best_score= score;
3832                     best_coeff= 0;
3833                     best_change= change;
3834                     best_unquant_change= new_coeff - old_coeff;
3835                 }
3836             }
3837         }
3838
3839         run=0;
3840         rle_index=0;
3841         run2= run_tab[rle_index++];
3842         prev_level=0;
3843         prev_run=0;
3844
3845         for(i=start_i; i<64; i++){
3846             int j= perm_scantable[i];
3847             const int level= block[j];
3848             int change, old_coeff;
3849
3850             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3851                 break;
3852
3853             if(level){
3854                 if(level<0) old_coeff= qmul*level - qadd;
3855                 else        old_coeff= qmul*level + qadd;
3856                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3857             }else{
3858                 old_coeff=0;
3859                 run2--;
3860                 assert(run2>=0 || i >= last_non_zero );
3861             }
3862
3863             for(change=-1; change<=1; change+=2){
3864                 int new_level= level + change;
3865                 int score, new_coeff, unquant_change;
3866
3867                 score=0;
3868                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3869                    continue;
3870
3871                 if(new_level){
3872                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3873                     else            new_coeff= qmul*new_level + qadd;
3874                     if(new_coeff >= 2048 || new_coeff <= -2048)
3875                         continue;
3876                     //FIXME check for overflow
3877
3878                     if(level){
3879                         if(level < 63 && level > -63){
3880                             if(i < last_non_zero)
3881                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3882                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3883                             else
3884                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3885                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3886                         }
3887                     }else{
3888                         assert(FFABS(new_level)==1);
3889
3890                         if(analyze_gradient){
3891                             int g= d1[ scantable[i] ];
3892                             if(g && (g^new_level) >= 0)
3893                                 continue;
3894                         }
3895
3896                         if(i < last_non_zero){
3897                             int next_i= i + run2 + 1;
3898                             int next_level= block[ perm_scantable[next_i] ] + 64;
3899
3900                             if(next_level&(~127))
3901                                 next_level= 0;
3902
3903                             if(next_i < last_non_zero)
3904                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3905                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3906                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3907                             else
3908                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3909                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3910                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3911                         }else{
3912                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3913                             if(prev_level){
3914                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3915                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3916                             }
3917                         }
3918                     }
3919                 }else{
3920                     new_coeff=0;
3921                     assert(FFABS(level)==1);
3922
3923                     if(i < last_non_zero){
3924                         int next_i= i + run2 + 1;
3925                         int next_level= block[ perm_scantable[next_i] ] + 64;
3926
3927                         if(next_level&(~127))
3928                             next_level= 0;
3929
3930                         if(next_i < last_non_zero)
3931                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3932                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
3933                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3934                         else
3935                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3936                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3937                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3938                     }else{
3939                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
3940                         if(prev_level){
3941                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3942                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3943                         }
3944                     }
3945                 }
3946
3947                 score *= lambda;
3948
3949                 unquant_change= new_coeff - old_coeff;
3950                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
3951
3952                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
3953                 if(score<best_score){
3954                     best_score= score;
3955                     best_coeff= i;
3956                     best_change= change;
3957                     best_unquant_change= unquant_change;
3958                 }
3959             }
3960             if(level){
3961                 prev_level= level + 64;
3962                 if(prev_level&(~127))
3963                     prev_level= 0;
3964                 prev_run= run;
3965                 run=0;
3966             }else{
3967                 run++;
3968             }
3969         }
3970 #ifdef REFINE_STATS
3971 STOP_TIMER("iterative step")}
3972 #endif
3973
3974         if(best_change){
3975             int j= perm_scantable[ best_coeff ];
3976
3977             block[j] += best_change;
3978
3979             if(best_coeff > last_non_zero){
3980                 last_non_zero= best_coeff;
3981                 assert(block[j]);
3982 #ifdef REFINE_STATS
3983 after_last++;
3984 #endif
3985             }else{
3986 #ifdef REFINE_STATS
3987 if(block[j]){
3988     if(block[j] - best_change){
3989         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
3990             raise++;
3991         }else{
3992             lower++;
3993         }
3994     }else{
3995         from_zero++;
3996     }
3997 }else{
3998     to_zero++;
3999 }
4000 #endif
4001                 for(; last_non_zero>=start_i; last_non_zero--){
4002                     if(block[perm_scantable[last_non_zero]])
4003                         break;
4004                 }
4005             }
4006 #ifdef REFINE_STATS
4007 count++;
4008 if(256*256*256*64 % count == 0){
4009     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4010 }
4011 #endif
4012             run=0;
4013             rle_index=0;
4014             for(i=start_i; i<=last_non_zero; i++){
4015                 int j= perm_scantable[i];
4016                 const int level= block[j];
4017
4018                  if(level){
4019                      run_tab[rle_index++]=run;
4020                      run=0;
4021                  }else{
4022                      run++;
4023                  }
4024             }
4025
4026             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4027         }else{
4028             break;
4029         }
4030     }
4031 #ifdef REFINE_STATS
4032 if(last_non_zero>0){
4033 STOP_TIMER("iterative search")
4034 }
4035 }
4036 #endif
4037
4038     return last_non_zero;
4039 }
4040
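/* Default (non-trellis) quantizer: forward DCT, optional denoising, then
 * scalar quantization with the codec's bias; returns the last non-zero index. */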
4041 int ff_dct_quantize_c(MpegEncContext *s,
4042                         DCTELEM *block, int n,
4043                         int qscale, int *overflow)
4044 {
4045     int i, j, level, last_non_zero, q, start_i;
4046     const int *qmat;
4047     const uint8_t *scantable= s->intra_scantable.scantable;
4048     int bias;
4049     int max=0;
4050     unsigned int threshold1, threshold2;
4051
4052     s->dsp.fdct (block);
4053
4054     if(s->dct_error_sum)
4055         s->denoise_dct(s, block);
4056
4057     if (s->mb_intra) {
4058         if (!s->h263_aic) {
4059             if (n < 4)
4060                 q = s->y_dc_scale;
4061             else
4062                 q = s->c_dc_scale;
4063             q = q << 3;
4064         } else
4065             /* For AIC we skip quant/dequant of INTRADC */
4066             q = 1 << 3;
4067
4068         /* note: block[0] is assumed to be positive */
4069         block[0] = (block[0] + (q >> 1)) / q;
4070         start_i = 1;
4071         last_non_zero = 0;
4072         qmat = s->q_intra_matrix[qscale];
4073         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4074     } else {
4075         start_i = 0;
4076         last_non_zero = -1;
4077         qmat = s->q_inter_matrix[qscale];
4078         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4079     }
4080     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4081     threshold2= (threshold1<<1);
4082     for(i=63;i>=start_i;i--) {
4083         j = scantable[i];
4084         level = block[j] * qmat[j];
4085
4086         if(((unsigned)(level+threshold1))>threshold2){
4087             last_non_zero = i;
4088             break;
4089         }else{
4090             block[j]=0;
4091         }
4092     }
4093     for(i=start_i; i<=last_non_zero; i++) {
4094         j = scantable[i];
4095         level = block[j] * qmat[j];
4096
4097 //        if(   bias+level >= (1<<QMAT_SHIFT)
4098 //           || bias-level >= (1<<QMAT_SHIFT)){
4099         if(((unsigned)(level+threshold1))>threshold2){
4100             if(level>0){
4101                 level= (bias + level)>>QMAT_SHIFT;
4102                 block[j]= level;
4103             }else{
4104                 level= (bias - level)>>QMAT_SHIFT;
4105                 block[j]= -level;
4106             }
4107             max |=level;
4108         }else{
4109             block[j]=0;
4110         }
4111     }
4112     *overflow= s->max_qcoeff < max; //overflow might have happened
4113
4114     /* We need this permutation so the block matches the IDCT's coefficient order; only the non-zero elements need to be permuted */
4115     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4116         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4117
4118     return last_non_zero;
4119 }
4120
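/* AVOption tables, AVClass definitions and AVCodec entries for the encoders
 * built on this shared MPEG-video encoding core. */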
4121 #define OFFSET(x) offsetof(MpegEncContext, x)
4122 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4123 static const AVOption h263_options[] = {
4124     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4125     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4126     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4127     FF_MPV_COMMON_OPTS
4128     { NULL },
4129 };
4130
4131 static const AVClass h263_class = {
4132     .class_name = "H.263 encoder",
4133     .item_name  = av_default_item_name,
4134     .option     = h263_options,
4135     .version    = LIBAVUTIL_VERSION_INT,
4136 };
4137
4138 AVCodec ff_h263_encoder = {
4139     .name           = "h263",
4140     .type           = AVMEDIA_TYPE_VIDEO,
4141     .id             = AV_CODEC_ID_H263,
4142     .priv_data_size = sizeof(MpegEncContext),
4143     .init           = ff_MPV_encode_init,
4144     .encode2        = ff_MPV_encode_picture,
4145     .close          = ff_MPV_encode_end,
4146     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4147     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4148     .priv_class     = &h263_class,
4149 };
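/* Usage sketch (illustrative, not part of the encoder): the private options
 * declared above live on the codec's priv_data and can be set through the
 * AVOption API before the encoder is opened, e.g.
 *
 *     AVCodec *codec        = avcodec_find_encoder(AV_CODEC_ID_H263);
 *     AVCodecContext *avctx = avcodec_alloc_context3(codec);
 *     av_opt_set_int(avctx->priv_data, "obmc", 1, 0);
 *
 * with width, height, pix_fmt and time_base configured as usual before
 * avcodec_open2(). */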
4150
4151 static const AVOption h263p_options[] = {
4152     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4153     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4154     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4155     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4156     FF_MPV_COMMON_OPTS
4157     { NULL },
4158 };
4159 static const AVClass h263p_class = {
4160     .class_name = "H.263p encoder",
4161     .item_name  = av_default_item_name,
4162     .option     = h263p_options,
4163     .version    = LIBAVUTIL_VERSION_INT,
4164 };
4165
4166 AVCodec ff_h263p_encoder = {
4167     .name           = "h263p",
4168     .type           = AVMEDIA_TYPE_VIDEO,
4169     .id             = AV_CODEC_ID_H263P,
4170     .priv_data_size = sizeof(MpegEncContext),
4171     .init           = ff_MPV_encode_init,
4172     .encode2        = ff_MPV_encode_picture,
4173     .close          = ff_MPV_encode_end,
4174     .capabilities   = CODEC_CAP_SLICE_THREADS,
4175     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4176     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4177     .priv_class     = &h263p_class,
4178 };
4179
4180 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4181
4182 AVCodec ff_msmpeg4v2_encoder = {
4183     .name           = "msmpeg4v2",
4184     .type           = AVMEDIA_TYPE_VIDEO,
4185     .id             = AV_CODEC_ID_MSMPEG4V2,
4186     .priv_data_size = sizeof(MpegEncContext),
4187     .init           = ff_MPV_encode_init,
4188     .encode2        = ff_MPV_encode_picture,
4189     .close          = ff_MPV_encode_end,
4190     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4191     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4192     .priv_class     = &msmpeg4v2_class,
4193 };
4194
4195 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4196
4197 AVCodec ff_msmpeg4v3_encoder = {
4198     .name           = "msmpeg4",
4199     .type           = AVMEDIA_TYPE_VIDEO,
4200     .id             = AV_CODEC_ID_MSMPEG4V3,
4201     .priv_data_size = sizeof(MpegEncContext),
4202     .init           = ff_MPV_encode_init,
4203     .encode2        = ff_MPV_encode_picture,
4204     .close          = ff_MPV_encode_end,
4205     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4206     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4207     .priv_class     = &msmpeg4v3_class,
4208 };
4209
4210 FF_MPV_GENERIC_CLASS(wmv1)
4211
4212 AVCodec ff_wmv1_encoder = {
4213     .name           = "wmv1",
4214     .type           = AVMEDIA_TYPE_VIDEO,
4215     .id             = AV_CODEC_ID_WMV1,
4216     .priv_data_size = sizeof(MpegEncContext),
4217     .init           = ff_MPV_encode_init,
4218     .encode2        = ff_MPV_encode_picture,
4219     .close          = ff_MPV_encode_end,
4220     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4221     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4222     .priv_class     = &wmv1_class,
4223 };