1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/intmath.h"
31 #include "libavutil/mathematics.h"
32 #include "libavutil/opt.h"
33 #include "avcodec.h"
34 #include "dsputil.h"
35 #include "mpegvideo.h"
36 #include "mpegvideo_common.h"
37 #include "h263.h"
38 #include "mjpegenc.h"
39 #include "msmpeg4.h"
40 #include "faandct.h"
41 #include "thread.h"
42 #include "aandcttab.h"
43 #include "flv.h"
44 #include "mpeg4video.h"
45 #include "internal.h"
46 #include "bytestream.h"
47 #include <limits.h>
48
49 //#undef NDEBUG
50 //#include <assert.h>
51
52 static int encode_picture(MpegEncContext *s, int picture_number);
53 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
54 static int sse_mb(MpegEncContext *s);
55 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block);
56 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
57
58 /* enable all paranoid tests for rounding, overflows, etc... */
59 //#define PARANOID
60
61 //#define DEBUG
62
63 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
64 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
65
66 const AVOption ff_mpv_generic_options[] = {
67     FF_MPV_COMMON_OPTS
68     { NULL },
69 };
70
71 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
72                        uint16_t (*qmat16)[2][64],
73                        const uint16_t *quant_matrix,
74                        int bias, int qmin, int qmax, int intra)
75 {
76     int qscale;
77     int shift = 0;
78
79     for (qscale = qmin; qscale <= qmax; qscale++) {
80         int i;
81         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
82             dsp->fdct == ff_jpeg_fdct_islow_10 ||
83             dsp->fdct == ff_faandct) {
84             for (i = 0; i < 64; i++) {
85                 const int j = dsp->idct_permutation[i];
86                 /* 16 <= qscale * quant_matrix[i] <= 7905
87                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
88                  *             19952 <=              x  <= 249205026
89                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
90                  *           3444240 >= (1 << 36) / (x) >= 275 */
91
92                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
93                                         (qscale * quant_matrix[j]));
94             }
95         } else if (dsp->fdct == ff_fdct_ifast) {
96             for (i = 0; i < 64; i++) {
97                 const int j = dsp->idct_permutation[i];
98                 /* 16 <= qscale * quant_matrix[i] <= 7905
99                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
100                  *             19952 <=              x  <= 249205026
101                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
102                  *           3444240 >= (1 << 36) / (x) >= 275 */
103
104                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
105                                         (ff_aanscales[i] * qscale *
106                                          quant_matrix[j]));
107             }
108         } else {
109             for (i = 0; i < 64; i++) {
110                 const int j = dsp->idct_permutation[i];
111                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
112                  * Assume x = qscale * quant_matrix[i]
113                  * So             16 <=              x  <= 7905
114                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
115                  * so          32768 >= (1 << 19) / (x) >= 67 */
116                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
117                                         (qscale * quant_matrix[j]));
118                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
119                 //                    (qscale * quant_matrix[i]);
120                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
121                                        (qscale * quant_matrix[j]);
122
123                 if (qmat16[qscale][0][i] == 0 ||
124                     qmat16[qscale][0][i] == 128 * 256)
125                     qmat16[qscale][0][i] = 128 * 256 - 1;
126                 qmat16[qscale][1][i] =
127                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
128                                 qmat16[qscale][0][i]);
129             }
130         }
131
132         for (i = intra; i < 64; i++) {
133             int64_t max = 8191;
134             if (dsp->fdct == ff_fdct_ifast) {
135                 max = (8191LL * ff_aanscales[i]) >> 14;
136             }
137             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
138                 shift++;
139             }
140         }
141     }
142     if (shift) {
143         av_log(NULL, AV_LOG_INFO,
144                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
145                QMAT_SHIFT - shift);
146     }
147 }
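
/*
 * Illustrative sketch of how the tables filled in above are consumed at
 * quantization time (hypothetical helper, not the actual dct_quantize_c()):
 * instead of dividing each coefficient by qscale * quant_matrix[i], the
 * encoder multiplies by the precomputed fixed-point reciprocal and drops the
 * QMAT_SHIFT fractional bits again, ignoring here the overflow guard that
 * the shift check above takes care of.
 *
 *     static inline int quantize_one_coeff(int coeff, int qscale, int i,
 *                                          int (*qmat)[64])
 *     {
 *         // ~ coeff / (qscale * quant_matrix[i]), without a division
 *         return (coeff * qmat[qscale][i]) >> QMAT_SHIFT;
 *     }
 *
 * The qmat16 variant packs the reciprocal plus the rounding bias into 16-bit
 * entries for the MMX/SSE quantizers, which is why it is clamped just below
 * 128 * 256 above.
 */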
148
149 static inline void update_qscale(MpegEncContext *s)
150 {
151     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
152                 (FF_LAMBDA_SHIFT + 7);
153     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
154
155     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
156                  FF_LAMBDA_SHIFT;
157 }
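
/*
 * Worked example of the lambda -> qscale mapping above, assuming the usual
 * constants FF_LAMBDA_SHIFT = 7, FF_LAMBDA_SCALE = 128 and FF_QP2LAMBDA = 118:
 * since 139 is roughly (1 << 14) / 118, the expression
 *
 *     qscale = (lambda * 139 + 128 * 64) >> 14
 *
 * is a rounded division by FF_QP2LAMBDA.  For lambda = 3 * FF_QP2LAMBDA = 354:
 *
 *     (354 * 139 + 8192) >> 14 = 57398 >> 14 = 3
 *
 * i.e. the qscale corresponding to the rate-distortion lambda, clipped to
 * [qmin, qmax] afterwards.
 */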
158
159 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
160 {
161     int i;
162
163     if (matrix) {
164         put_bits(pb, 1, 1);
165         for (i = 0; i < 64; i++) {
166             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
167         }
168     } else
169         put_bits(pb, 1, 0);
170 }
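
/*
 * The bitstream layout produced above matches the MPEG-1/2 quantizer matrix
 * syntax: a single load_*_quantiser_matrix flag, followed (when set) by 64
 * eight-bit values in zigzag scan order.  A decoder-side sketch of the
 * inverse (illustrative get_bits-style pseudo code, gb being a hypothetical
 * GetBitContext):
 *
 *     if (get_bits1(gb)) {
 *         for (i = 0; i < 64; i++)
 *             matrix[ff_zigzag_direct[i]] = get_bits(gb, 8);
 *     }
 */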
171
172 /**
173  * init s->current_picture.qscale_table from s->lambda_table
174  */
175 void ff_init_qscale_tab(MpegEncContext *s)
176 {
177     int8_t * const qscale_table = s->current_picture.f.qscale_table;
178     int i;
179
180     for (i = 0; i < s->mb_num; i++) {
181         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
182         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
183         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
184                                                   s->avctx->qmax);
185     }
186 }
187
188 static void copy_picture_attributes(MpegEncContext *s,
189                                     AVFrame *dst,
190                                     AVFrame *src)
191 {
192     int i;
193
194     dst->pict_type              = src->pict_type;
195     dst->quality                = src->quality;
196     dst->coded_picture_number   = src->coded_picture_number;
197     dst->display_picture_number = src->display_picture_number;
198     //dst->reference              = src->reference;
199     dst->pts                    = src->pts;
200     dst->interlaced_frame       = src->interlaced_frame;
201     dst->top_field_first        = src->top_field_first;
202
203     if (s->avctx->me_threshold) {
204         if (!src->motion_val[0])
205             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
206         if (!src->mb_type)
207             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
208         if (!src->ref_index[0])
209             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
210         if (src->motion_subsample_log2 != dst->motion_subsample_log2)
211             av_log(s->avctx, AV_LOG_ERROR,
212                    "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
213                    src->motion_subsample_log2, dst->motion_subsample_log2);
214
215         memcpy(dst->mb_type, src->mb_type,
216                s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
217
218         for (i = 0; i < 2; i++) {
219             int stride = ((16 * s->mb_width ) >>
220                           src->motion_subsample_log2) + 1;
221             int height = ((16 * s->mb_height) >> src->motion_subsample_log2);
222
223             if (src->motion_val[i] &&
224                 src->motion_val[i] != dst->motion_val[i]) {
225                 memcpy(dst->motion_val[i], src->motion_val[i],
226                        2 * stride * height * sizeof(int16_t));
227             }
228             if (src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]) {
229                 memcpy(dst->ref_index[i], src->ref_index[i],
230                        s->mb_stride * 4 * s->mb_height * sizeof(int8_t));
231             }
232         }
233     }
234 }
235
236 static void update_duplicate_context_after_me(MpegEncContext *dst,
237                                               MpegEncContext *src)
238 {
239 #define COPY(a) dst->a= src->a
240     COPY(pict_type);
241     COPY(current_picture);
242     COPY(f_code);
243     COPY(b_code);
244     COPY(qscale);
245     COPY(lambda);
246     COPY(lambda2);
247     COPY(picture_in_gop_number);
248     COPY(gop_picture_number);
249     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
250     COPY(progressive_frame);    // FIXME don't set in encode_header
251     COPY(partitioned_frame);    // FIXME don't set in encode_header
252 #undef COPY
253 }
254
255 /**
256  * Set the given MpegEncContext to defaults for encoding.
257  * The changed fields will not depend upon the prior state of the MpegEncContext.
258  */
259 static void MPV_encode_defaults(MpegEncContext *s)
260 {
261     int i;
262     ff_MPV_common_defaults(s);
263
264     for (i = -16; i < 16; i++) {
265         default_fcode_tab[i + MAX_MV] = 1;
266     }
267     s->me.mv_penalty = default_mv_penalty;
268     s->fcode_tab     = default_fcode_tab;
269 }
270
271 /* init video encoder */
272 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
273 {
274     MpegEncContext *s = avctx->priv_data;
275     int i;
276     int chroma_h_shift, chroma_v_shift;
277
278     MPV_encode_defaults(s);
279
280     switch (avctx->codec_id) {
281     case CODEC_ID_MPEG2VIDEO:
282         if (avctx->pix_fmt != PIX_FMT_YUV420P &&
283             avctx->pix_fmt != PIX_FMT_YUV422P) {
284             av_log(avctx, AV_LOG_ERROR,
285                    "only YUV420 and YUV422 are supported\n");
286             return -1;
287         }
288         break;
289     case CODEC_ID_LJPEG:
290         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
291             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
292             avctx->pix_fmt != PIX_FMT_YUVJ444P &&
293             avctx->pix_fmt != PIX_FMT_BGRA     &&
294             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
295               avctx->pix_fmt != PIX_FMT_YUV422P &&
296               avctx->pix_fmt != PIX_FMT_YUV444P) ||
297              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
298             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
299             return -1;
300         }
301         break;
302     case CODEC_ID_MJPEG:
303         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
304             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
305             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
306               avctx->pix_fmt != PIX_FMT_YUV422P) ||
307              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
308             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
309             return -1;
310         }
311         break;
312     default:
313         if (avctx->pix_fmt != PIX_FMT_YUV420P) {
314             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
315             return -1;
316         }
317     }
318
319     switch (avctx->pix_fmt) {
320     case PIX_FMT_YUVJ422P:
321     case PIX_FMT_YUV422P:
322         s->chroma_format = CHROMA_422;
323         break;
324     case PIX_FMT_YUVJ420P:
325     case PIX_FMT_YUV420P:
326     default:
327         s->chroma_format = CHROMA_420;
328         break;
329     }
330
331     s->bit_rate = avctx->bit_rate;
332     s->width    = avctx->width;
333     s->height   = avctx->height;
334     if (avctx->gop_size > 600 &&
335         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
336         av_log(avctx, AV_LOG_ERROR,
337                "Warning keyframe interval too large! reducing it ...\n");
338         avctx->gop_size = 600;
339     }
340     s->gop_size     = avctx->gop_size;
341     s->avctx        = avctx;
342     s->flags        = avctx->flags;
343     s->flags2       = avctx->flags2;
344     s->max_b_frames = avctx->max_b_frames;
345     s->codec_id     = avctx->codec->id;
346 #if FF_API_MPV_GLOBAL_OPTS
347     if (avctx->luma_elim_threshold)
348         s->luma_elim_threshold   = avctx->luma_elim_threshold;
349     if (avctx->chroma_elim_threshold)
350         s->chroma_elim_threshold = avctx->chroma_elim_threshold;
351 #endif
352     s->strict_std_compliance = avctx->strict_std_compliance;
353     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
354     s->mpeg_quant         = avctx->mpeg_quant;
355     s->rtp_mode           = !!avctx->rtp_payload_size;
356     s->intra_dc_precision = avctx->intra_dc_precision;
357     s->user_specified_pts = AV_NOPTS_VALUE;
358
359     if (s->gop_size <= 1) {
360         s->intra_only = 1;
361         s->gop_size   = 12;
362     } else {
363         s->intra_only = 0;
364     }
365
366     s->me_method = avctx->me_method;
367
368     /* Fixed QSCALE */
369     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
370
371 #if FF_API_MPV_GLOBAL_OPTS
372     if (s->flags & CODEC_FLAG_QP_RD)
373         s->mpv_flags |= FF_MPV_FLAG_QP_RD;
374 #endif
375
376     s->adaptive_quant = (s->avctx->lumi_masking ||
377                          s->avctx->dark_masking ||
378                          s->avctx->temporal_cplx_masking ||
379                          s->avctx->spatial_cplx_masking  ||
380                          s->avctx->p_masking      ||
381                          s->avctx->border_masking ||
382                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
383                         !s->fixed_qscale;
384
385     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
386
387     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
388         av_log(avctx, AV_LOG_ERROR,
389                "a vbv buffer size is needed "
390                "for encoding with a maximum bitrate\n");
391         return -1;
392     }
393
394     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
395         av_log(avctx, AV_LOG_INFO,
396                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
397     }
398
399     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
400         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
401         return -1;
402     }
403
404     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
405         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
406         return -1;
407     }
408
409     if (avctx->rc_max_rate &&
410         avctx->rc_max_rate == avctx->bit_rate &&
411         avctx->rc_max_rate != avctx->rc_min_rate) {
412         av_log(avctx, AV_LOG_INFO,
413                "impossible bitrate constraints, this will fail\n");
414     }
415
416     if (avctx->rc_buffer_size &&
417         avctx->bit_rate * (int64_t)avctx->time_base.num >
418             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
419         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
420         return -1;
421     }
422
423     if (!s->fixed_qscale &&
424         avctx->bit_rate * av_q2d(avctx->time_base) >
425             avctx->bit_rate_tolerance) {
426         av_log(avctx, AV_LOG_ERROR,
427                "bitrate tolerance too small for bitrate\n");
428         return -1;
429     }
430
431     if (s->avctx->rc_max_rate &&
432         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
433         (s->codec_id == CODEC_ID_MPEG1VIDEO ||
434          s->codec_id == CODEC_ID_MPEG2VIDEO) &&
435         90000LL * (avctx->rc_buffer_size - 1) >
436             s->avctx->rc_max_rate * 0xFFFFLL) {
437         av_log(avctx, AV_LOG_INFO,
438                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
439                "specified vbv buffer is too large for the given bitrate!\n");
440     }
441
442     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != CODEC_ID_MPEG4 &&
443         s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P &&
444         s->codec_id != CODEC_ID_FLV1) {
445         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
446         return -1;
447     }
448
449     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
450         av_log(avctx, AV_LOG_ERROR,
451                "OBMC is only supported with simple mb decision\n");
452         return -1;
453     }
454
455     if (s->quarter_sample && s->codec_id != CODEC_ID_MPEG4) {
456         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
457         return -1;
458     }
459
460     if (s->max_b_frames                    &&
461         s->codec_id != CODEC_ID_MPEG4      &&
462         s->codec_id != CODEC_ID_MPEG1VIDEO &&
463         s->codec_id != CODEC_ID_MPEG2VIDEO) {
464         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
465         return -1;
466     }
467
468     if ((s->codec_id == CODEC_ID_MPEG4 ||
469          s->codec_id == CODEC_ID_H263  ||
470          s->codec_id == CODEC_ID_H263P) &&
471         (avctx->sample_aspect_ratio.num > 255 ||
472          avctx->sample_aspect_ratio.den > 255)) {
473         av_log(avctx, AV_LOG_ERROR,
474                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
475                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
476         return -1;
477     }
478
479     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
480         s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO) {
481         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
482         return -1;
483     }
484
485     // FIXME mpeg2 uses that too
486     if (s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4) {
487         av_log(avctx, AV_LOG_ERROR,
488                "mpeg2 style quantization not supported by codec\n");
489         return -1;
490     }
491
492 #if FF_API_MPV_GLOBAL_OPTS
493     if (s->flags & CODEC_FLAG_CBP_RD)
494         s->mpv_flags |= FF_MPV_FLAG_CBP_RD;
495 #endif
496
497     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
498         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
499         return -1;
500     }
501
502     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
503         s->avctx->mb_decision != FF_MB_DECISION_RD) {
504         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
505         return -1;
506     }
507
508     if (s->avctx->scenechange_threshold < 1000000000 &&
509         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
510         av_log(avctx, AV_LOG_ERROR,
511                "closed gop with scene change detection is not supported yet, "
512                "set threshold to 1000000000\n");
513         return -1;
514     }
515
516     if (s->flags & CODEC_FLAG_LOW_DELAY) {
517         if (s->codec_id != CODEC_ID_MPEG2VIDEO) {
518             av_log(avctx, AV_LOG_ERROR,
519                   "low delay forcing is only available for mpeg2\n");
520             return -1;
521         }
522         if (s->max_b_frames != 0) {
523             av_log(avctx, AV_LOG_ERROR,
524                    "b frames cannot be used with low delay\n");
525             return -1;
526         }
527     }
528
529     if (s->q_scale_type == 1) {
530         if (avctx->qmax > 12) {
531             av_log(avctx, AV_LOG_ERROR,
532                    "non linear quant only supports qmax <= 12 currently\n");
533             return -1;
534         }
535     }
536
537     if (s->avctx->thread_count > 1         &&
538         s->codec_id != CODEC_ID_MPEG4      &&
539         s->codec_id != CODEC_ID_MPEG1VIDEO &&
540         s->codec_id != CODEC_ID_MPEG2VIDEO &&
541         (s->codec_id != CODEC_ID_H263P)) {
542         av_log(avctx, AV_LOG_ERROR,
543                "multi threaded encoding not supported by codec\n");
544         return -1;
545     }
546
547     if (s->avctx->thread_count < 1) {
548         av_log(avctx, AV_LOG_ERROR,
549                "automatic thread number detection not supported by codec, "
550                "patch welcome\n");
551         return -1;
552     }
553
554     if (s->avctx->thread_count > 1)
555         s->rtp_mode = 1;
556
557     if (!avctx->time_base.den || !avctx->time_base.num) {
558         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
559         return -1;
560     }
561
562     i = (INT_MAX / 2 + 128) >> 8;
563     if (avctx->me_threshold >= i) {
564         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n",
565                i - 1);
566         return -1;
567     }
568     if (avctx->mb_threshold >= i) {
569         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
570                i - 1);
571         return -1;
572     }
573
574     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
575         av_log(avctx, AV_LOG_INFO,
576                "notice: b_frame_strategy only affects the first pass\n");
577         avctx->b_frame_strategy = 0;
578     }
579
580     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
581     if (i > 1) {
582         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
583         avctx->time_base.den /= i;
584         avctx->time_base.num /= i;
585         //return -1;
586     }
587
588     if (s->mpeg_quant || s->codec_id == CODEC_ID_MPEG1VIDEO ||
589         s->codec_id == CODEC_ID_MPEG2VIDEO || s->codec_id == CODEC_ID_MJPEG) {
590         // (a + x * 3 / 8) / x
591         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
592         s->inter_quant_bias = 0;
593     } else {
594         s->intra_quant_bias = 0;
595         // (a - x / 4) / x
596         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
597     }
598
599     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
600         s->intra_quant_bias = avctx->intra_quant_bias;
601     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
602         s->inter_quant_bias = avctx->inter_quant_bias;
603
604     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
605                                   &chroma_v_shift);
606
607     if (avctx->codec_id == CODEC_ID_MPEG4 &&
608         s->avctx->time_base.den > (1 << 16) - 1) {
609         av_log(avctx, AV_LOG_ERROR,
610                "timebase %d/%d not supported by MPEG 4 standard, "
611                "the maximum admitted value for the timebase denominator "
612                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
613                (1 << 16) - 1);
614         return -1;
615     }
616     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
617
618 #if FF_API_MPV_GLOBAL_OPTS
619     if (avctx->flags2 & CODEC_FLAG2_SKIP_RD)
620         s->mpv_flags |= FF_MPV_FLAG_SKIP_RD;
621     if (avctx->flags2 & CODEC_FLAG2_STRICT_GOP)
622         s->mpv_flags |= FF_MPV_FLAG_STRICT_GOP;
623     if (avctx->quantizer_noise_shaping)
624         s->quantizer_noise_shaping = avctx->quantizer_noise_shaping;
625 #endif
626
627     switch (avctx->codec->id) {
628     case CODEC_ID_MPEG1VIDEO:
629         s->out_format = FMT_MPEG1;
630         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
631         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
632         break;
633     case CODEC_ID_MPEG2VIDEO:
634         s->out_format = FMT_MPEG1;
635         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
636         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
637         s->rtp_mode   = 1;
638         break;
639     case CODEC_ID_LJPEG:
640     case CODEC_ID_MJPEG:
641         s->out_format = FMT_MJPEG;
642         s->intra_only = 1; /* force intra only for jpeg */
643         if (avctx->codec->id == CODEC_ID_LJPEG &&
644             avctx->pix_fmt   == PIX_FMT_BGRA) {
645             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
646             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
647             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
648         } else {
649             s->mjpeg_vsample[0] = 2;
650             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
651             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
652             s->mjpeg_hsample[0] = 2;
653             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
654             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
655         }
656         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
657             ff_mjpeg_encode_init(s) < 0)
658             return -1;
659         avctx->delay = 0;
660         s->low_delay = 1;
661         break;
662     case CODEC_ID_H261:
663         if (!CONFIG_H261_ENCODER)
664             return -1;
665         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
666             av_log(avctx, AV_LOG_ERROR,
667                    "The specified picture size of %dx%d is not valid for the "
668                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
669                     s->width, s->height);
670             return -1;
671         }
672         s->out_format = FMT_H261;
673         avctx->delay  = 0;
674         s->low_delay  = 1;
675         break;
676     case CODEC_ID_H263:
677         if (!CONFIG_H263_ENCODER)
678             return -1;
679         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
680                              s->width, s->height) == 8) {
681             av_log(avctx, AV_LOG_INFO,
682                    "The specified picture size of %dx%d is not valid for "
683                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
684                "352x288, 704x576, and 1408x1152. "
685                    "Try H.263+.\n", s->width, s->height);
686             return -1;
687         }
688         s->out_format = FMT_H263;
689         avctx->delay  = 0;
690         s->low_delay  = 1;
691         break;
692     case CODEC_ID_H263P:
693         s->out_format = FMT_H263;
694         s->h263_plus  = 1;
695         /* Fx */
696         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
697         s->modified_quant  = s->h263_aic;
698         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
699         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
700
701         /* /Fx */
702         /* These are just to be sure */
703         avctx->delay = 0;
704         s->low_delay = 1;
705         break;
706     case CODEC_ID_FLV1:
707         s->out_format      = FMT_H263;
708         s->h263_flv        = 2; /* format = 1; 11-bit codes */
709         s->unrestricted_mv = 1;
710         s->rtp_mode  = 0; /* don't allow GOB */
711         avctx->delay = 0;
712         s->low_delay = 1;
713         break;
714     case CODEC_ID_RV10:
715         s->out_format = FMT_H263;
716         avctx->delay  = 0;
717         s->low_delay  = 1;
718         break;
719     case CODEC_ID_RV20:
720         s->out_format      = FMT_H263;
721         avctx->delay       = 0;
722         s->low_delay       = 1;
723         s->modified_quant  = 1;
724         s->h263_aic        = 1;
725         s->h263_plus       = 1;
726         s->loop_filter     = 1;
727         s->unrestricted_mv = 0;
728         break;
729     case CODEC_ID_MPEG4:
730         s->out_format      = FMT_H263;
731         s->h263_pred       = 1;
732         s->unrestricted_mv = 1;
733         s->low_delay       = s->max_b_frames ? 0 : 1;
734         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
735         break;
736     case CODEC_ID_MSMPEG4V2:
737         s->out_format      = FMT_H263;
738         s->h263_pred       = 1;
739         s->unrestricted_mv = 1;
740         s->msmpeg4_version = 2;
741         avctx->delay       = 0;
742         s->low_delay       = 1;
743         break;
744     case CODEC_ID_MSMPEG4V3:
745         s->out_format        = FMT_H263;
746         s->h263_pred         = 1;
747         s->unrestricted_mv   = 1;
748         s->msmpeg4_version   = 3;
749         s->flipflop_rounding = 1;
750         avctx->delay         = 0;
751         s->low_delay         = 1;
752         break;
753     case CODEC_ID_WMV1:
754         s->out_format        = FMT_H263;
755         s->h263_pred         = 1;
756         s->unrestricted_mv   = 1;
757         s->msmpeg4_version   = 4;
758         s->flipflop_rounding = 1;
759         avctx->delay         = 0;
760         s->low_delay         = 1;
761         break;
762     case CODEC_ID_WMV2:
763         s->out_format        = FMT_H263;
764         s->h263_pred         = 1;
765         s->unrestricted_mv   = 1;
766         s->msmpeg4_version   = 5;
767         s->flipflop_rounding = 1;
768         avctx->delay         = 0;
769         s->low_delay         = 1;
770         break;
771     default:
772         return -1;
773     }
774
775     avctx->has_b_frames = !s->low_delay;
776
777     s->encoding = 1;
778
779     s->progressive_frame    =
780     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
781                                                 CODEC_FLAG_INTERLACED_ME) ||
782                                 s->alternate_scan);
783
784     /* init */
785     if (ff_MPV_common_init(s) < 0)
786         return -1;
787
788     if (!s->dct_quantize)
789         s->dct_quantize = ff_dct_quantize_c;
790     if (!s->denoise_dct)
791         s->denoise_dct  = denoise_dct_c;
792     s->fast_dct_quantize = s->dct_quantize;
793     if (avctx->trellis)
794         s->dct_quantize  = dct_quantize_trellis_c;
795
796     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
797         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
798
799     s->quant_precision = 5;
800
801     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
802     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
803
804     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
805         ff_h261_encode_init(s);
806     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
807         ff_h263_encode_init(s);
808     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
809         ff_msmpeg4_encode_init(s);
810     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
811         && s->out_format == FMT_MPEG1)
812         ff_mpeg1_encode_init(s);
813
814     /* init q matrix */
815     for (i = 0; i < 64; i++) {
816         int j = s->dsp.idct_permutation[i];
817         if (CONFIG_MPEG4_ENCODER && s->codec_id == CODEC_ID_MPEG4 &&
818             s->mpeg_quant) {
819             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
820             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
821         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
822             s->intra_matrix[j] =
823             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
824         } else {
825             /* mpeg1/2 */
826             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
827             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
828         }
829         if (s->avctx->intra_matrix)
830             s->intra_matrix[j] = s->avctx->intra_matrix[i];
831         if (s->avctx->inter_matrix)
832             s->inter_matrix[j] = s->avctx->inter_matrix[i];
833     }
834
835     /* precompute matrix */
836     /* for mjpeg, we do include qscale in the matrix */
837     if (s->out_format != FMT_MJPEG) {
838         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
839                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
840                           31, 1);
841         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
842                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
843                           31, 0);
844     }
845
846     if (ff_rate_control_init(s) < 0)
847         return -1;
848
849     return 0;
850 }
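
/*
 * Rough usage sketch for the init path above, as an application would drive
 * it through the public API (the values are arbitrary examples that satisfy
 * the checks in ff_MPV_encode_init(), e.g. YUV420P input and a time base
 * whose denominator fits in 16 bits for MPEG-4):
 *
 *     AVCodec *codec      = avcodec_find_encoder(CODEC_ID_MPEG4);
 *     AVCodecContext *enc = avcodec_alloc_context3(codec);
 *
 *     enc->width        = 640;
 *     enc->height       = 480;
 *     enc->pix_fmt      = PIX_FMT_YUV420P;
 *     enc->time_base    = (AVRational){ 1, 25 };
 *     enc->bit_rate     = 800000;
 *     enc->gop_size     = 250;   // intervals above 600 get reduced
 *     enc->max_b_frames = 2;
 *
 *     if (avcodec_open2(enc, codec, NULL) < 0)  // ends up in ff_MPV_encode_init()
 *         return -1;
 */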
851
852 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
853 {
854     MpegEncContext *s = avctx->priv_data;
855
856     ff_rate_control_uninit(s);
857
858     ff_MPV_common_end(s);
859     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
860         s->out_format == FMT_MJPEG)
861         ff_mjpeg_encode_close(s);
862
863     av_freep(&avctx->extradata);
864
865     return 0;
866 }
867
868 static int get_sae(uint8_t *src, int ref, int stride)
869 {
870     int x,y;
871     int acc = 0;
872
873     for (y = 0; y < 16; y++) {
874         for (x = 0; x < 16; x++) {
875             acc += FFABS(src[x + y * stride] - ref);
876         }
877     }
878
879     return acc;
880 }
881
882 static int get_intra_count(MpegEncContext *s, uint8_t *src,
883                            uint8_t *ref, int stride)
884 {
885     int x, y, w, h;
886     int acc = 0;
887
888     w = s->width  & ~15;
889     h = s->height & ~15;
890
891     for (y = 0; y < h; y += 16) {
892         for (x = 0; x < w; x += 16) {
893             int offset = x + y * stride;
894             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
895                                      16);
896             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
897             int sae  = get_sae(src + offset, mean, stride);
898
899             acc += sae + 500 < sad;
900         }
901     }
902     return acc;
903 }
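
/*
 * The heuristic above, used by b_frame_strategy == 1, counts a 16x16 block
 * as "intra-looking" when predicting it from the previous frame (SAD against
 * ref) is noticeably worse than modelling it as a flat block at its own mean
 * (SAE), i.e. when sae + 500 < sad.  With hypothetical numbers: a block
 * whose pixels deviate from their mean by 4 on average gives
 * sae = 4 * 256 = 1024; if the temporal match is poor and sad is 2000, then
 * 1024 + 500 < 2000 and the block is counted.  Frames containing many such
 * blocks make bad B-frame candidates.
 */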
904
905
906 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg)
907 {
908     AVFrame *pic = NULL;
909     int64_t pts;
910     int i;
911     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
912                                                  (s->low_delay ? 0 : 1);
913     int direct = 1;
914
915     if (pic_arg) {
916         pts = pic_arg->pts;
917         pic_arg->display_picture_number = s->input_picture_number++;
918
919         if (pts != AV_NOPTS_VALUE) {
920             if (s->user_specified_pts != AV_NOPTS_VALUE) {
921                 int64_t time = pts;
922                 int64_t last = s->user_specified_pts;
923
924                 if (time <= last) {
925                     av_log(s->avctx, AV_LOG_ERROR,
926                            "Error, Invalid timestamp=%"PRId64", "
927                            "last=%"PRId64"\n", pts, s->user_specified_pts);
928                     return -1;
929                 }
930
931                 if (!s->low_delay && pic_arg->display_picture_number == 1)
932                     s->dts_delta = time - last;
933             }
934             s->user_specified_pts = pts;
935         } else {
936             if (s->user_specified_pts != AV_NOPTS_VALUE) {
937                 s->user_specified_pts =
938                 pts = s->user_specified_pts + 1;
939                 av_log(s->avctx, AV_LOG_INFO,
940                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
941                        pts);
942             } else {
943                 pts = pic_arg->display_picture_number;
944             }
945         }
946     }
947
948   if (pic_arg) {
949     if (encoding_delay && !(s->flags & CODEC_FLAG_INPUT_PRESERVED))
950         direct = 0;
951     if (pic_arg->linesize[0] != s->linesize)
952         direct = 0;
953     if (pic_arg->linesize[1] != s->uvlinesize)
954         direct = 0;
955     if (pic_arg->linesize[2] != s->uvlinesize)
956         direct = 0;
957
958     //av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0],
959     //       pic_arg->linesize[1], s->linesize, s->uvlinesize);
960
961     if (direct) {
962         i = ff_find_unused_picture(s, 1);
963         if (i < 0)
964             return i;
965
966         pic = &s->picture[i].f;
967         pic->reference = 3;
968
969         for (i = 0; i < 4; i++) {
970             pic->data[i]     = pic_arg->data[i];
971             pic->linesize[i] = pic_arg->linesize[i];
972         }
973         if (ff_alloc_picture(s, (Picture *) pic, 1) < 0) {
974             return -1;
975         }
976     } else {
977         i = ff_find_unused_picture(s, 0);
978         if (i < 0)
979             return i;
980
981         pic = &s->picture[i].f;
982         pic->reference = 3;
983
984         if (ff_alloc_picture(s, (Picture *) pic, 0) < 0) {
985             return -1;
986         }
987
988         if (pic->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
989             pic->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
990             pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
991             // empty
992         } else {
993             int h_chroma_shift, v_chroma_shift;
994             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift,
995                                           &v_chroma_shift);
996
997             for (i = 0; i < 3; i++) {
998                 int src_stride = pic_arg->linesize[i];
999                 int dst_stride = i ? s->uvlinesize : s->linesize;
1000                 int h_shift = i ? h_chroma_shift : 0;
1001                 int v_shift = i ? v_chroma_shift : 0;
1002                 int w = s->width  >> h_shift;
1003                 int h = s->height >> v_shift;
1004                 uint8_t *src = pic_arg->data[i];
1005                 uint8_t *dst = pic->data[i];
1006
1007                 if (!s->avctx->rc_buffer_size)
1008                     dst += INPLACE_OFFSET;
1009
1010                 if (src_stride == dst_stride)
1011                     memcpy(dst, src, src_stride * h);
1012                 else {
1013                     while (h--) {
1014                         memcpy(dst, src, w);
1015                         dst += dst_stride;
1016                         src += src_stride;
1017                     }
1018                 }
1019             }
1020         }
1021     }
1022     copy_picture_attributes(s, pic, pic_arg);
1023     pic->pts = pts; // we set this here to avoid modifying pic_arg
1024   }
1025
1026     /* shift buffer entries */
1027     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1028         s->input_picture[i - 1] = s->input_picture[i];
1029
1030     s->input_picture[encoding_delay] = (Picture*) pic;
1031
1032     return 0;
1033 }
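
/*
 * Net effect of the function above: user frames enter a FIFO that is
 * encoding_delay + 1 pictures deep, either by wrapping the caller's buffers
 * directly (the "direct" path, taken when the strides match and either
 * CODEC_FLAG_INPUT_PRESERVED is set or there is no encoding delay) or by
 * copying them into an internal picture.  With max_b_frames = 2, for
 * example, the first two calls only fill the FIFO and produce no packet;
 * from then on select_input_picture() below picks the next coded picture and
 * decides how many of the queued frames become B frames.
 */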
1034
1035 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1036 {
1037     int x, y, plane;
1038     int score = 0;
1039     int64_t score64 = 0;
1040
1041     for (plane = 0; plane < 3; plane++) {
1042         const int stride = p->f.linesize[plane];
1043         const int bw = plane ? 1 : 2;
1044         for (y = 0; y < s->mb_height * bw; y++) {
1045             for (x = 0; x < s->mb_width * bw; x++) {
1046                 int off = p->f.type == FF_BUFFER_TYPE_SHARED ? 0 : 16;
1047                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1048                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1049                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1050
1051                 switch (s->avctx->frame_skip_exp) {
1052                 case 0: score    =  FFMAX(score, v);          break;
1053                 case 1: score   += FFABS(v);                  break;
1054                 case 2: score   += v * v;                     break;
1055                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1056                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1057                 }
1058             }
1059         }
1060     }
1061
1062     if (score)
1063         score64 = score;
1064
1065     if (score64 < s->avctx->frame_skip_threshold)
1066         return 1;
1067     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1068         return 1;
1069     return 0;
1070 }
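
/*
 * Example of the skip metric above with frame_skip_exp == 1: every 8x8 block
 * of the candidate frame is compared against the previous reference frame
 * with frame_skip_cmp, the absolute per-block scores are summed, and the
 * frame is dropped when the sum stays below frame_skip_threshold or below
 * (frame_skip_factor * lambda) >> 8.  So with lambda = 256 and
 * frame_skip_factor = 32 a frame is skipped while its accumulated score is
 * below 32, which in practice only happens for (nearly) static content.
 */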
1071
1072 static int estimate_best_b_count(MpegEncContext *s)
1073 {
1074     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1075     AVCodecContext *c = avcodec_alloc_context3(NULL);
1076     AVFrame input[FF_MAX_B_FRAMES + 2];
1077     const int scale = s->avctx->brd_scale;
1078     int i, j, out_size, p_lambda, b_lambda, lambda2;
1079     int outbuf_size  = s->width * s->height; // FIXME
1080     uint8_t *outbuf  = av_malloc(outbuf_size);
1081     int64_t best_rd  = INT64_MAX;
1082     int best_b_count = -1;
1083
1084     assert(scale >= 0 && scale <= 3);
1085
1086     //emms_c();
1087     //s->next_picture_ptr->quality;
1088     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1089     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1090     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1091     if (!b_lambda) // FIXME we should do this somewhere else
1092         b_lambda = p_lambda;
1093     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1094                FF_LAMBDA_SHIFT;
1095
1096     c->width        = s->width  >> scale;
1097     c->height       = s->height >> scale;
1098     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1099                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1100     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1101     c->mb_decision  = s->avctx->mb_decision;
1102     c->me_cmp       = s->avctx->me_cmp;
1103     c->mb_cmp       = s->avctx->mb_cmp;
1104     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1105     c->pix_fmt      = PIX_FMT_YUV420P;
1106     c->time_base    = s->avctx->time_base;
1107     c->max_b_frames = s->max_b_frames;
1108
1109     if (avcodec_open2(c, codec, NULL) < 0)
1110         return -1;
1111
1112     for (i = 0; i < s->max_b_frames + 2; i++) {
1113         int ysize = c->width * c->height;
1114         int csize = (c->width / 2) * (c->height / 2);
1115         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1116                                                 s->next_picture_ptr;
1117
1118         avcodec_get_frame_defaults(&input[i]);
1119         input[i].data[0]     = av_malloc(ysize + 2 * csize);
1120         input[i].data[1]     = input[i].data[0] + ysize;
1121         input[i].data[2]     = input[i].data[1] + csize;
1122         input[i].linesize[0] = c->width;
1123         input[i].linesize[1] =
1124         input[i].linesize[2] = c->width / 2;
1125
1126         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1127             pre_input = *pre_input_ptr;
1128
1129             if (pre_input.f.type != FF_BUFFER_TYPE_SHARED && i) {
1130                 pre_input.f.data[0] += INPLACE_OFFSET;
1131                 pre_input.f.data[1] += INPLACE_OFFSET;
1132                 pre_input.f.data[2] += INPLACE_OFFSET;
1133             }
1134
1135             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
1136                                  pre_input.f.data[0], pre_input.f.linesize[0],
1137                                  c->width,      c->height);
1138             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
1139                                  pre_input.f.data[1], pre_input.f.linesize[1],
1140                                  c->width >> 1, c->height >> 1);
1141             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
1142                                  pre_input.f.data[2], pre_input.f.linesize[2],
1143                                  c->width >> 1, c->height >> 1);
1144         }
1145     }
1146
1147     for (j = 0; j < s->max_b_frames + 1; j++) {
1148         int64_t rd = 0;
1149
1150         if (!s->input_picture[j])
1151             break;
1152
1153         c->error[0] = c->error[1] = c->error[2] = 0;
1154
1155         input[0].pict_type = AV_PICTURE_TYPE_I;
1156         input[0].quality   = 1 * FF_QP2LAMBDA;
1157         out_size           = avcodec_encode_video(c, outbuf,
1158                                                   outbuf_size, &input[0]);
1159         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1160
1161         for (i = 0; i < s->max_b_frames + 1; i++) {
1162             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1163
1164             input[i + 1].pict_type = is_p ?
1165                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1166             input[i + 1].quality   = is_p ? p_lambda : b_lambda;
1167             out_size = avcodec_encode_video(c, outbuf, outbuf_size,
1168                                             &input[i + 1]);
1169             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1170         }
1171
1172         /* get the delayed frames */
1173         while (out_size) {
1174             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
1175             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1176         }
1177
1178         rd += c->error[0] + c->error[1] + c->error[2];
1179
1180         if (rd < best_rd) {
1181             best_rd = rd;
1182             best_b_count = j;
1183         }
1184     }
1185
1186     av_freep(&outbuf);
1187     avcodec_close(c);
1188     av_freep(&c);
1189
1190     for (i = 0; i < s->max_b_frames + 2; i++) {
1191         av_freep(&input[i].data[0]);
1192     }
1193
1194     return best_b_count;
1195 }
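
/*
 * In other words, b_frame_strategy == 2 is a brute-force search: for each
 * candidate count j the queued frames are encoded at 1/2^brd_scale
 * resolution as I P P ..., I B P B P ..., I B B P ..., and the j with the
 * smallest combined cost wins, roughly
 *
 *     rd(j) = sum over frames of (bits * lambda2 >> FF_LAMBDA_SHIFT) + SSE
 *
 * where bits = 8 * out_size and the SSE comes from the sub-encoder's
 * error[] counters (CODEC_FLAG_PSNR).  The shift by FF_LAMBDA_SHIFT - 3 in
 * the loop is just that bytes-to-bits factor of 8 folded into the rate term.
 */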
1196
1197 static int select_input_picture(MpegEncContext *s)
1198 {
1199     int i;
1200
1201     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1202         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1203     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1204
1205     /* set next picture type & ordering */
1206     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1207         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1208             s->next_picture_ptr == NULL || s->intra_only) {
1209             s->reordered_input_picture[0] = s->input_picture[0];
1210             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1211             s->reordered_input_picture[0]->f.coded_picture_number =
1212                 s->coded_picture_number++;
1213         } else {
1214             int b_frames;
1215
1216             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1217                 if (s->picture_in_gop_number < s->gop_size &&
1218                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1219                     // FIXME check that the gop check above is +-1 correct
1220                     //av_log(NULL, AV_LOG_DEBUG, "skip %p %"PRId64"\n",
1221                     //       s->input_picture[0]->f.data[0],
1222                     //       s->input_picture[0]->pts);
1223
1224                     if (s->input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED) {
1225                         for (i = 0; i < 4; i++)
1226                             s->input_picture[0]->f.data[i] = NULL;
1227                         s->input_picture[0]->f.type = 0;
1228                     } else {
1229                         assert(s->input_picture[0]->f.type == FF_BUFFER_TYPE_USER ||
1230                                s->input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL);
1231
1232                         s->avctx->release_buffer(s->avctx,
1233                                                  &s->input_picture[0]->f);
1234                     }
1235
1236                     emms_c();
1237                     ff_vbv_update(s, 0);
1238
1239                     goto no_output_pic;
1240                 }
1241             }
1242
1243             if (s->flags & CODEC_FLAG_PASS2) {
1244                 for (i = 0; i < s->max_b_frames + 1; i++) {
1245                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1246
1247                     if (pict_num >= s->rc_context.num_entries)
1248                         break;
1249                     if (!s->input_picture[i]) {
1250                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1251                         break;
1252                     }
1253
1254                     s->input_picture[i]->f.pict_type =
1255                         s->rc_context.entry[pict_num].new_pict_type;
1256                 }
1257             }
1258
1259             if (s->avctx->b_frame_strategy == 0) {
1260                 b_frames = s->max_b_frames;
1261                 while (b_frames && !s->input_picture[b_frames])
1262                     b_frames--;
1263             } else if (s->avctx->b_frame_strategy == 1) {
1264                 for (i = 1; i < s->max_b_frames + 1; i++) {
1265                     if (s->input_picture[i] &&
1266                         s->input_picture[i]->b_frame_score == 0) {
1267                         s->input_picture[i]->b_frame_score =
1268                             get_intra_count(s,
1269                                             s->input_picture[i    ]->f.data[0],
1270                                             s->input_picture[i - 1]->f.data[0],
1271                                             s->linesize) + 1;
1272                     }
1273                 }
1274                 for (i = 0; i < s->max_b_frames + 1; i++) {
1275                     if (s->input_picture[i] == NULL ||
1276                         s->input_picture[i]->b_frame_score - 1 >
1277                             s->mb_num / s->avctx->b_sensitivity)
1278                         break;
1279                 }
1280
1281                 b_frames = FFMAX(0, i - 1);
1282
1283                 /* reset scores */
1284                 for (i = 0; i < b_frames + 1; i++) {
1285                     s->input_picture[i]->b_frame_score = 0;
1286                 }
1287             } else if (s->avctx->b_frame_strategy == 2) {
1288                 b_frames = estimate_best_b_count(s);
1289             } else {
1290                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1291                 b_frames = 0;
1292             }
1293
1294             emms_c();
1295             //static int b_count = 0;
1296             //b_count += b_frames;
1297             //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
1298
1299             for (i = b_frames - 1; i >= 0; i--) {
1300                 int type = s->input_picture[i]->f.pict_type;
1301                 if (type && type != AV_PICTURE_TYPE_B)
1302                     b_frames = i;
1303             }
1304             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1305                 b_frames == s->max_b_frames) {
1306                 av_log(s->avctx, AV_LOG_ERROR,
1307                        "warning, too many b frames in a row\n");
1308             }
1309
1310             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1311                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1312                     s->gop_size > s->picture_in_gop_number) {
1313                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1314                 } else {
1315                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1316                         b_frames = 0;
1317                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1318                 }
1319             }
1320
1321             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1322                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1323                 b_frames--;
1324
1325             s->reordered_input_picture[0] = s->input_picture[b_frames];
1326             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1327                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1328             s->reordered_input_picture[0]->f.coded_picture_number =
1329                 s->coded_picture_number++;
1330             for (i = 0; i < b_frames; i++) {
1331                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1332                 s->reordered_input_picture[i + 1]->f.pict_type =
1333                     AV_PICTURE_TYPE_B;
1334                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1335                     s->coded_picture_number++;
1336             }
1337         }
1338     }
1339 no_output_pic:
1340     if (s->reordered_input_picture[0]) {
1341         s->reordered_input_picture[0]->f.reference =
1342            s->reordered_input_picture[0]->f.pict_type !=
1343                AV_PICTURE_TYPE_B ? 3 : 0;
1344
1345         ff_copy_picture(&s->new_picture, s->reordered_input_picture[0]);
1346
1347         if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED ||
1348             s->avctx->rc_buffer_size) {
1349             // input is a shared pix, so we can't modify it -> alloc a new
1350             // one & ensure that the shared one is reusable
1351
1352             Picture *pic;
1353             int i = ff_find_unused_picture(s, 0);
1354             if (i < 0)
1355                 return i;
1356             pic = &s->picture[i];
1357
1358             pic->f.reference = s->reordered_input_picture[0]->f.reference;
1359             if (ff_alloc_picture(s, pic, 0) < 0) {
1360                 return -1;
1361             }
1362
1363             /* mark us unused / free shared pic */
1364             if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL)
1365                 s->avctx->release_buffer(s->avctx,
1366                                          &s->reordered_input_picture[0]->f);
1367             for (i = 0; i < 4; i++)
1368                 s->reordered_input_picture[0]->f.data[i] = NULL;
1369             s->reordered_input_picture[0]->f.type = 0;
1370
1371             copy_picture_attributes(s, &pic->f,
1372                                     &s->reordered_input_picture[0]->f);
1373
1374             s->current_picture_ptr = pic;
1375         } else {
1376             // input is not a shared pix -> reuse buffer for current_pix
1377
1378             assert(s->reordered_input_picture[0]->f.type ==
1379                        FF_BUFFER_TYPE_USER ||
1380                    s->reordered_input_picture[0]->f.type ==
1381                        FF_BUFFER_TYPE_INTERNAL);
1382
1383             s->current_picture_ptr = s->reordered_input_picture[0];
1384             for (i = 0; i < 4; i++) {
1385                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1386             }
1387         }
1388         ff_copy_picture(&s->current_picture, s->current_picture_ptr);
1389
1390         s->picture_number = s->new_picture.f.display_picture_number;
1391         //printf("dpn:%d\n", s->picture_number);
1392     } else {
1393         memset(&s->new_picture, 0, sizeof(Picture));
1394     }
1395     return 0;
1396 }
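
/*
 * Reordering example for the logic above with max_b_frames = 2: if the input
 * FIFO holds the display-order frames F0 F1 F2 and F2 is chosen as the next
 * reference, the reordered queue becomes
 *
 *     coded order:   F2(P)  F0(B)  F1(B)
 *     display order: F0     F1     F2
 *
 * i.e. the future reference is coded first and the frames in between follow
 * as B pictures, each being assigned its coded_picture_number at that point.
 */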
1397
1398 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1399                           const AVFrame *pic_arg, int *got_packet)
1400 {
1401     MpegEncContext *s = avctx->priv_data;
1402     int i, stuffing_count, ret;
1403     int context_count = s->slice_context_count;
1404
1405     s->picture_in_gop_number++;
1406
1407     if (load_input_picture(s, pic_arg) < 0)
1408         return -1;
1409
1410     if (select_input_picture(s) < 0) {
1411         return -1;
1412     }
1413
1414     /* output? */
1415     if (s->new_picture.f.data[0]) {
1416         if (!pkt->data &&
1417             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1418             return ret;
1419         if (s->mb_info) {
1420             s->mb_info_ptr = av_packet_new_side_data(pkt,
1421                                  AV_PKT_DATA_H263_MB_INFO,
1422                                  s->mb_width*s->mb_height*12);
1423             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1424         }
1425
1426         for (i = 0; i < context_count; i++) {
1427             int start_y = s->thread_context[i]->start_mb_y;
1428             int   end_y = s->thread_context[i]->  end_mb_y;
1429             int h       = s->mb_height;
1430             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1431             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1432
1433             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1434         }
1435
1436         s->pict_type = s->new_picture.f.pict_type;
1437         //emms_c();
1438         //printf("qs:%f %f %d\n", s->new_picture.quality,
1439         //       s->current_picture.quality, s->qscale);
1440         ff_MPV_frame_start(s, avctx);
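        /* A rough sketch of the retry logic below, read off the code: if the
         * coded frame exceeds max_size = rc_context.buffer_index *
         * rc_max_available_vbv_use and lambda is still below lmax, lambda is
         * raised to max(lambda + 1, lambda * (qscale + 1) / qscale) and the
         * frame is encoded again.  For instance, lambda = 100 at qscale = 10
         * would retry with lambda = max(101, 110) = 110. */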
1441 vbv_retry:
1442         if (encode_picture(s, s->picture_number) < 0)
1443             return -1;
1444
1445         avctx->header_bits = s->header_bits;
1446         avctx->mv_bits     = s->mv_bits;
1447         avctx->misc_bits   = s->misc_bits;
1448         avctx->i_tex_bits  = s->i_tex_bits;
1449         avctx->p_tex_bits  = s->p_tex_bits;
1450         avctx->i_count     = s->i_count;
1451         // FIXME f/b_count in avctx
1452         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1453         avctx->skip_count  = s->skip_count;
1454
1455         ff_MPV_frame_end(s);
1456
1457         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1458             ff_mjpeg_encode_picture_trailer(s);
1459
1460         if (avctx->rc_buffer_size) {
1461             RateControlContext *rcc = &s->rc_context;
1462             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1463
1464             if (put_bits_count(&s->pb) > max_size &&
1465                 s->lambda < s->avctx->lmax) {
1466                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1467                                        (s->qscale + 1) / s->qscale);
1468                 if (s->adaptive_quant) {
1469                     int i;
1470                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1471                         s->lambda_table[i] =
1472                             FFMAX(s->lambda_table[i] + 1,
1473                                   s->lambda_table[i] * (s->qscale + 1) /
1474                                   s->qscale);
1475                 }
1476                 s->mb_skipped = 0;        // also done in MPV_frame_start()
1477                 // the following was changed in encode_picture(), so undo it
1478                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1479                     if (s->flipflop_rounding          ||
1480                         s->codec_id == CODEC_ID_H263P ||
1481                         s->codec_id == CODEC_ID_MPEG4)
1482                         s->no_rounding ^= 1;
1483                 }
1484                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1485                     s->time_base       = s->last_time_base;
1486                     s->last_non_b_time = s->time - s->pp_time;
1487                 }
1488                 //av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
1489                 for (i = 0; i < context_count; i++) {
1490                     PutBitContext *pb = &s->thread_context[i]->pb;
1491                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1492                 }
1493                 goto vbv_retry;
1494             }
1495
1496             assert(s->avctx->rc_max_rate);
1497         }
1498
1499         if (s->flags & CODEC_FLAG_PASS1)
1500             ff_write_pass1_stats(s);
1501
1502         for (i = 0; i < 4; i++) {
1503             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1504             avctx->error[i] += s->current_picture_ptr->f.error[i];
1505         }
1506
1507         if (s->flags & CODEC_FLAG_PASS1)
1508             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1509                    avctx->i_tex_bits + avctx->p_tex_bits ==
1510                        put_bits_count(&s->pb));
1511         flush_put_bits(&s->pb);
1512         s->frame_bits  = put_bits_count(&s->pb);
1513
1514         stuffing_count = ff_vbv_update(s, s->frame_bits);
1515         if (stuffing_count) {
1516             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1517                     stuffing_count + 50) {
1518                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1519                 return -1;
1520             }
1521
1522             switch (s->codec_id) {
1523             case CODEC_ID_MPEG1VIDEO:
1524             case CODEC_ID_MPEG2VIDEO:
1525                 while (stuffing_count--) {
1526                     put_bits(&s->pb, 8, 0);
1527                 }
1528             break;
1529             case CODEC_ID_MPEG4:
1530                 put_bits(&s->pb, 16, 0);
1531                 put_bits(&s->pb, 16, 0x1C3);
1532                 stuffing_count -= 4;
1533                 while (stuffing_count--) {
1534                     put_bits(&s->pb, 8, 0xFF);
1535                 }
1536             break;
1537             default:
1538                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1539             }
1540             flush_put_bits(&s->pb);
1541             s->frame_bits  = put_bits_count(&s->pb);
1542         }
1543
1544         /* update mpeg1/2 vbv_delay for CBR */
1545         if (s->avctx->rc_max_rate                          &&
1546             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1547             s->out_format == FMT_MPEG1                     &&
1548             90000LL * (avctx->rc_buffer_size - 1) <=
1549                 s->avctx->rc_max_rate * 0xFFFFLL) {
1550             int vbv_delay, min_delay;
1551             double inbits  = s->avctx->rc_max_rate *
1552                              av_q2d(s->avctx->time_base);
1553             int    minbits = s->frame_bits - 8 *
1554                              (s->vbv_delay_ptr - s->pb.buf - 1);
1555             double bits    = s->rc_context.buffer_index + minbits - inbits;
1556
1557             if (bits < 0)
1558                 av_log(s->avctx, AV_LOG_ERROR,
1559                        "Internal error, negative bits\n");
1560
1561             assert(s->repeat_first_field == 0);
1562
1563             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1564             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1565                         s->avctx->rc_max_rate;
1566
1567             vbv_delay = FFMAX(vbv_delay, min_delay);
1568
1569             assert(vbv_delay < 0xFFFF);
1570
1571             s->vbv_delay_ptr[0] &= 0xF8;
1572             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1573             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1574             s->vbv_delay_ptr[2] &= 0x07;
1575             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1576             avctx->vbv_delay     = vbv_delay * 300;
1577         }
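        /* Worked example of the packing above, derived from the code for
         * illustration: the 16-bit vbv_delay is split 3+8+5 across three
         * header bytes, so vbv_delay = 0x1234 stores 0x1234 >> 13 = 0 in the
         * low 3 bits of byte 0, (0x1234 >> 5) & 0xFF = 0x91 in byte 1 and
         * (0x1234 & 0x1F) << 3 = 0xA0 in the high 5 bits of byte 2.  The
         * avctx->vbv_delay value is converted from 90 kHz to 27 MHz clock
         * units by the factor 300. */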
1578         s->total_bits     += s->frame_bits;
1579         avctx->frame_bits  = s->frame_bits;
1580
1581         pkt->pts = s->current_picture.f.pts;
1582         if (!s->low_delay) {
1583             if (!s->current_picture.f.coded_picture_number)
1584                 pkt->dts = pkt->pts - s->dts_delta;
1585             else
1586                 pkt->dts = s->reordered_pts;
1587             s->reordered_pts = s->input_picture[0]->f.pts;
1588         } else
1589             pkt->dts = pkt->pts;
1590         if (s->current_picture.f.key_frame)
1591             pkt->flags |= AV_PKT_FLAG_KEY;
1592         if (s->mb_info)
1593             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1594     } else {
1595         assert((put_bits_ptr(&s->pb) == s->pb.buf));
1596         s->frame_bits = 0;
1597     }
1598     assert((s->frame_bits & 7) == 0);
1599
1600     pkt->size = s->frame_bits / 8;
1601     *got_packet = !!pkt->size;
1602     return 0;
1603 }
1604
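/* A rough reading of the function below, for illustration only: it scans the
 * block for isolated +-1 coefficients and charges each one tab[run], where run
 * is the number of zeros since the previous nonzero coefficient (a +-1 right
 * after another nonzero costs 3, one after a long zero run costs 0 or 1).  Any
 * |level| > 1 keeps the block as is.  If the accumulated score stays below the
 * threshold, all scanned coefficients are zeroed, optionally keeping the DC
 * term when skip_dc is set. */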
1605 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1606                                                 int n, int threshold)
1607 {
1608     static const char tab[64] = {
1609         3, 2, 2, 1, 1, 1, 1, 1,
1610         1, 1, 1, 1, 1, 1, 1, 1,
1611         1, 1, 1, 1, 1, 1, 1, 1,
1612         0, 0, 0, 0, 0, 0, 0, 0,
1613         0, 0, 0, 0, 0, 0, 0, 0,
1614         0, 0, 0, 0, 0, 0, 0, 0,
1615         0, 0, 0, 0, 0, 0, 0, 0,
1616         0, 0, 0, 0, 0, 0, 0, 0
1617     };
1618     int score = 0;
1619     int run = 0;
1620     int i;
1621     DCTELEM *block = s->block[n];
1622     const int last_index = s->block_last_index[n];
1623     int skip_dc;
1624
1625     if (threshold < 0) {
1626         skip_dc = 0;
1627         threshold = -threshold;
1628     } else
1629         skip_dc = 1;
1630
1631     /* Are all the coefficients we could zero already zero? */
1632     if (last_index <= skip_dc - 1)
1633         return;
1634
1635     for (i = 0; i <= last_index; i++) {
1636         const int j = s->intra_scantable.permutated[i];
1637         const int level = FFABS(block[j]);
1638         if (level == 1) {
1639             if (skip_dc && i == 0)
1640                 continue;
1641             score += tab[run];
1642             run = 0;
1643         } else if (level > 1) {
1644             return;
1645         } else {
1646             run++;
1647         }
1648     }
1649     if (score >= threshold)
1650         return;
1651     for (i = skip_dc; i <= last_index; i++) {
1652         const int j = s->intra_scantable.permutated[i];
1653         block[j] = 0;
1654     }
1655     if (block[0])
1656         s->block_last_index[n] = 0;
1657     else
1658         s->block_last_index[n] = -1;
1659 }
1660
1661 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block,
1662                                int last_index)
1663 {
1664     int i;
1665     const int maxlevel = s->max_qcoeff;
1666     const int minlevel = s->min_qcoeff;
1667     int overflow = 0;
1668
1669     if (s->mb_intra) {
1670         i = 1; // skip clipping of intra dc
1671     } else
1672         i = 0;
1673
1674     for (; i <= last_index; i++) {
1675         const int j = s->intra_scantable.permutated[i];
1676         int level = block[j];
1677
1678         if (level > maxlevel) {
1679             level = maxlevel;
1680             overflow++;
1681         } else if (level < minlevel) {
1682             level = minlevel;
1683             overflow++;
1684         }
1685
1686         block[j] = level;
1687     }
1688
1689     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1690         av_log(s->avctx, AV_LOG_INFO,
1691                "warning, clipping %d dct coefficients to %d..%d\n",
1692                overflow, minlevel, maxlevel);
1693 }
1694
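/* The weight computed below is, in effect, 36 times the local standard
 * deviation: with count pixels in the (edge-clipped) 3x3 neighbourhood,
 *   count * sqr - sum * sum = count^2 * variance,
 * so (36 * sqrt(count * sqr - sum * sum)) / count ~= 36 * stddev, and a flat
 * neighbourhood gets weight 0.  (Derivation from the code, for illustration;
 * ff_sqrt is an integer square root.) */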
1695 static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
1696 {
1697     int x, y;
1698     // FIXME optimize
1699     for (y = 0; y < 8; y++) {
1700         for (x = 0; x < 8; x++) {
1701             int x2, y2;
1702             int sum = 0;
1703             int sqr = 0;
1704             int count = 0;
1705
1706             for (y2 = FFMAX(y - 1, 0); y2 < FFMIN(8, y + 2); y2++) {
1707                 for (x2= FFMAX(x - 1, 0); x2 < FFMIN(8, x + 2); x2++) {
1708                     int v = ptr[x2 + y2 * stride];
1709                     sum += v;
1710                     sqr += v * v;
1711                     count++;
1712                 }
1713             }
1714             weight[x + 8 * y]= (36 * ff_sqrt(count * sqr - sum * sum)) / count;
1715         }
1716     }
1717 }
1718
1719 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1720                                                 int motion_x, int motion_y,
1721                                                 int mb_block_height,
1722                                                 int mb_block_count)
1723 {
1724     int16_t weight[8][64];
1725     DCTELEM orig[8][64];
1726     const int mb_x = s->mb_x;
1727     const int mb_y = s->mb_y;
1728     int i;
1729     int skip_dct[8];
1730     int dct_offset = s->linesize * 8; // default for progressive frames
1731     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1732     int wrap_y, wrap_c;
1733
1734     for (i = 0; i < mb_block_count; i++)
1735         skip_dct[i] = s->skipdct;
1736
1737     if (s->adaptive_quant) {
1738         const int last_qp = s->qscale;
1739         const int mb_xy = mb_x + mb_y * s->mb_stride;
1740
1741         s->lambda = s->lambda_table[mb_xy];
1742         update_qscale(s);
1743
1744         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1745             s->qscale = s->current_picture_ptr->f.qscale_table[mb_xy];
1746             s->dquant = s->qscale - last_qp;
1747
1748             if (s->out_format == FMT_H263) {
1749                 s->dquant = av_clip(s->dquant, -2, 2);
1750
1751                 if (s->codec_id == CODEC_ID_MPEG4) {
1752                     if (!s->mb_intra) {
1753                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1754                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1755                                 s->dquant = 0;
1756                         }
1757                         if (s->mv_type == MV_TYPE_8X8)
1758                             s->dquant = 0;
1759                     }
1760                 }
1761             }
1762         }
1763         ff_set_qscale(s, last_qp + s->dquant);
1764     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1765         ff_set_qscale(s, s->qscale + s->dquant);
1766
1767     wrap_y = s->linesize;
1768     wrap_c = s->uvlinesize;
1769     ptr_y  = s->new_picture.f.data[0] +
1770              (mb_y * 16 * wrap_y)              + mb_x * 16;
1771     ptr_cb = s->new_picture.f.data[1] +
1772              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1773     ptr_cr = s->new_picture.f.data[2] +
1774              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1775
1776     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1777         uint8_t *ebuf = s->edge_emu_buffer + 32;
1778         s->dsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
1779                                 mb_y * 16, s->width, s->height);
1780         ptr_y = ebuf;
1781         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, 8,
1782                                 mb_block_height, mb_x * 8, mb_y * 8,
1783                                 s->width >> 1, s->height >> 1);
1784         ptr_cb = ebuf + 18 * wrap_y;
1785         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr, wrap_c, 8,
1786                                 mb_block_height, mb_x * 8, mb_y * 8,
1787                                 s->width >> 1, s->height >> 1);
1788         ptr_cr = ebuf + 18 * wrap_y + 8;
1789     }
1790
1791     if (s->mb_intra) {
1792         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1793             int progressive_score, interlaced_score;
1794
1795             s->interlaced_dct = 0;
1796             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1797                                                     NULL, wrap_y, 8) +
1798                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1799                                                     NULL, wrap_y, 8) - 400;
1800
1801             if (progressive_score > 0) {
1802                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1803                                                        NULL, wrap_y * 2, 8) +
1804                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1805                                                        NULL, wrap_y * 2, 8);
1806                 if (progressive_score > interlaced_score) {
1807                     s->interlaced_dct = 1;
1808
1809                     dct_offset = wrap_y;
1810                     wrap_y <<= 1;
1811                     if (s->chroma_format == CHROMA_422)
1812                         wrap_c <<= 1;
1813                 }
1814             }
1815         }
1816
1817         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1818         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1819         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1820         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1821
1822         if (s->flags & CODEC_FLAG_GRAY) {
1823             skip_dct[4] = 1;
1824             skip_dct[5] = 1;
1825         } else {
1826             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1827             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1828             if (!s->chroma_y_shift) { /* 422 */
1829                 s->dsp.get_pixels(s->block[6],
1830                                   ptr_cb + (dct_offset >> 1), wrap_c);
1831                 s->dsp.get_pixels(s->block[7],
1832                                   ptr_cr + (dct_offset >> 1), wrap_c);
1833             }
1834         }
1835     } else {
1836         op_pixels_func (*op_pix)[4];
1837         qpel_mc_func (*op_qpix)[16];
1838         uint8_t *dest_y, *dest_cb, *dest_cr;
1839
1840         dest_y  = s->dest[0];
1841         dest_cb = s->dest[1];
1842         dest_cr = s->dest[2];
1843
1844         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1845             op_pix  = s->dsp.put_pixels_tab;
1846             op_qpix = s->dsp.put_qpel_pixels_tab;
1847         } else {
1848             op_pix  = s->dsp.put_no_rnd_pixels_tab;
1849             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1850         }
1851
1852         if (s->mv_dir & MV_DIR_FORWARD) {
1853             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.f.data,
1854                        op_pix, op_qpix);
1855             op_pix  = s->dsp.avg_pixels_tab;
1856             op_qpix = s->dsp.avg_qpel_pixels_tab;
1857         }
1858         if (s->mv_dir & MV_DIR_BACKWARD) {
1859             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.f.data,
1860                        op_pix, op_qpix);
1861         }
1862
1863         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1864             int progressive_score, interlaced_score;
1865
1866             s->interlaced_dct = 0;
1867             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1868                                                     ptr_y,              wrap_y,
1869                                                     8) +
1870                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1871                                                     ptr_y + wrap_y * 8, wrap_y,
1872                                                     8) - 400;
1873
1874             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1875                 progressive_score -= 400;
1876
1877             if (progressive_score > 0) {
1878                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1879                                                        ptr_y,
1880                                                        wrap_y * 2, 8) +
1881                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1882                                                        ptr_y + wrap_y,
1883                                                        wrap_y * 2, 8);
1884
1885                 if (progressive_score > interlaced_score) {
1886                     s->interlaced_dct = 1;
1887
1888                     dct_offset = wrap_y;
1889                     wrap_y <<= 1;
1890                     if (s->chroma_format == CHROMA_422)
1891                         wrap_c <<= 1;
1892                 }
1893             }
1894         }
1895
1896         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1897         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1898         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1899                            dest_y + dct_offset, wrap_y);
1900         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1901                            dest_y + dct_offset + 8, wrap_y);
1902
1903         if (s->flags & CODEC_FLAG_GRAY) {
1904             skip_dct[4] = 1;
1905             skip_dct[5] = 1;
1906         } else {
1907             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1908             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1909             if (!s->chroma_y_shift) { /* 422 */
1910                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1911                                    dest_cb + (dct_offset >> 1), wrap_c);
1912                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
1913                                    dest_cr + (dct_offset >> 1), wrap_c);
1914             }
1915         }
1916         /* pre quantization */
1917         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1918                 2 * s->qscale * s->qscale) {
1919             // FIXME optimize
1920             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1921                               wrap_y, 8) < 20 * s->qscale)
1922                 skip_dct[0] = 1;
1923             if (s->dsp.sad[1](NULL, ptr_y + 8,
1924                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1925                 skip_dct[1] = 1;
1926             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1927                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1928                 skip_dct[2] = 1;
1929             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1930                               dest_y + dct_offset + 8,
1931                               wrap_y, 8) < 20 * s->qscale)
1932                 skip_dct[3] = 1;
1933             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
1934                               wrap_c, 8) < 20 * s->qscale)
1935                 skip_dct[4] = 1;
1936             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
1937                               wrap_c, 8) < 20 * s->qscale)
1938                 skip_dct[5] = 1;
1939             if (!s->chroma_y_shift) { /* 422 */
1940                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
1941                                   dest_cb + (dct_offset >> 1),
1942                                   wrap_c, 8) < 20 * s->qscale)
1943                     skip_dct[6] = 1;
1944                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
1945                                   dest_cr + (dct_offset >> 1),
1946                                   wrap_c, 8) < 20 * s->qscale)
1947                     skip_dct[7] = 1;
1948             }
1949         }
1950     }
1951
1952     if (s->quantizer_noise_shaping) {
1953         if (!skip_dct[0])
1954             get_visual_weight(weight[0], ptr_y                 , wrap_y);
1955         if (!skip_dct[1])
1956             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
1957         if (!skip_dct[2])
1958             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
1959         if (!skip_dct[3])
1960             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
1961         if (!skip_dct[4])
1962             get_visual_weight(weight[4], ptr_cb                , wrap_c);
1963         if (!skip_dct[5])
1964             get_visual_weight(weight[5], ptr_cr                , wrap_c);
1965         if (!s->chroma_y_shift) { /* 422 */
1966             if (!skip_dct[6])
1967                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
1968                                   wrap_c);
1969             if (!skip_dct[7])
1970                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
1971                                   wrap_c);
1972         }
1973         memcpy(orig[0], s->block[0], sizeof(DCTELEM) * 64 * mb_block_count);
1974     }
1975
1976     /* DCT & quantize */
1977     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
1978     {
1979         for (i = 0; i < mb_block_count; i++) {
1980             if (!skip_dct[i]) {
1981                 int overflow;
1982                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
1983                 // FIXME we could decide to change the quantizer instead of
1984                 // clipping
1985                 // JS: I don't think that would be a good idea; it could lower
1986                 //     quality instead of improving it. Only INTRADC clipping
1987                 //     deserves changes in the quantizer.
1988                 if (overflow)
1989                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
1990             } else
1991                 s->block_last_index[i] = -1;
1992         }
1993         if (s->quantizer_noise_shaping) {
1994             for (i = 0; i < mb_block_count; i++) {
1995                 if (!skip_dct[i]) {
1996                     s->block_last_index[i] =
1997                         dct_quantize_refine(s, s->block[i], weight[i],
1998                                             orig[i], i, s->qscale);
1999                 }
2000             }
2001         }
2002
2003         if (s->luma_elim_threshold && !s->mb_intra)
2004             for (i = 0; i < 4; i++)
2005                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2006         if (s->chroma_elim_threshold && !s->mb_intra)
2007             for (i = 4; i < mb_block_count; i++)
2008                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2009
2010         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2011             for (i = 0; i < mb_block_count; i++) {
2012                 if (s->block_last_index[i] == -1)
2013                     s->coded_score[i] = INT_MAX / 256;
2014             }
2015         }
2016     }
2017
2018     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2019         s->block_last_index[4] =
2020         s->block_last_index[5] = 0;
2021         s->block[4][0] =
2022         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2023     }
2024
2025     // FIXME: the non-C quantize code returns an incorrect block_last_index
2026     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2027         for (i = 0; i < mb_block_count; i++) {
2028             int j;
2029             if (s->block_last_index[i] > 0) {
2030                 for (j = 63; j > 0; j--) {
2031                     if (s->block[i][s->intra_scantable.permutated[j]])
2032                         break;
2033                 }
2034                 s->block_last_index[i] = j;
2035             }
2036         }
2037     }
2038
2039     /* huffman encode */
2040     switch(s->codec_id){ //FIXME a function pointer could be slightly faster
2041     case CODEC_ID_MPEG1VIDEO:
2042     case CODEC_ID_MPEG2VIDEO:
2043         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2044             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2045         break;
2046     case CODEC_ID_MPEG4:
2047         if (CONFIG_MPEG4_ENCODER)
2048             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2049         break;
2050     case CODEC_ID_MSMPEG4V2:
2051     case CODEC_ID_MSMPEG4V3:
2052     case CODEC_ID_WMV1:
2053         if (CONFIG_MSMPEG4_ENCODER)
2054             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2055         break;
2056     case CODEC_ID_WMV2:
2057         if (CONFIG_WMV2_ENCODER)
2058             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2059         break;
2060     case CODEC_ID_H261:
2061         if (CONFIG_H261_ENCODER)
2062             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2063         break;
2064     case CODEC_ID_H263:
2065     case CODEC_ID_H263P:
2066     case CODEC_ID_FLV1:
2067     case CODEC_ID_RV10:
2068     case CODEC_ID_RV20:
2069         if (CONFIG_H263_ENCODER)
2070             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2071         break;
2072     case CODEC_ID_MJPEG:
2073         if (CONFIG_MJPEG_ENCODER)
2074             ff_mjpeg_encode_mb(s, s->block);
2075         break;
2076     default:
2077         assert(0);
2078     }
2079 }
2080
2081 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2082 {
2083     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2084     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2085 }
2086
2087 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2088     int i;
2089
2090     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2091
2092     /* mpeg1 */
2093     d->mb_skip_run= s->mb_skip_run;
2094     for(i=0; i<3; i++)
2095         d->last_dc[i] = s->last_dc[i];
2096
2097     /* statistics */
2098     d->mv_bits= s->mv_bits;
2099     d->i_tex_bits= s->i_tex_bits;
2100     d->p_tex_bits= s->p_tex_bits;
2101     d->i_count= s->i_count;
2102     d->f_count= s->f_count;
2103     d->b_count= s->b_count;
2104     d->skip_count= s->skip_count;
2105     d->misc_bits= s->misc_bits;
2106     d->last_bits= 0;
2107
2108     d->mb_skipped= 0;
2109     d->qscale= s->qscale;
2110     d->dquant= s->dquant;
2111
2112     d->esc3_level_length= s->esc3_level_length;
2113 }
2114
2115 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2116     int i;
2117
2118     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2119     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2120
2121     /* mpeg1 */
2122     d->mb_skip_run= s->mb_skip_run;
2123     for(i=0; i<3; i++)
2124         d->last_dc[i] = s->last_dc[i];
2125
2126     /* statistics */
2127     d->mv_bits= s->mv_bits;
2128     d->i_tex_bits= s->i_tex_bits;
2129     d->p_tex_bits= s->p_tex_bits;
2130     d->i_count= s->i_count;
2131     d->f_count= s->f_count;
2132     d->b_count= s->b_count;
2133     d->skip_count= s->skip_count;
2134     d->misc_bits= s->misc_bits;
2135
2136     d->mb_intra= s->mb_intra;
2137     d->mb_skipped= s->mb_skipped;
2138     d->mv_type= s->mv_type;
2139     d->mv_dir= s->mv_dir;
2140     d->pb= s->pb;
2141     if(s->data_partitioning){
2142         d->pb2= s->pb2;
2143         d->tex_pb= s->tex_pb;
2144     }
2145     d->block= s->block;
2146     for(i=0; i<8; i++)
2147         d->block_last_index[i]= s->block_last_index[i];
2148     d->interlaced_dct= s->interlaced_dct;
2149     d->qscale= s->qscale;
2150
2151     d->esc3_level_length= s->esc3_level_length;
2152 }
2153
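/* Sketch of the rate-distortion score used below, as the code computes it:
 * score starts as the number of bits written for this macroblock candidate
 * and, when mb_decision == FF_MB_DECISION_RD, becomes
 *   score = bits * lambda2 + (SSE of the reconstructed MB << FF_LAMBDA_SHIFT),
 * i.e. a lambda-weighted rate term plus a distortion term.  The candidate with
 * the smallest score is kept and next_block flips so that later candidates are
 * written into the other scratch bit buffer, preserving the best one. */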
2154 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2155                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2156                            int *dmin, int *next_block, int motion_x, int motion_y)
2157 {
2158     int score;
2159     uint8_t *dest_backup[3];
2160
2161     copy_context_before_encode(s, backup, type);
2162
2163     s->block= s->blocks[*next_block];
2164     s->pb= pb[*next_block];
2165     if(s->data_partitioning){
2166         s->pb2   = pb2   [*next_block];
2167         s->tex_pb= tex_pb[*next_block];
2168     }
2169
2170     if(*next_block){
2171         memcpy(dest_backup, s->dest, sizeof(s->dest));
2172         s->dest[0] = s->rd_scratchpad;
2173         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2174         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2175         assert(s->linesize >= 32); //FIXME
2176     }
2177
2178     encode_mb(s, motion_x, motion_y);
2179
2180     score= put_bits_count(&s->pb);
2181     if(s->data_partitioning){
2182         score+= put_bits_count(&s->pb2);
2183         score+= put_bits_count(&s->tex_pb);
2184     }
2185
2186     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2187         ff_MPV_decode_mb(s, s->block);
2188
2189         score *= s->lambda2;
2190         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2191     }
2192
2193     if(*next_block){
2194         memcpy(s->dest, dest_backup, sizeof(s->dest));
2195     }
2196
2197     if(score<*dmin){
2198         *dmin= score;
2199         *next_block^=1;
2200
2201         copy_context_after_encode(best, s, type);
2202     }
2203 }
2204
2205 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2206     uint32_t *sq = ff_squareTbl + 256;
2207     int acc=0;
2208     int x,y;
2209
2210     if(w==16 && h==16)
2211         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2212     else if(w==8 && h==8)
2213         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2214
2215     for(y=0; y<h; y++){
2216         for(x=0; x<w; x++){
2217             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2218         }
2219     }
2220
2221     assert(acc>=0);
2222
2223     return acc;
2224 }
2225
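/* sse_mb() below compares the source macroblock in new_picture against the
 * reconstruction in s->dest: full 16x16 blocks use the dsp sse[] tables (or
 * nsse[] when mb_cmp == FF_CMP_NSSE), while partial macroblocks at the right
 * or bottom picture border fall back to the scalar sse() above with the
 * clipped width and height.  (Descriptive summary of the code that follows.) */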
2226 static int sse_mb(MpegEncContext *s){
2227     int w= 16;
2228     int h= 16;
2229
2230     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2231     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2232
2233     if(w==16 && h==16)
2234       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2235         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2236                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2237                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2238       }else{
2239         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2240                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2241                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2242       }
2243     else
2244         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2245                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2246                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2247 }
2248
2249 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2250     MpegEncContext *s= *(void**)arg;
2251
2252
2253     s->me.pre_pass=1;
2254     s->me.dia_size= s->avctx->pre_dia_size;
2255     s->first_slice_line=1;
2256     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2257         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2258             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2259         }
2260         s->first_slice_line=0;
2261     }
2262
2263     s->me.pre_pass=0;
2264
2265     return 0;
2266 }
2267
2268 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2269     MpegEncContext *s= *(void**)arg;
2270
2271     ff_check_alignment();
2272
2273     s->me.dia_size= s->avctx->dia_size;
2274     s->first_slice_line=1;
2275     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2276         s->mb_x=0; //for block init below
2277         ff_init_block_index(s);
2278         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2279             s->block_index[0]+=2;
2280             s->block_index[1]+=2;
2281             s->block_index[2]+=2;
2282             s->block_index[3]+=2;
2283
2284             /* compute motion vector & mb_type and store in context */
2285             if(s->pict_type==AV_PICTURE_TYPE_B)
2286                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2287             else
2288                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2289         }
2290         s->first_slice_line=0;
2291     }
2292     return 0;
2293 }
2294
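/* The per-macroblock variance computed below is approximately
 *   varc ~= (sum(v^2) - sum(v)^2 / 256) / 256
 * over the 16x16 luma block (pix_norm1 returns the sum of squares, pix_sum
 * the plain sum; the +500 and +128 terms are rounding biases), and mb_mean
 * is (sum + 128) >> 8, i.e. the rounded average.  Derived from the code, for
 * illustration. */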
2295 static int mb_var_thread(AVCodecContext *c, void *arg){
2296     MpegEncContext *s= *(void**)arg;
2297     int mb_x, mb_y;
2298
2299     ff_check_alignment();
2300
2301     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2302         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2303             int xx = mb_x * 16;
2304             int yy = mb_y * 16;
2305             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2306             int varc;
2307             int sum = s->dsp.pix_sum(pix, s->linesize);
2308
2309             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2310
2311             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2312             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2313             s->me.mb_var_sum_temp    += varc;
2314         }
2315     }
2316     return 0;
2317 }
2318
2319 static void write_slice_end(MpegEncContext *s){
2320     if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4){
2321         if(s->partitioned_frame){
2322             ff_mpeg4_merge_partitions(s);
2323         }
2324
2325         ff_mpeg4_stuffing(&s->pb);
2326     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2327         ff_mjpeg_encode_stuffing(&s->pb);
2328     }
2329
2330     avpriv_align_put_bits(&s->pb);
2331     flush_put_bits(&s->pb);
2332
2333     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2334         s->misc_bits+= get_bits_diff(s);
2335 }
2336
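/* Each H.263 mb_info entry written below is 12 bytes, matching the
 * mb_width * mb_height * 12 side-data allocation in ff_MPV_encode_picture():
 *   4 bytes  bit offset of the macroblock within the packet (little endian)
 *   1 byte   qscale
 *   1 byte   GOB number
 *   2 bytes  macroblock address within the GOB (little endian)
 *   4 bytes  hmv1, vmv1, hmv2, vmv2 motion-vector predictors
 * (layout read off the bytestream_put_* calls that follow). */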
2337 static void write_mb_info(MpegEncContext *s)
2338 {
2339     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2340     int offset = put_bits_count(&s->pb);
2341     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2342     int gobn = s->mb_y / s->gob_index;
2343     int pred_x, pred_y;
2344     if (CONFIG_H263_ENCODER)
2345         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2346     bytestream_put_le32(&ptr, offset);
2347     bytestream_put_byte(&ptr, s->qscale);
2348     bytestream_put_byte(&ptr, gobn);
2349     bytestream_put_le16(&ptr, mba);
2350     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2351     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2352     /* 4MV not implemented */
2353     bytestream_put_byte(&ptr, 0); /* hmv2 */
2354     bytestream_put_byte(&ptr, 0); /* vmv2 */
2355 }
2356
2357 static void update_mb_info(MpegEncContext *s, int startcode)
2358 {
2359     if (!s->mb_info)
2360         return;
2361     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2362         s->mb_info_size += 12;
2363         s->prev_mb_info = s->last_mb_info;
2364     }
2365     if (startcode) {
2366         s->prev_mb_info = put_bits_count(&s->pb)/8;
2367         /* This might have incremented mb_info_size above, and we return without
2368          * actually writing any info into that slot yet. But in that case,
2369          * this will be called again after the start code has been written,
2370          * and the mb info will be written then. */
2371         return;
2372     }
2373
2374     s->last_mb_info = put_bits_count(&s->pb)/8;
2375     if (!s->mb_info_size)
2376         s->mb_info_size += 12;
2377     write_mb_info(s);
2378 }
2379
2380 static int encode_thread(AVCodecContext *c, void *arg){
2381     MpegEncContext *s= *(void**)arg;
2382     int mb_x, mb_y, pdif = 0;
2383     int chr_h= 16>>s->chroma_y_shift;
2384     int i, j;
2385     MpegEncContext best_s, backup_s;
2386     uint8_t bit_buf[2][MAX_MB_BYTES];
2387     uint8_t bit_buf2[2][MAX_MB_BYTES];
2388     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2389     PutBitContext pb[2], pb2[2], tex_pb[2];
2390 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
2391
2392     ff_check_alignment();
2393
2394     for(i=0; i<2; i++){
2395         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2396         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2397         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2398     }
2399
2400     s->last_bits= put_bits_count(&s->pb);
2401     s->mv_bits=0;
2402     s->misc_bits=0;
2403     s->i_tex_bits=0;
2404     s->p_tex_bits=0;
2405     s->i_count=0;
2406     s->f_count=0;
2407     s->b_count=0;
2408     s->skip_count=0;
2409
2410     for(i=0; i<3; i++){
2411         /* init last dc values */
2412         /* note: quant matrix value (8) is implied here */
2413         s->last_dc[i] = 128 << s->intra_dc_precision;
2414
2415         s->current_picture.f.error[i] = 0;
2416     }
2417     s->mb_skip_run = 0;
2418     memset(s->last_mv, 0, sizeof(s->last_mv));
2419
2420     s->last_mv_dir = 0;
2421
2422     switch(s->codec_id){
2423     case CODEC_ID_H263:
2424     case CODEC_ID_H263P:
2425     case CODEC_ID_FLV1:
2426         if (CONFIG_H263_ENCODER)
2427             s->gob_index = ff_h263_get_gob_height(s);
2428         break;
2429     case CODEC_ID_MPEG4:
2430         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2431             ff_mpeg4_init_partitions(s);
2432         break;
2433     }
2434
2435     s->resync_mb_x=0;
2436     s->resync_mb_y=0;
2437     s->first_slice_line = 1;
2438     s->ptr_lastgob = s->pb.buf;
2439     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2440 //    printf("row %d at %X\n", s->mb_y, (int)s);
2441         s->mb_x=0;
2442         s->mb_y= mb_y;
2443
2444         ff_set_qscale(s, s->qscale);
2445         ff_init_block_index(s);
2446
2447         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2448             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2449             int mb_type= s->mb_type[xy];
2450 //            int d;
2451             int dmin= INT_MAX;
2452             int dir;
2453
2454             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2455                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2456                 return -1;
2457             }
2458             if(s->data_partitioning){
2459                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2460                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2461                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2462                     return -1;
2463                 }
2464             }
2465
2466             s->mb_x = mb_x;
2467             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2468             ff_update_block_index(s);
2469
2470             if(CONFIG_H261_ENCODER && s->codec_id == CODEC_ID_H261){
2471                 ff_h261_reorder_mb_index(s);
2472                 xy= s->mb_y*s->mb_stride + s->mb_x;
2473                 mb_type= s->mb_type[xy];
2474             }
2475
2476             /* write gob / video packet header  */
2477             if(s->rtp_mode){
2478                 int current_packet_size, is_gob_start;
2479
2480                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2481
2482                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2483
2484                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2485
2486                 switch(s->codec_id){
2487                 case CODEC_ID_H263:
2488                 case CODEC_ID_H263P:
2489                     if(!s->h263_slice_structured)
2490                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2491                     break;
2492                 case CODEC_ID_MPEG2VIDEO:
2493                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
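                    /* fall through: the mb_skip_run check below applies to MPEG-2 as well */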
2494                 case CODEC_ID_MPEG1VIDEO:
2495                     if(s->mb_skip_run) is_gob_start=0;
2496                     break;
2497                 }
2498
2499                 if(is_gob_start){
2500                     if(s->start_mb_y != mb_y || mb_x!=0){
2501                         write_slice_end(s);
2502
2503                         if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
2504                             ff_mpeg4_init_partitions(s);
2505                         }
2506                     }
2507
2508                     assert((put_bits_count(&s->pb)&7) == 0);
2509                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2510
2511                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2512                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2513                         int d= 100 / s->avctx->error_rate;
2514                         if(r % d == 0){
2515                             current_packet_size=0;
2516                             s->pb.buf_ptr= s->ptr_lastgob;
2517                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2518                         }
2519                     }
2520
2521                     if (s->avctx->rtp_callback){
2522                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2523                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2524                     }
2525                     update_mb_info(s, 1);
2526
2527                     switch(s->codec_id){
2528                     case CODEC_ID_MPEG4:
2529                         if (CONFIG_MPEG4_ENCODER) {
2530                             ff_mpeg4_encode_video_packet_header(s);
2531                             ff_mpeg4_clean_buffers(s);
2532                         }
2533                     break;
2534                     case CODEC_ID_MPEG1VIDEO:
2535                     case CODEC_ID_MPEG2VIDEO:
2536                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2537                             ff_mpeg1_encode_slice_header(s);
2538                             ff_mpeg1_clean_buffers(s);
2539                         }
2540                     break;
2541                     case CODEC_ID_H263:
2542                     case CODEC_ID_H263P:
2543                         if (CONFIG_H263_ENCODER)
2544                             ff_h263_encode_gob_header(s, mb_y);
2545                     break;
2546                     }
2547
2548                     if(s->flags&CODEC_FLAG_PASS1){
2549                         int bits= put_bits_count(&s->pb);
2550                         s->misc_bits+= bits - s->last_bits;
2551                         s->last_bits= bits;
2552                     }
2553
2554                     s->ptr_lastgob += current_packet_size;
2555                     s->first_slice_line=1;
2556                     s->resync_mb_x=mb_x;
2557                     s->resync_mb_y=mb_y;
2558                 }
2559             }
2560
2561             if(  (s->resync_mb_x   == s->mb_x)
2562                && s->resync_mb_y+1 == s->mb_y){
2563                 s->first_slice_line=0;
2564             }
2565
2566             s->mb_skipped=0;
2567             s->dquant=0; //only for QP_RD
2568
2569             update_mb_info(s, 0);
2570
2571             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2572                 int next_block=0;
2573                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2574
2575                 copy_context_before_encode(&backup_s, s, -1);
2576                 backup_s.pb= s->pb;
2577                 best_s.data_partitioning= s->data_partitioning;
2578                 best_s.partitioned_frame= s->partitioned_frame;
2579                 if(s->data_partitioning){
2580                     backup_s.pb2= s->pb2;
2581                     backup_s.tex_pb= s->tex_pb;
2582                 }
2583
2584                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2585                     s->mv_dir = MV_DIR_FORWARD;
2586                     s->mv_type = MV_TYPE_16X16;
2587                     s->mb_intra= 0;
2588                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2589                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2590                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2591                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2592                 }
2593                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2594                     s->mv_dir = MV_DIR_FORWARD;
2595                     s->mv_type = MV_TYPE_FIELD;
2596                     s->mb_intra= 0;
2597                     for(i=0; i<2; i++){
2598                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2599                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2600                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2601                     }
2602                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2603                                  &dmin, &next_block, 0, 0);
2604                 }
2605                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2606                     s->mv_dir = MV_DIR_FORWARD;
2607                     s->mv_type = MV_TYPE_16X16;
2608                     s->mb_intra= 0;
2609                     s->mv[0][0][0] = 0;
2610                     s->mv[0][0][1] = 0;
2611                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2612                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2613                 }
2614                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2615                     s->mv_dir = MV_DIR_FORWARD;
2616                     s->mv_type = MV_TYPE_8X8;
2617                     s->mb_intra= 0;
2618                     for(i=0; i<4; i++){
2619                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2620                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2621                     }
2622                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2623                                  &dmin, &next_block, 0, 0);
2624                 }
2625                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2626                     s->mv_dir = MV_DIR_FORWARD;
2627                     s->mv_type = MV_TYPE_16X16;
2628                     s->mb_intra= 0;
2629                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2630                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2631                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2632                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2633                 }
2634                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2635                     s->mv_dir = MV_DIR_BACKWARD;
2636                     s->mv_type = MV_TYPE_16X16;
2637                     s->mb_intra= 0;
2638                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2639                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2640                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2641                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2642                 }
2643                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2644                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2645                     s->mv_type = MV_TYPE_16X16;
2646                     s->mb_intra= 0;
2647                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2648                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2649                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2650                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2651                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2652                                  &dmin, &next_block, 0, 0);
2653                 }
2654                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2655                     s->mv_dir = MV_DIR_FORWARD;
2656                     s->mv_type = MV_TYPE_FIELD;
2657                     s->mb_intra= 0;
2658                     for(i=0; i<2; i++){
2659                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2660                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2661                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2662                     }
2663                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2664                                  &dmin, &next_block, 0, 0);
2665                 }
2666                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2667                     s->mv_dir = MV_DIR_BACKWARD;
2668                     s->mv_type = MV_TYPE_FIELD;
2669                     s->mb_intra= 0;
2670                     for(i=0; i<2; i++){
2671                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2672                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2673                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2674                     }
2675                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2676                                  &dmin, &next_block, 0, 0);
2677                 }
2678                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2679                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2680                     s->mv_type = MV_TYPE_FIELD;
2681                     s->mb_intra= 0;
2682                     for(dir=0; dir<2; dir++){
2683                         for(i=0; i<2; i++){
2684                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2685                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2686                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2687                         }
2688                     }
2689                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2690                                  &dmin, &next_block, 0, 0);
2691                 }
2692                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2693                     s->mv_dir = 0;
2694                     s->mv_type = MV_TYPE_16X16;
2695                     s->mb_intra= 1;
2696                     s->mv[0][0][0] = 0;
2697                     s->mv[0][0][1] = 0;
2698                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2699                                  &dmin, &next_block, 0, 0);
2700                     if(s->h263_pred || s->h263_aic){
2701                         if(best_s.mb_intra)
2702                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2703                         else
2704                             ff_clean_intra_table_entries(s); //old mode?
2705                     }
2706                 }
2707
2708                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2709                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2710                         const int last_qp= backup_s.qscale;
2711                         int qpi, qp, dc[6];
2712                         DCTELEM ac[6][16];
2713                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2714                         static const int dquant_tab[4]={-1,1,-2,2};
2715
2716                         assert(backup_s.dquant == 0);
2717
2718                         //FIXME intra
2719                         s->mv_dir= best_s.mv_dir;
2720                         s->mv_type = MV_TYPE_16X16;
2721                         s->mb_intra= best_s.mb_intra;
2722                         s->mv[0][0][0] = best_s.mv[0][0][0];
2723                         s->mv[0][0][1] = best_s.mv[0][0][1];
2724                         s->mv[1][0][0] = best_s.mv[1][0][0];
2725                         s->mv[1][0][1] = best_s.mv[1][0][1];
2726
2727                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2728                         for(; qpi<4; qpi++){
2729                             int dquant= dquant_tab[qpi];
2730                             qp= last_qp + dquant;
2731                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2732                                 continue;
2733                             backup_s.dquant= dquant;
2734                             if(s->mb_intra && s->dc_val[0]){
2735                                 for(i=0; i<6; i++){
2736                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2737                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
2738                                 }
2739                             }
2740
2741                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2742                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2743                             if(best_s.qscale != qp){
2744                                 if(s->mb_intra && s->dc_val[0]){
2745                                     for(i=0; i<6; i++){
2746                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2747                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
2748                                     }
2749                                 }
2750                             }
2751                         }
2752                     }
2753                 }
2754                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2755                     int mx= s->b_direct_mv_table[xy][0];
2756                     int my= s->b_direct_mv_table[xy][1];
2757
2758                     backup_s.dquant = 0;
2759                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2760                     s->mb_intra= 0;
2761                     ff_mpeg4_set_direct_mv(s, mx, my);
2762                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2763                                  &dmin, &next_block, mx, my);
2764                 }
2765                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2766                     backup_s.dquant = 0;
2767                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2768                     s->mb_intra= 0;
2769                     ff_mpeg4_set_direct_mv(s, 0, 0);
2770                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2771                                  &dmin, &next_block, 0, 0);
2772                 }
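                /* SKIP_RD: if the best inter candidate has coded coefficients,
                 * also try the same motion with the DCT coefficients skipped
                 * (skipdct=1) and keep that variant if it scores better. */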
2773                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2774                     int coded=0;
2775                     for(i=0; i<6; i++)
2776                         coded |= s->block_last_index[i];
2777                     if(coded){
2778                         int mx,my;
2779                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2780                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2781                             mx=my=0; //FIXME find the one we actually used
2782                             ff_mpeg4_set_direct_mv(s, mx, my);
2783                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2784                             mx= s->mv[1][0][0];
2785                             my= s->mv[1][0][1];
2786                         }else{
2787                             mx= s->mv[0][0][0];
2788                             my= s->mv[0][0][1];
2789                         }
2790
2791                         s->mv_dir= best_s.mv_dir;
2792                         s->mv_type = best_s.mv_type;
2793                         s->mb_intra= 0;
2794 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2795                         s->mv[0][0][1] = best_s.mv[0][0][1];
2796                         s->mv[1][0][0] = best_s.mv[1][0][0];
2797                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2798                         backup_s.dquant= 0;
2799                         s->skipdct=1;
2800                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2801                                         &dmin, &next_block, mx, my);
2802                         s->skipdct=0;
2803                     }
2804                 }
2805
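                /* commit the winning candidate: store its qscale, restore its
                 * context and splice its bits from the scratch buffer back
                 * into the real bitstream(s). */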
2806                 s->current_picture.f.qscale_table[xy] = best_s.qscale;
2807
2808                 copy_context_after_encode(s, &best_s, -1);
2809
2810                 pb_bits_count= put_bits_count(&s->pb);
2811                 flush_put_bits(&s->pb);
2812                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2813                 s->pb= backup_s.pb;
2814
2815                 if(s->data_partitioning){
2816                     pb2_bits_count= put_bits_count(&s->pb2);
2817                     flush_put_bits(&s->pb2);
2818                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2819                     s->pb2= backup_s.pb2;
2820
2821                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2822                     flush_put_bits(&s->tex_pb);
2823                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2824                     s->tex_pb= backup_s.tex_pb;
2825                 }
2826                 s->last_bits= put_bits_count(&s->pb);
2827
2828                 if (CONFIG_H263_ENCODER &&
2829                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2830                     ff_h263_update_motion_val(s);
2831
2832                 if(next_block==0){ //FIXME 16 vs linesize16
2833                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2834                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2835                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2836                 }
2837
2838                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2839                     ff_MPV_decode_mb(s, s->block);
2840             } else {
2841                 int motion_x = 0, motion_y = 0;
2842                 s->mv_type=MV_TYPE_16X16;
2843                 // only one MB-Type possible
2844
2845                 switch(mb_type){
2846                 case CANDIDATE_MB_TYPE_INTRA:
2847                     s->mv_dir = 0;
2848                     s->mb_intra= 1;
2849                     motion_x= s->mv[0][0][0] = 0;
2850                     motion_y= s->mv[0][0][1] = 0;
2851                     break;
2852                 case CANDIDATE_MB_TYPE_INTER:
2853                     s->mv_dir = MV_DIR_FORWARD;
2854                     s->mb_intra= 0;
2855                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2856                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2857                     break;
2858                 case CANDIDATE_MB_TYPE_INTER_I:
2859                     s->mv_dir = MV_DIR_FORWARD;
2860                     s->mv_type = MV_TYPE_FIELD;
2861                     s->mb_intra= 0;
2862                     for(i=0; i<2; i++){
2863                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2864                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2865                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2866                     }
2867                     break;
2868                 case CANDIDATE_MB_TYPE_INTER4V:
2869                     s->mv_dir = MV_DIR_FORWARD;
2870                     s->mv_type = MV_TYPE_8X8;
2871                     s->mb_intra= 0;
2872                     for(i=0; i<4; i++){
2873                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2874                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2875                     }
2876                     break;
2877                 case CANDIDATE_MB_TYPE_DIRECT:
2878                     if (CONFIG_MPEG4_ENCODER) {
2879                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2880                         s->mb_intra= 0;
2881                         motion_x=s->b_direct_mv_table[xy][0];
2882                         motion_y=s->b_direct_mv_table[xy][1];
2883                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2884                     }
2885                     break;
2886                 case CANDIDATE_MB_TYPE_DIRECT0:
2887                     if (CONFIG_MPEG4_ENCODER) {
2888                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2889                         s->mb_intra= 0;
2890                         ff_mpeg4_set_direct_mv(s, 0, 0);
2891                     }
2892                     break;
2893                 case CANDIDATE_MB_TYPE_BIDIR:
2894                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2895                     s->mb_intra= 0;
2896                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2897                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2898                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2899                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2900                     break;
2901                 case CANDIDATE_MB_TYPE_BACKWARD:
2902                     s->mv_dir = MV_DIR_BACKWARD;
2903                     s->mb_intra= 0;
2904                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2905                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2906                     break;
2907                 case CANDIDATE_MB_TYPE_FORWARD:
2908                     s->mv_dir = MV_DIR_FORWARD;
2909                     s->mb_intra= 0;
2910                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2911                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2912 //                    printf(" %d %d ", motion_x, motion_y);
2913                     break;
2914                 case CANDIDATE_MB_TYPE_FORWARD_I:
2915                     s->mv_dir = MV_DIR_FORWARD;
2916                     s->mv_type = MV_TYPE_FIELD;
2917                     s->mb_intra= 0;
2918                     for(i=0; i<2; i++){
2919                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2920                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2921                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2922                     }
2923                     break;
2924                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2925                     s->mv_dir = MV_DIR_BACKWARD;
2926                     s->mv_type = MV_TYPE_FIELD;
2927                     s->mb_intra= 0;
2928                     for(i=0; i<2; i++){
2929                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2930                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2931                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2932                     }
2933                     break;
2934                 case CANDIDATE_MB_TYPE_BIDIR_I:
2935                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2936                     s->mv_type = MV_TYPE_FIELD;
2937                     s->mb_intra= 0;
2938                     for(dir=0; dir<2; dir++){
2939                         for(i=0; i<2; i++){
2940                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2941                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2942                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2943                         }
2944                     }
2945                     break;
2946                 default:
2947                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
2948                 }
2949
2950                 encode_mb(s, motion_x, motion_y);
2951
2952                 // RAL: Update last macroblock type
2953                 s->last_mv_dir = s->mv_dir;
2954
2955                 if (CONFIG_H263_ENCODER &&
2956                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2957                     ff_h263_update_motion_val(s);
2958
2959                 ff_MPV_decode_mb(s, s->block);
2960             }
2961
2962             /* clean the MV table in I/P/S frames; it is used for direct mode in B-frames */
2963             if(s->mb_intra /* && I,P,S_TYPE */){
2964                 s->p_mv_table[xy][0]=0;
2965                 s->p_mv_table[xy][1]=0;
2966             }
2967
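            /* accumulate the per-plane squared error against the source;
             * it is used later for the PSNR report */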
2968             if(s->flags&CODEC_FLAG_PSNR){
2969                 int w= 16;
2970                 int h= 16;
2971
2972                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2973                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2974
2975                 s->current_picture.f.error[0] += sse(
2976                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
2977                     s->dest[0], w, h, s->linesize);
2978                 s->current_picture.f.error[1] += sse(
2979                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2980                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2981                 s->current_picture.f.error[2] += sse(
2982                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2983                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2984             }
2985             if(s->loop_filter){
2986                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
2987                     ff_h263_loop_filter(s);
2988             }
2989 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
2990         }
2991     }
2992
2993     // not pretty, but the ext header must be written before write_slice_end() flushes the slice, so it has to be here
2994     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
2995         ff_msmpeg4_encode_ext_header(s);
2996
2997     write_slice_end(s);
2998
2999     /* Send the last GOB if RTP */
3000     if (s->avctx->rtp_callback) {
3001         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3002         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3003         /* Call the RTP callback to send the last GOB */
3004         emms_c();
3005         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3006     }
3007
3008     return 0;
3009 }
3010
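/* MERGE() adds a field of the src slice context onto dst and clears it in src;
 * the two helpers below use it to accumulate per-slice statistics into the
 * main context after motion estimation and after encoding. */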
3011 #define MERGE(field) dst->field += src->field; src->field=0
3012 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3013     MERGE(me.scene_change_score);
3014     MERGE(me.mc_mb_var_sum_temp);
3015     MERGE(me.mb_var_sum_temp);
3016 }
3017
3018 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3019     int i;
3020
3021     MERGE(dct_count[0]); //note: the other dct vars are not part of the context
3022     MERGE(dct_count[1]);
3023     MERGE(mv_bits);
3024     MERGE(i_tex_bits);
3025     MERGE(p_tex_bits);
3026     MERGE(i_count);
3027     MERGE(f_count);
3028     MERGE(b_count);
3029     MERGE(skip_count);
3030     MERGE(misc_bits);
3031     MERGE(error_count);
3032     MERGE(padding_bug_score);
3033     MERGE(current_picture.f.error[0]);
3034     MERGE(current_picture.f.error[1]);
3035     MERGE(current_picture.f.error[2]);
3036
3037     if(dst->avctx->noise_reduction){
3038         for(i=0; i<64; i++){
3039             MERGE(dct_error_sum[0][i]);
3040             MERGE(dct_error_sum[1][i]);
3041         }
3042     }
3043
3044     assert(put_bits_count(&src->pb) % 8 ==0);
3045     assert(put_bits_count(&dst->pb) % 8 ==0);
3046     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3047     flush_put_bits(&dst->pb);
3048 }
3049
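/**
 * Pick the frame-level quality: use a pending next_lambda if set, otherwise ask
 * the rate control module; for adaptive quantization additionally smooth the
 * per-MB qscale table so the differences stay codable, then derive lambda.
 */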
3050 static int estimate_qp(MpegEncContext *s, int dry_run){
3051     if (s->next_lambda){
3052         s->current_picture_ptr->f.quality =
3053         s->current_picture.f.quality = s->next_lambda;
3054         if(!dry_run) s->next_lambda= 0;
3055     } else if (!s->fixed_qscale) {
3056         s->current_picture_ptr->f.quality =
3057         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3058         if (s->current_picture.f.quality < 0)
3059             return -1;
3060     }
3061
3062     if(s->adaptive_quant){
3063         switch(s->codec_id){
3064         case CODEC_ID_MPEG4:
3065             if (CONFIG_MPEG4_ENCODER)
3066                 ff_clean_mpeg4_qscales(s);
3067             break;
3068         case CODEC_ID_H263:
3069         case CODEC_ID_H263P:
3070         case CODEC_ID_FLV1:
3071             if (CONFIG_H263_ENCODER)
3072                 ff_clean_h263_qscales(s);
3073             break;
3074         default:
3075             ff_init_qscale_tab(s);
3076         }
3077
3078         s->lambda= s->lambda_table[0];
3079         //FIXME broken
3080     }else
3081         s->lambda = s->current_picture.f.quality;
3082 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
3083     update_qscale(s);
3084     return 0;
3085 }
3086
3087 /* must be called before writing the header */
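/* s->time is the current frame's time in time_base units; pp_time is the
 * distance between the two most recent non-B frames and pb_time the distance
 * from the older of those to the current B-frame. */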
3088 static void set_frame_distances(MpegEncContext * s){
3089     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3090     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3091
3092     if(s->pict_type==AV_PICTURE_TYPE_B){
3093         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3094         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3095     }else{
3096         s->pp_time= s->time - s->last_non_b_time;
3097         s->last_non_b_time= s->time;
3098         assert(s->picture_number==0 || s->pp_time > 0);
3099     }
3100 }
3101
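/**
 * Encode one picture: set up frame distances and rounding, estimate motion
 * (or MB variance for I-frames) across the slice threads, possibly turn a P
 * frame into an I frame on scene change, choose f_code/b_code, estimate the
 * qscale, write the picture header, and finally run encode_thread() for every
 * slice context and merge the results.
 */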
3102 static int encode_picture(MpegEncContext *s, int picture_number)
3103 {
3104     int i;
3105     int bits;
3106     int context_count = s->slice_context_count;
3107
3108     s->picture_number = picture_number;
3109
3110     /* Reset the average MB variance */
3111     s->me.mb_var_sum_temp    =
3112     s->me.mc_mb_var_sum_temp = 0;
3113
3114     /* we need to initialize some time vars before we can encode b-frames */
3115     // RAL: Condition added for MPEG1VIDEO
3116     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3117         set_frame_distances(s);
3118     if(CONFIG_MPEG4_ENCODER && s->codec_id == CODEC_ID_MPEG4)
3119         ff_set_mpeg4_time(s);
3120
3121     s->me.scene_change_score=0;
3122
3123 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3124
3125     if(s->pict_type==AV_PICTURE_TYPE_I){
3126         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3127         else                        s->no_rounding=0;
3128     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3129         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
3130             s->no_rounding ^= 1;
3131     }
3132
3133     if(s->flags & CODEC_FLAG_PASS2){
3134         if (estimate_qp(s,1) < 0)
3135             return -1;
3136         ff_get_2pass_fcode(s);
3137     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3138         if(s->pict_type==AV_PICTURE_TYPE_B)
3139             s->lambda= s->last_lambda_for[s->pict_type];
3140         else
3141             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3142         update_qscale(s);
3143     }
3144
3145     s->mb_intra=0; //for the rate distortion & bit compare functions
3146     for(i=1; i<context_count; i++){
3147         ff_update_duplicate_context(s->thread_context[i], s);
3148     }
3149
3150     if(ff_init_me(s)<0)
3151         return -1;
3152
3153     /* Estimate motion for every MB */
3154     if(s->pict_type != AV_PICTURE_TYPE_I){
3155         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3156         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3157         if(s->pict_type != AV_PICTURE_TYPE_B && s->avctx->me_threshold==0){
3158             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3159                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3160             }
3161         }
3162
3163         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3164     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3165         /* I-Frame */
3166         for(i=0; i<s->mb_stride*s->mb_height; i++)
3167             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3168
3169         if(!s->fixed_qscale){
3170             /* finding spatial complexity for I-frame rate control */
3171             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3172         }
3173     }
3174     for(i=1; i<context_count; i++){
3175         merge_context_after_me(s, s->thread_context[i]);
3176     }
3177     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3178     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3179     emms_c();
3180
3181     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3182         s->pict_type= AV_PICTURE_TYPE_I;
3183         for(i=0; i<s->mb_stride*s->mb_height; i++)
3184             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3185 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3186     }
3187
3188     if(!s->umvplus){
3189         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3190             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3191
3192             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3193                 int a,b;
3194                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3195                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3196                 s->f_code= FFMAX3(s->f_code, a, b);
3197             }
3198
3199             ff_fix_long_p_mvs(s);
3200             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3201             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3202                 int j;
3203                 for(i=0; i<2; i++){
3204                     for(j=0; j<2; j++)
3205                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3206                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3207                 }
3208             }
3209         }
3210
3211         if(s->pict_type==AV_PICTURE_TYPE_B){
3212             int a, b;
3213
3214             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3215             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3216             s->f_code = FFMAX(a, b);
3217
3218             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3219             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3220             s->b_code = FFMAX(a, b);
3221
3222             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3223             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3224             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3225             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3226             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3227                 int dir, j;
3228                 for(dir=0; dir<2; dir++){
3229                     for(i=0; i<2; i++){
3230                         for(j=0; j<2; j++){
3231                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3232                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3233                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3234                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3235                         }
3236                     }
3237                 }
3238             }
3239         }
3240     }
3241
3242     if (estimate_qp(s, 0) < 0)
3243         return -1;
3244
3245     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3246         s->qscale= 3; //reduce clipping problems
3247
3248     if (s->out_format == FMT_MJPEG) {
3249         /* for mjpeg, we do include qscale in the matrix */
3250         for(i=1;i<64;i++){
3251             int j= s->dsp.idct_permutation[i];
3252
3253             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3254         }
3255         s->y_dc_scale_table=
3256         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3257         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3258         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3259                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3260         s->qscale= 8;
3261     }
3262
3263     //FIXME var duplication
3264     s->current_picture_ptr->f.key_frame =
3265     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3266     s->current_picture_ptr->f.pict_type =
3267     s->current_picture.f.pict_type = s->pict_type;
3268
3269     if (s->current_picture.f.key_frame)
3270         s->picture_in_gop_number=0;
3271
3272     s->last_bits= put_bits_count(&s->pb);
3273     switch(s->out_format) {
3274     case FMT_MJPEG:
3275         if (CONFIG_MJPEG_ENCODER)
3276             ff_mjpeg_encode_picture_header(s);
3277         break;
3278     case FMT_H261:
3279         if (CONFIG_H261_ENCODER)
3280             ff_h261_encode_picture_header(s, picture_number);
3281         break;
3282     case FMT_H263:
3283         if (CONFIG_WMV2_ENCODER && s->codec_id == CODEC_ID_WMV2)
3284             ff_wmv2_encode_picture_header(s, picture_number);
3285         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3286             ff_msmpeg4_encode_picture_header(s, picture_number);
3287         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3288             ff_mpeg4_encode_picture_header(s, picture_number);
3289         else if (CONFIG_RV10_ENCODER && s->codec_id == CODEC_ID_RV10)
3290             ff_rv10_encode_picture_header(s, picture_number);
3291         else if (CONFIG_RV20_ENCODER && s->codec_id == CODEC_ID_RV20)
3292             ff_rv20_encode_picture_header(s, picture_number);
3293         else if (CONFIG_FLV_ENCODER && s->codec_id == CODEC_ID_FLV1)
3294             ff_flv_encode_picture_header(s, picture_number);
3295         else if (CONFIG_H263_ENCODER)
3296             ff_h263_encode_picture_header(s, picture_number);
3297         break;
3298     case FMT_MPEG1:
3299         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3300             ff_mpeg1_encode_picture_header(s, picture_number);
3301         break;
3302     case FMT_H264:
3303         break;
3304     default:
3305         assert(0);
3306     }
3307     bits= put_bits_count(&s->pb);
3308     s->header_bits= bits - s->last_bits;
3309
3310     for(i=1; i<context_count; i++){
3311         update_duplicate_context_after_me(s->thread_context[i], s);
3312     }
3313     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3314     for(i=1; i<context_count; i++){
3315         merge_context_after_encode(s, s->thread_context[i]);
3316     }
3317     emms_c();
3318     return 0;
3319 }
3320
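/**
 * Noise reduction: for every coefficient position, accumulate the absolute
 * level into dct_error_sum and pull the coefficient towards zero by the
 * precomputed dct_offset for that position (never past zero).
 */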
3321 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block){
3322     const int intra= s->mb_intra;
3323     int i;
3324
3325     s->dct_count[intra]++;
3326
3327     for(i=0; i<64; i++){
3328         int level= block[i];
3329
3330         if(level){
3331             if(level>0){
3332                 s->dct_error_sum[intra][i] += level;
3333                 level -= s->dct_offset[intra][i];
3334                 if(level<0) level=0;
3335             }else{
3336                 s->dct_error_sum[intra][i] -= level;
3337                 level += s->dct_offset[intra][i];
3338                 if(level>0) level=0;
3339             }
3340             block[i]= level;
3341         }
3342     }
3343 }
3344
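/**
 * Rate-distortion optimized ("trellis") quantization: quantize the block, then
 * search over the possible (run, level) codes with a survivor list per scan
 * position, choosing the combination that minimizes
 * distortion + lambda * bits for the codec's VLC tables.
 */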
3345 static int dct_quantize_trellis_c(MpegEncContext *s,
3346                                   DCTELEM *block, int n,
3347                                   int qscale, int *overflow){
3348     const int *qmat;
3349     const uint8_t *scantable= s->intra_scantable.scantable;
3350     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3351     int max=0;
3352     unsigned int threshold1, threshold2;
3353     int bias=0;
3354     int run_tab[65];
3355     int level_tab[65];
3356     int score_tab[65];
3357     int survivor[65];
3358     int survivor_count;
3359     int last_run=0;
3360     int last_level=0;
3361     int last_score= 0;
3362     int last_i;
3363     int coeff[2][64];
3364     int coeff_count[64];
3365     int qmul, qadd, start_i, last_non_zero, i, dc;
3366     const int esc_length= s->ac_esc_length;
3367     uint8_t * length;
3368     uint8_t * last_length;
3369     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3370
3371     s->dsp.fdct (block);
3372
3373     if(s->dct_error_sum)
3374         s->denoise_dct(s, block);
3375     qmul= qscale*16;
3376     qadd= ((qscale-1)|1)*8;
3377
3378     if (s->mb_intra) {
3379         int q;
3380         if (!s->h263_aic) {
3381             if (n < 4)
3382                 q = s->y_dc_scale;
3383             else
3384                 q = s->c_dc_scale;
3385             q = q << 3;
3386         } else{
3387             /* For AIC we skip quant/dequant of INTRADC */
3388             q = 1 << 3;
3389             qadd=0;
3390         }
3391
3392         /* note: block[0] is assumed to be positive */
3393         block[0] = (block[0] + (q >> 1)) / q;
3394         start_i = 1;
3395         last_non_zero = 0;
3396         qmat = s->q_intra_matrix[qscale];
3397         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3398             bias= 1<<(QMAT_SHIFT-1);
3399         length     = s->intra_ac_vlc_length;
3400         last_length= s->intra_ac_vlc_last_length;
3401     } else {
3402         start_i = 0;
3403         last_non_zero = -1;
3404         qmat = s->q_inter_matrix[qscale];
3405         length     = s->inter_ac_vlc_length;
3406         last_length= s->inter_ac_vlc_last_length;
3407     }
3408     last_i= start_i;
3409
3410     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3411     threshold2= (threshold1<<1);
3412
3413     for(i=63; i>=start_i; i--) {
3414         const int j = scantable[i];
3415         int level = block[j] * qmat[j];
3416
3417         if(((unsigned)(level+threshold1))>threshold2){
3418             last_non_zero = i;
3419             break;
3420         }
3421     }
3422
3423     for(i=start_i; i<=last_non_zero; i++) {
3424         const int j = scantable[i];
3425         int level = block[j] * qmat[j];
3426
3427 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3428 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3429         if(((unsigned)(level+threshold1))>threshold2){
3430             if(level>0){
3431                 level= (bias + level)>>QMAT_SHIFT;
3432                 coeff[0][i]= level;
3433                 coeff[1][i]= level-1;
3434 //                coeff[2][k]= level-2;
3435             }else{
3436                 level= (bias - level)>>QMAT_SHIFT;
3437                 coeff[0][i]= -level;
3438                 coeff[1][i]= -level+1;
3439 //                coeff[2][k]= -level+2;
3440             }
3441             coeff_count[i]= FFMIN(level, 2);
3442             assert(coeff_count[i]);
3443             max |=level;
3444         }else{
3445             coeff[0][i]= (level>>31)|1;
3446             coeff_count[i]= 1;
3447         }
3448     }
3449
3450     *overflow= s->max_qcoeff < max; //overflow might have happened
3451
3452     if(last_non_zero < start_i){
3453         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3454         return last_non_zero;
3455     }
3456
3457     score_tab[start_i]= 0;
3458     survivor[0]= start_i;
3459     survivor_count= 1;
3460
3461     for(i=start_i; i<=last_non_zero; i++){
3462         int level_index, j, zero_distortion;
3463         int dct_coeff= FFABS(block[ scantable[i] ]);
3464         int best_score=256*256*256*120;
3465
3466         if (s->dsp.fdct == ff_fdct_ifast)
3467             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3468         zero_distortion= dct_coeff*dct_coeff;
3469
3470         for(level_index=0; level_index < coeff_count[i]; level_index++){
3471             int distortion;
3472             int level= coeff[level_index][i];
3473             const int alevel= FFABS(level);
3474             int unquant_coeff;
3475
3476             assert(level);
3477
3478             if(s->out_format == FMT_H263){
3479                 unquant_coeff= alevel*qmul + qadd;
3480             }else{ //MPEG1
3481                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3482                 if(s->mb_intra){
3483                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3484                         unquant_coeff =   (unquant_coeff - 1) | 1;
3485                 }else{
3486                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3487                         unquant_coeff =   (unquant_coeff - 1) | 1;
3488                 }
3489                 unquant_coeff<<= 3;
3490             }
3491
3492             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3493             level+=64;
3494             if((level&(~127)) == 0){
3495                 for(j=survivor_count-1; j>=0; j--){
3496                     int run= i - survivor[j];
3497                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3498                     score += score_tab[i-run];
3499
3500                     if(score < best_score){
3501                         best_score= score;
3502                         run_tab[i+1]= run;
3503                         level_tab[i+1]= level-64;
3504                     }
3505                 }
3506
3507                 if(s->out_format == FMT_H263){
3508                     for(j=survivor_count-1; j>=0; j--){
3509                         int run= i - survivor[j];
3510                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3511                         score += score_tab[i-run];
3512                         if(score < last_score){
3513                             last_score= score;
3514                             last_run= run;
3515                             last_level= level-64;
3516                             last_i= i+1;
3517                         }
3518                     }
3519                 }
3520             }else{
3521                 distortion += esc_length*lambda;
3522                 for(j=survivor_count-1; j>=0; j--){
3523                     int run= i - survivor[j];
3524                     int score= distortion + score_tab[i-run];
3525
3526                     if(score < best_score){
3527                         best_score= score;
3528                         run_tab[i+1]= run;
3529                         level_tab[i+1]= level-64;
3530                     }
3531                 }
3532
3533                 if(s->out_format == FMT_H263){
3534                   for(j=survivor_count-1; j>=0; j--){
3535                         int run= i - survivor[j];
3536                         int score= distortion + score_tab[i-run];
3537                         if(score < last_score){
3538                             last_score= score;
3539                             last_run= run;
3540                             last_level= level-64;
3541                             last_i= i+1;
3542                         }
3543                     }
3544                 }
3545             }
3546         }
3547
3548         score_tab[i+1]= best_score;
3549
3550         //Note: there is a VLC code in MPEG-4 which is 1 bit shorter than another one with a shorter run and the same level
3551         if(last_non_zero <= 27){
3552             for(; survivor_count; survivor_count--){
3553                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3554                     break;
3555             }
3556         }else{
3557             for(; survivor_count; survivor_count--){
3558                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3559                     break;
3560             }
3561         }
3562
3563         survivor[ survivor_count++ ]= i+1;
3564     }
3565
3566     if(s->out_format != FMT_H263){
3567         last_score= 256*256*256*120;
3568         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3569             int score= score_tab[i];
3570             if(i) score += lambda*2; //FIXME more exact?
3571
3572             if(score < last_score){
3573                 last_score= score;
3574                 last_i= i;
3575                 last_level= level_tab[i];
3576                 last_run= run_tab[i];
3577             }
3578         }
3579     }
3580
3581     s->coded_score[n] = last_score;
3582
3583     dc= FFABS(block[0]);
3584     last_non_zero= last_i - 1;
3585     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3586
3587     if(last_non_zero < start_i)
3588         return last_non_zero;
3589
3590     if(last_non_zero == 0 && start_i == 0){
3591         int best_level= 0;
3592         int best_score= dc * dc;
3593
3594         for(i=0; i<coeff_count[0]; i++){
3595             int level= coeff[i][0];
3596             int alevel= FFABS(level);
3597             int unquant_coeff, score, distortion;
3598
3599             if(s->out_format == FMT_H263){
3600                     unquant_coeff= (alevel*qmul + qadd)>>3;
3601             }else{ //MPEG1
3602                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3603                     unquant_coeff =   (unquant_coeff - 1) | 1;
3604             }
3605             unquant_coeff = (unquant_coeff + 4) >> 3;
3606             unquant_coeff<<= 3 + 3;
3607
3608             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3609             level+=64;
3610             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3611             else                    score= distortion + esc_length*lambda;
3612
3613             if(score < best_score){
3614                 best_score= score;
3615                 best_level= level - 64;
3616             }
3617         }
3618         block[0]= best_level;
3619         s->coded_score[n] = best_score - dc*dc;
3620         if(best_level == 0) return -1;
3621         else                return last_non_zero;
3622     }
3623
3624     i= last_i;
3625     assert(last_level);
3626
3627     block[ perm_scantable[last_non_zero] ]= last_level;
3628     i -= last_run + 1;
3629
3630     for(; i>start_i; i -= run_tab[i] + 1){
3631         block[ perm_scantable[i-1] ]= level_tab[i];
3632     }
3633
3634     return last_non_zero;
3635 }
3636
3637 //#define REFINE_STATS 1
3638 static int16_t basis[64][64];
3639
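/* Precompute the 8x8 DCT basis functions, scaled by 2^BASIS_SHIFT and stored
 * in idct_permutation order:
 *   basis[perm[8*i+j]][8*x+y] =
 *       2^BASIS_SHIFT * 0.25 * C(i) * C(j)
 *       * cos(pi*i*(x+0.5)/8) * cos(pi*j*(y+0.5)/8),  C(0)=sqrt(0.5), C(k!=0)=1
 * dct_quantize_refine() uses them to apply single-coefficient changes directly
 * in the spatial domain. */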
3640 static void build_basis(uint8_t *perm){
3641     int i, j, x, y;
3642     emms_c();
3643     for(i=0; i<8; i++){
3644         for(j=0; j<8; j++){
3645             for(y=0; y<8; y++){
3646                 for(x=0; x<8; x++){
3647                     double s= 0.25*(1<<BASIS_SHIFT);
3648                     int index= 8*i + j;
3649                     int perm_index= perm[index];
3650                     if(i==0) s*= sqrt(0.5);
3651                     if(j==0) s*= sqrt(0.5);
3652                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3653                 }
3654             }
3655         }
3656     }
3657 }
3658
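/**
 * Iterative refinement of an already quantized block: rem[] holds the
 * reconstruction error against the original pixels, and each pass tries a +/-1
 * change on every coefficient, scoring it as the bit-cost change times lambda
 * plus the basis-weighted distortion change (try_8x8basis). The best change is
 * applied (add_8x8basis) and the loop repeats until no change improves the
 * score.
 */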
3659 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3660                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
3661                         int n, int qscale){
3662     int16_t rem[64];
3663     LOCAL_ALIGNED_16(DCTELEM, d1, [64]);
3664     const uint8_t *scantable= s->intra_scantable.scantable;
3665     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3666 //    unsigned int threshold1, threshold2;
3667 //    int bias=0;
3668     int run_tab[65];
3669     int prev_run=0;
3670     int prev_level=0;
3671     int qmul, qadd, start_i, last_non_zero, i, dc;
3672     uint8_t * length;
3673     uint8_t * last_length;
3674     int lambda;
3675     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3676 #ifdef REFINE_STATS
3677 static int count=0;
3678 static int after_last=0;
3679 static int to_zero=0;
3680 static int from_zero=0;
3681 static int raise=0;
3682 static int lower=0;
3683 static int messed_sign=0;
3684 #endif
3685
3686     if(basis[0][0] == 0)
3687         build_basis(s->dsp.idct_permutation);
3688
3689     qmul= qscale*2;
3690     qadd= (qscale-1)|1;
3691     if (s->mb_intra) {
3692         if (!s->h263_aic) {
3693             if (n < 4)
3694                 q = s->y_dc_scale;
3695             else
3696                 q = s->c_dc_scale;
3697         } else{
3698             /* For AIC we skip quant/dequant of INTRADC */
3699             q = 1;
3700             qadd=0;
3701         }
3702         q <<= RECON_SHIFT-3;
3703         /* note: block[0] is assumed to be positive */
3704         dc= block[0]*q;
3705 //        block[0] = (block[0] + (q >> 1)) / q;
3706         start_i = 1;
3707 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3708 //            bias= 1<<(QMAT_SHIFT-1);
3709         length     = s->intra_ac_vlc_length;
3710         last_length= s->intra_ac_vlc_last_length;
3711     } else {
3712         dc= 0;
3713         start_i = 0;
3714         length     = s->inter_ac_vlc_length;
3715         last_length= s->inter_ac_vlc_last_length;
3716     }
3717     last_non_zero = s->block_last_index[n];
3718
3719 #ifdef REFINE_STATS
3720 {START_TIMER
3721 #endif
3722     dc += (1<<(RECON_SHIFT-1));
3723     for(i=0; i<64; i++){
3724         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME use orig directly instead of copying to rem[]
3725     }
3726 #ifdef REFINE_STATS
3727 STOP_TIMER("memset rem[]")}
3728 #endif
3729     sum=0;
3730     for(i=0; i<64; i++){
3731         int one= 36;
3732         int qns=4;
3733         int w;
3734
3735         w= FFABS(weight[i]) + qns*one;
3736         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3737
3738         weight[i] = w;
3739 //        w=weight[i] = (63*qns + (w/2)) / w;
3740
3741         assert(w>0);
3742         assert(w<(1<<6));
3743         sum += w*w;
3744     }
3745     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3746 #ifdef REFINE_STATS
3747 {START_TIMER
3748 #endif
3749     run=0;
3750     rle_index=0;
3751     for(i=start_i; i<=last_non_zero; i++){
3752         int j= perm_scantable[i];
3753         const int level= block[j];
3754         int coeff;
3755
3756         if(level){
3757             if(level<0) coeff= qmul*level - qadd;
3758             else        coeff= qmul*level + qadd;
3759             run_tab[rle_index++]=run;
3760             run=0;
3761
3762             s->dsp.add_8x8basis(rem, basis[j], coeff);
3763         }else{
3764             run++;
3765         }
3766     }
3767 #ifdef REFINE_STATS
3768 if(last_non_zero>0){
3769 STOP_TIMER("init rem[]")
3770 }
3771 }
3772
3773 {START_TIMER
3774 #endif
3775     for(;;){
3776         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3777         int best_coeff=0;
3778         int best_change=0;
3779         int run2, best_unquant_change=0, analyze_gradient;
3780 #ifdef REFINE_STATS
3781 {START_TIMER
3782 #endif
3783         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3784
3785         if(analyze_gradient){
3786 #ifdef REFINE_STATS
3787 {START_TIMER
3788 #endif
3789             for(i=0; i<64; i++){
3790                 int w= weight[i];
3791
3792                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3793             }
3794 #ifdef REFINE_STATS
3795 STOP_TIMER("rem*w*w")}
3796 {START_TIMER
3797 #endif
3798             s->dsp.fdct(d1);
3799 #ifdef REFINE_STATS
3800 STOP_TIMER("dct")}
3801 #endif
3802         }
3803
3804         if(start_i){
3805             const int level= block[0];
3806             int change, old_coeff;
3807
3808             assert(s->mb_intra);
3809
3810             old_coeff= q*level;
3811
3812             for(change=-1; change<=1; change+=2){
3813                 int new_level= level + change;
3814                 int score, new_coeff;
3815
3816                 new_coeff= q*new_level;
3817                 if(new_coeff >= 2048 || new_coeff < 0)
3818                     continue;
3819
3820                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3821                 if(score<best_score){
3822                     best_score= score;
3823                     best_coeff= 0;
3824                     best_change= change;
3825                     best_unquant_change= new_coeff - old_coeff;
3826                 }
3827             }
3828         }
3829
3830         run=0;
3831         rle_index=0;
3832         run2= run_tab[rle_index++];
3833         prev_level=0;
3834         prev_run=0;
3835
3836         for(i=start_i; i<64; i++){
3837             int j= perm_scantable[i];
3838             const int level= block[j];
3839             int change, old_coeff;
3840
3841             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3842                 break;
3843
3844             if(level){
3845                 if(level<0) old_coeff= qmul*level - qadd;
3846                 else        old_coeff= qmul*level + qadd;
3847                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3848             }else{
3849                 old_coeff=0;
3850                 run2--;
3851                 assert(run2>=0 || i >= last_non_zero );
3852             }
3853
3854             for(change=-1; change<=1; change+=2){
3855                 int new_level= level + change;
3856                 int score, new_coeff, unquant_change;
3857
3858                 score=0;
3859                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3860                    continue;
3861
3862                 if(new_level){
3863                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3864                     else            new_coeff= qmul*new_level + qadd;
3865                     if(new_coeff >= 2048 || new_coeff <= -2048)
3866                         continue;
3867                     //FIXME check for overflow
3868
3869                     if(level){
3870                         if(level < 63 && level > -63){
3871                             if(i < last_non_zero)
3872                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3873                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3874                             else
3875                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3876                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3877                         }
3878                     }else{
3879                         assert(FFABS(new_level)==1);
3880
3881                         if(analyze_gradient){
3882                             int g= d1[ scantable[i] ];
3883                             if(g && (g^new_level) >= 0)
3884                                 continue;
3885                         }
3886
3887                         if(i < last_non_zero){
3888                             int next_i= i + run2 + 1;
3889                             int next_level= block[ perm_scantable[next_i] ] + 64;
3890
3891                             if(next_level&(~127))
3892                                 next_level= 0;
3893
3894                             if(next_i < last_non_zero)
3895                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3896                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3897                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3898                             else
3899                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3900                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3901                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3902                         }else{
3903                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3904                             if(prev_level){
3905                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3906                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3907                             }
3908                         }
3909                     }
3910                 }else{
3911                     new_coeff=0;
3912                     assert(FFABS(level)==1);
3913
3914                     if(i < last_non_zero){
3915                         int next_i= i + run2 + 1;
3916                         int next_level= block[ perm_scantable[next_i] ] + 64;
3917
3918                         if(next_level&(~127))
3919                             next_level= 0;
3920
3921                         if(next_i < last_non_zero)
3922                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3923                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
3924                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3925                         else
3926                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3927                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3928                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3929                     }else{
3930                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
3931                         if(prev_level){
3932                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3933                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3934                         }
3935                     }
3936                 }
3937
3938                 score *= lambda;
3939
3940                 unquant_change= new_coeff - old_coeff;
3941                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
3942
3943                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
3944                 if(score<best_score){
3945                     best_score= score;
3946                     best_coeff= i;
3947                     best_change= change;
3948                     best_unquant_change= unquant_change;
3949                 }
3950             }
3951             if(level){
3952                 prev_level= level + 64;
3953                 if(prev_level&(~127))
3954                     prev_level= 0;
3955                 prev_run= run;
3956                 run=0;
3957             }else{
3958                 run++;
3959             }
3960         }
3961 #ifdef REFINE_STATS
3962 STOP_TIMER("iterative step")}
3963 #endif
3964
3965         if(best_change){
3966             int j= perm_scantable[ best_coeff ];
3967
3968             block[j] += best_change;
3969
3970             if(best_coeff > last_non_zero){
3971                 last_non_zero= best_coeff;
3972                 assert(block[j]);
3973 #ifdef REFINE_STATS
3974 after_last++;
3975 #endif
3976             }else{
3977 #ifdef REFINE_STATS
3978 if(block[j]){
3979     if(block[j] - best_change){
3980         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
3981             raise++;
3982         }else{
3983             lower++;
3984         }
3985     }else{
3986         from_zero++;
3987     }
3988 }else{
3989     to_zero++;
3990 }
3991 #endif
3992                 for(; last_non_zero>=start_i; last_non_zero--){
3993                     if(block[perm_scantable[last_non_zero]])
3994                         break;
3995                 }
3996             }
3997 #ifdef REFINE_STATS
3998 count++;
3999 if(256*256*256*64 % count == 0){
4000     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4001 }
4002 #endif
4003             run=0;
4004             rle_index=0;
4005             for(i=start_i; i<=last_non_zero; i++){
4006                 int j= perm_scantable[i];
4007                 const int level= block[j];
4008
4009                  if(level){
4010                      run_tab[rle_index++]=run;
4011                      run=0;
4012                  }else{
4013                      run++;
4014                  }
4015             }
4016
4017             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4018         }else{
4019             break;
4020         }
4021     }
4022 #ifdef REFINE_STATS
4023 if(last_non_zero>0){
4024 STOP_TIMER("iterative search")
4025 }
4026 }
4027 #endif
4028
4029     return last_non_zero;
4030 }
4031
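/**
 * Default (non-trellis) quantizer: forward DCT, optional denoising, then for
 * each coefficient in scan order
 *   level = (|block[j]| * qmat[j] + bias) >> QMAT_SHIFT
 * with a dead zone implemented via threshold1/threshold2; the nonzero
 * coefficients are finally permuted to match the IDCT's scan order.
 */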
4032 int ff_dct_quantize_c(MpegEncContext *s,
4033                         DCTELEM *block, int n,
4034                         int qscale, int *overflow)
4035 {
4036     int i, j, level, last_non_zero, q, start_i;
4037     const int *qmat;
4038     const uint8_t *scantable= s->intra_scantable.scantable;
4039     int bias;
4040     int max=0;
4041     unsigned int threshold1, threshold2;
4042
4043     s->dsp.fdct (block);
4044
4045     if(s->dct_error_sum)
4046         s->denoise_dct(s, block);
4047
4048     if (s->mb_intra) {
4049         if (!s->h263_aic) {
4050             if (n < 4)
4051                 q = s->y_dc_scale;
4052             else
4053                 q = s->c_dc_scale;
4054             q = q << 3;
4055         } else
4056             /* For AIC we skip quant/dequant of INTRADC */
4057             q = 1 << 3;
4058
4059         /* note: block[0] is assumed to be positive */
4060         block[0] = (block[0] + (q >> 1)) / q;
4061         start_i = 1;
4062         last_non_zero = 0;
4063         qmat = s->q_intra_matrix[qscale];
4064         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4065     } else {
4066         start_i = 0;
4067         last_non_zero = -1;
4068         qmat = s->q_inter_matrix[qscale];
4069         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4070     }
4071     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4072     threshold2= (threshold1<<1);
4073     for(i=63;i>=start_i;i--) {
4074         j = scantable[i];
4075         level = block[j] * qmat[j];
4076
4077         if(((unsigned)(level+threshold1))>threshold2){
4078             last_non_zero = i;
4079             break;
4080         }else{
4081             block[j]=0;
4082         }
4083     }
4084     for(i=start_i; i<=last_non_zero; i++) {
4085         j = scantable[i];
4086         level = block[j] * qmat[j];
4087
4088 //        if(   bias+level >= (1<<QMAT_SHIFT)
4089 //           || bias-level >= (1<<QMAT_SHIFT)){
4090         if(((unsigned)(level+threshold1))>threshold2){
4091             if(level>0){
4092                 level= (bias + level)>>QMAT_SHIFT;
4093                 block[j]= level;
4094             }else{
4095                 level= (bias - level)>>QMAT_SHIFT;
4096                 block[j]= -level;
4097             }
4098             max |=level;
4099         }else{
4100             block[j]=0;
4101         }
4102     }
4103     *overflow= s->max_qcoeff < max; //overflow might have happened
4104
4105     /* we need this permutation so that the IDCT is corrected; only the !=0 elements are permuted */
4106     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4107         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4108
4109     return last_non_zero;
4110 }
4111
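/* A minimal scalar sketch of the per-coefficient rule used above, kept here
 * purely as documentation.  The helper name and parameter list are
 * hypothetical; the encoder itself operates on whole 8x8 blocks through the
 * s->dct_quantize function pointer. */
static inline int quantize_coeff_sketch(int coef, int qmat_entry, int bias,
                                        unsigned threshold1, int shift)
{
    int level = coef * qmat_entry;

    /* coefficients with |level| <= threshold1 quantize to zero,
     * matching the threshold test in ff_dct_quantize_c() above */
    if ((unsigned)(level + threshold1) <= 2 * threshold1)
        return 0;
    if (level > 0)
        return  (bias + level) >> shift;
    else
        return -((bias - level) >> shift);
}
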
4112 #define OFFSET(x) offsetof(MpegEncContext, x)
4113 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4114 static const AVOption h263_options[] = {
4115     { "obmc",         "Use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4116     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4117     { "mb_info",      "Emit macroblock info for RFC 2190 packetization; the parameter value is the maximum payload size.", OFFSET(mb_info), AV_OPT_TYPE_INT, { 0 }, 0, INT_MAX, VE },
4118     FF_MPV_COMMON_OPTS
4119     { NULL },
4120 };
4121
4122 static const AVClass h263_class = {
4123     .class_name = "H.263 encoder",
4124     .item_name  = av_default_item_name,
4125     .option     = h263_options,
4126     .version    = LIBAVUTIL_VERSION_INT,
4127 };
4128
4129 AVCodec ff_h263_encoder = {
4130     .name           = "h263",
4131     .type           = AVMEDIA_TYPE_VIDEO,
4132     .id             = CODEC_ID_H263,
4133     .priv_data_size = sizeof(MpegEncContext),
4134     .init           = ff_MPV_encode_init,
4135     .encode2        = ff_MPV_encode_picture,
4136     .close          = ff_MPV_encode_end,
4137     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4138     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4139     .priv_class     = &h263_class,
4140 };
4141
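/*
 * Usage sketch (illustration only, not part of the encoder): the private
 * options declared in h263_options are exposed through the generic AVOptions
 * mechanism, so an application can pass them in the dictionary handed to
 * avcodec_open2().  Apart from the option names themselves, everything below
 * (the context setup, the chosen values) is an application-side assumption.
 *
 *     AVDictionary *opts = NULL;
 *     av_dict_set(&opts, "obmc", "1", 0);
 *     av_dict_set(&opts, "mb_info", "1024", 0);
 *     if (avcodec_open2(avctx, avcodec_find_encoder(CODEC_ID_H263), &opts) < 0) {
 *         // handle the error
 *     }
 *     av_dict_free(&opts);
 */
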
4142 static const AVOption h263p_options[] = {
4143     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4144     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4145     { "obmc",       "Use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4146     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4147     FF_MPV_COMMON_OPTS
4148     { NULL },
4149 };
4150 static const AVClass h263p_class = {
4151     .class_name = "H.263p encoder",
4152     .item_name  = av_default_item_name,
4153     .option     = h263p_options,
4154     .version    = LIBAVUTIL_VERSION_INT,
4155 };
4156
4157 AVCodec ff_h263p_encoder = {
4158     .name           = "h263p",
4159     .type           = AVMEDIA_TYPE_VIDEO,
4160     .id             = CODEC_ID_H263P,
4161     .priv_data_size = sizeof(MpegEncContext),
4162     .init           = ff_MPV_encode_init,
4163     .encode2        = ff_MPV_encode_picture,
4164     .close          = ff_MPV_encode_end,
4165     .capabilities   = CODEC_CAP_SLICE_THREADS,
4166     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4167     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4168     .priv_class     = &h263p_class,
4169 };
4170
4171 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4172
4173 AVCodec ff_msmpeg4v2_encoder = {
4174     .name           = "msmpeg4v2",
4175     .type           = AVMEDIA_TYPE_VIDEO,
4176     .id             = CODEC_ID_MSMPEG4V2,
4177     .priv_data_size = sizeof(MpegEncContext),
4178     .init           = ff_MPV_encode_init,
4179     .encode2        = ff_MPV_encode_picture,
4180     .close          = ff_MPV_encode_end,
4181     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4182     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4183     .priv_class     = &msmpeg4v2_class,
4184 };
4185
4186 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4187
4188 AVCodec ff_msmpeg4v3_encoder = {
4189     .name           = "msmpeg4",
4190     .type           = AVMEDIA_TYPE_VIDEO,
4191     .id             = CODEC_ID_MSMPEG4V3,
4192     .priv_data_size = sizeof(MpegEncContext),
4193     .init           = ff_MPV_encode_init,
4194     .encode2        = ff_MPV_encode_picture,
4195     .close          = ff_MPV_encode_end,
4196     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4197     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4198     .priv_class     = &msmpeg4v3_class,
4199 };
4200
4201 FF_MPV_GENERIC_CLASS(wmv1)
4202
4203 AVCodec ff_wmv1_encoder = {
4204     .name           = "wmv1",
4205     .type           = AVMEDIA_TYPE_VIDEO,
4206     .id             = CODEC_ID_WMV1,
4207     .priv_data_size = sizeof(MpegEncContext),
4208     .init           = ff_MPV_encode_init,
4209     .encode2        = ff_MPV_encode_picture,
4210     .close          = ff_MPV_encode_end,
4211     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4212     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4213     .priv_class     = &wmv1_class,
4214 };