]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
lavc decoders: work with refcounted frames.
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/internal.h"
31 #include "libavutil/intmath.h"
32 #include "libavutil/mathematics.h"
33 #include "libavutil/pixdesc.h"
34 #include "libavutil/opt.h"
35 #include "avcodec.h"
36 #include "dct.h"
37 #include "dsputil.h"
38 #include "mpegvideo.h"
39 #include "h263.h"
40 #include "mathops.h"
41 #include "mjpegenc.h"
42 #include "msmpeg4.h"
43 #include "faandct.h"
44 #include "thread.h"
45 #include "aandcttab.h"
46 #include "flv.h"
47 #include "mpeg4video.h"
48 #include "internal.h"
49 #include "bytestream.h"
50 #include <limits.h>
51
52 //#undef NDEBUG
53 //#include <assert.h>
54
/* Forward declarations of file-local helpers that are referenced
 * (e.g. installed as function pointers during encoder init) before their
 * definitions appear further down in this file. */
static int encode_picture(MpegEncContext *s, int picture_number);
static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
static int sse_mb(MpegEncContext *s);
static void denoise_dct_c(MpegEncContext *s, int16_t *block);
static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
60
61 /* enable all paranoid tests for rounding, overflows, etc... */
62 //#define PARANOID
63
64 //#define DEBUG
65
/* Default motion-vector penalty table; MPV_encode_defaults() installs it as
 * s->me.mv_penalty. It is never written in the part of the file shown here,
 * so it keeps its static zero initialisation unless changed elsewhere. */
static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
/* Default fcode table; MPV_encode_defaults() installs it as s->fcode_tab
 * and sets the 32 entries in [MAX_MV - 16, MAX_MV + 16) to 1. */
static uint8_t default_fcode_tab[MAX_MV * 2 + 1];

/* Generic option table: the shared FF_MPV_COMMON_OPTS entries followed by
 * the { NULL } terminator entry. */
const AVOption ff_mpv_generic_options[] = {
    FF_MPV_COMMON_OPTS
    { NULL },
};
73
74 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
75                        uint16_t (*qmat16)[2][64],
76                        const uint16_t *quant_matrix,
77                        int bias, int qmin, int qmax, int intra)
78 {
79     int qscale;
80     int shift = 0;
81
82     for (qscale = qmin; qscale <= qmax; qscale++) {
83         int i;
84         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
85             dsp->fdct == ff_jpeg_fdct_islow_10 ||
86             dsp->fdct == ff_faandct) {
87             for (i = 0; i < 64; i++) {
88                 const int j = dsp->idct_permutation[i];
89                 /* 16 <= qscale * quant_matrix[i] <= 7905
90                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
91                  *             19952 <=              x  <= 249205026
92                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
93                  *           3444240 >= (1 << 36) / (x) >= 275 */
94
95                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
96                                         (qscale * quant_matrix[j]));
97             }
98         } else if (dsp->fdct == ff_fdct_ifast) {
99             for (i = 0; i < 64; i++) {
100                 const int j = dsp->idct_permutation[i];
101                 /* 16 <= qscale * quant_matrix[i] <= 7905
102                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
103                  *             19952 <=              x  <= 249205026
104                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
105                  *           3444240 >= (1 << 36) / (x) >= 275 */
106
107                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
108                                         (ff_aanscales[i] * qscale *
109                                          quant_matrix[j]));
110             }
111         } else {
112             for (i = 0; i < 64; i++) {
113                 const int j = dsp->idct_permutation[i];
114                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
115                  * Assume x = qscale * quant_matrix[i]
116                  * So             16 <=              x  <= 7905
117                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
118                  * so          32768 >= (1 << 19) / (x) >= 67 */
119                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
120                                         (qscale * quant_matrix[j]));
121                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
122                 //                    (qscale * quant_matrix[i]);
123                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
124                                        (qscale * quant_matrix[j]);
125
126                 if (qmat16[qscale][0][i] == 0 ||
127                     qmat16[qscale][0][i] == 128 * 256)
128                     qmat16[qscale][0][i] = 128 * 256 - 1;
129                 qmat16[qscale][1][i] =
130                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
131                                 qmat16[qscale][0][i]);
132             }
133         }
134
135         for (i = intra; i < 64; i++) {
136             int64_t max = 8191;
137             if (dsp->fdct == ff_fdct_ifast) {
138                 max = (8191LL * ff_aanscales[i]) >> 14;
139             }
140             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
141                 shift++;
142             }
143         }
144     }
145     if (shift) {
146         av_log(NULL, AV_LOG_INFO,
147                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
148                QMAT_SHIFT - shift);
149     }
150 }
151
152 static inline void update_qscale(MpegEncContext *s)
153 {
154     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
155                 (FF_LAMBDA_SHIFT + 7);
156     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
157
158     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
159                  FF_LAMBDA_SHIFT;
160 }
161
162 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
163 {
164     int i;
165
166     if (matrix) {
167         put_bits(pb, 1, 1);
168         for (i = 0; i < 64; i++) {
169             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
170         }
171     } else
172         put_bits(pb, 1, 0);
173 }
174
175 /**
176  * init s->current_picture.qscale_table from s->lambda_table
177  */
178 void ff_init_qscale_tab(MpegEncContext *s)
179 {
180     int8_t * const qscale_table = s->current_picture.qscale_table;
181     int i;
182
183     for (i = 0; i < s->mb_num; i++) {
184         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
185         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
186         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
187                                                   s->avctx->qmax);
188     }
189 }
190
191 static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst,
192                                     const AVFrame *src)
193 {
194     dst->pict_type              = src->pict_type;
195     dst->quality                = src->quality;
196     dst->coded_picture_number   = src->coded_picture_number;
197     dst->display_picture_number = src->display_picture_number;
198     //dst->reference              = src->reference;
199     dst->pts                    = src->pts;
200     dst->interlaced_frame       = src->interlaced_frame;
201     dst->top_field_first        = src->top_field_first;
202 }
203
204 static void update_duplicate_context_after_me(MpegEncContext *dst,
205                                               MpegEncContext *src)
206 {
207 #define COPY(a) dst->a= src->a
208     COPY(pict_type);
209     COPY(current_picture);
210     COPY(f_code);
211     COPY(b_code);
212     COPY(qscale);
213     COPY(lambda);
214     COPY(lambda2);
215     COPY(picture_in_gop_number);
216     COPY(gop_picture_number);
217     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
218     COPY(progressive_frame);    // FIXME don't set in encode_header
219     COPY(partitioned_frame);    // FIXME don't set in encode_header
220 #undef COPY
221 }
222
223 /**
224  * Set the given MpegEncContext to defaults for encoding.
225  * the changed fields will not depend upon the prior state of the MpegEncContext.
226  */
227 static void MPV_encode_defaults(MpegEncContext *s)
228 {
229     int i;
230     ff_MPV_common_defaults(s);
231
232     for (i = -16; i < 16; i++) {
233         default_fcode_tab[i + MAX_MV] = 1;
234     }
235     s->me.mv_penalty = default_mv_penalty;
236     s->fcode_tab     = default_fcode_tab;
237 }
238
239 /* init video encoder */
240 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
241 {
242     MpegEncContext *s = avctx->priv_data;
243     int i;
244     int chroma_h_shift, chroma_v_shift;
245
246     MPV_encode_defaults(s);
247
248     switch (avctx->codec_id) {
249     case AV_CODEC_ID_MPEG2VIDEO:
250         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
251             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
252             av_log(avctx, AV_LOG_ERROR,
253                    "only YUV420 and YUV422 are supported\n");
254             return -1;
255         }
256         break;
257     case AV_CODEC_ID_LJPEG:
258         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
259             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
260             avctx->pix_fmt != AV_PIX_FMT_YUVJ444P &&
261             avctx->pix_fmt != AV_PIX_FMT_BGRA     &&
262             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
263               avctx->pix_fmt != AV_PIX_FMT_YUV422P &&
264               avctx->pix_fmt != AV_PIX_FMT_YUV444P) ||
265              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
266             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
267             return -1;
268         }
269         break;
270     case AV_CODEC_ID_MJPEG:
271         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
272             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
273             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
274               avctx->pix_fmt != AV_PIX_FMT_YUV422P) ||
275              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
276             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
277             return -1;
278         }
279         break;
280     default:
281         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
282             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
283             return -1;
284         }
285     }
286
287     switch (avctx->pix_fmt) {
288     case AV_PIX_FMT_YUVJ422P:
289     case AV_PIX_FMT_YUV422P:
290         s->chroma_format = CHROMA_422;
291         break;
292     case AV_PIX_FMT_YUVJ420P:
293     case AV_PIX_FMT_YUV420P:
294     default:
295         s->chroma_format = CHROMA_420;
296         break;
297     }
298
299     s->bit_rate = avctx->bit_rate;
300     s->width    = avctx->width;
301     s->height   = avctx->height;
302     if (avctx->gop_size > 600 &&
303         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
304         av_log(avctx, AV_LOG_ERROR,
305                "Warning keyframe interval too large! reducing it ...\n");
306         avctx->gop_size = 600;
307     }
308     s->gop_size     = avctx->gop_size;
309     s->avctx        = avctx;
310     s->flags        = avctx->flags;
311     s->flags2       = avctx->flags2;
312     s->max_b_frames = avctx->max_b_frames;
313     s->codec_id     = avctx->codec->id;
314 #if FF_API_MPV_GLOBAL_OPTS
315     if (avctx->luma_elim_threshold)
316         s->luma_elim_threshold   = avctx->luma_elim_threshold;
317     if (avctx->chroma_elim_threshold)
318         s->chroma_elim_threshold = avctx->chroma_elim_threshold;
319 #endif
320     s->strict_std_compliance = avctx->strict_std_compliance;
321     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
322     s->mpeg_quant         = avctx->mpeg_quant;
323     s->rtp_mode           = !!avctx->rtp_payload_size;
324     s->intra_dc_precision = avctx->intra_dc_precision;
325     s->user_specified_pts = AV_NOPTS_VALUE;
326
327     if (s->gop_size <= 1) {
328         s->intra_only = 1;
329         s->gop_size   = 12;
330     } else {
331         s->intra_only = 0;
332     }
333
334     s->me_method = avctx->me_method;
335
336     /* Fixed QSCALE */
337     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
338
339 #if FF_API_MPV_GLOBAL_OPTS
340     if (s->flags & CODEC_FLAG_QP_RD)
341         s->mpv_flags |= FF_MPV_FLAG_QP_RD;
342 #endif
343
344     s->adaptive_quant = (s->avctx->lumi_masking ||
345                          s->avctx->dark_masking ||
346                          s->avctx->temporal_cplx_masking ||
347                          s->avctx->spatial_cplx_masking  ||
348                          s->avctx->p_masking      ||
349                          s->avctx->border_masking ||
350                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
351                         !s->fixed_qscale;
352
353     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
354
355     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
356         av_log(avctx, AV_LOG_ERROR,
357                "a vbv buffer size is needed, "
358                "for encoding with a maximum bitrate\n");
359         return -1;
360     }
361
362     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
363         av_log(avctx, AV_LOG_INFO,
364                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
365     }
366
367     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
368         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
369         return -1;
370     }
371
372     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
373         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
374         return -1;
375     }
376
377     if (avctx->rc_max_rate &&
378         avctx->rc_max_rate == avctx->bit_rate &&
379         avctx->rc_max_rate != avctx->rc_min_rate) {
380         av_log(avctx, AV_LOG_INFO,
381                "impossible bitrate constraints, this will fail\n");
382     }
383
384     if (avctx->rc_buffer_size &&
385         avctx->bit_rate * (int64_t)avctx->time_base.num >
386             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
387         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
388         return -1;
389     }
390
391     if (!s->fixed_qscale &&
392         avctx->bit_rate * av_q2d(avctx->time_base) >
393             avctx->bit_rate_tolerance) {
394         av_log(avctx, AV_LOG_ERROR,
395                "bitrate tolerance too small for bitrate\n");
396         return -1;
397     }
398
399     if (s->avctx->rc_max_rate &&
400         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
401         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
402          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
403         90000LL * (avctx->rc_buffer_size - 1) >
404             s->avctx->rc_max_rate * 0xFFFFLL) {
405         av_log(avctx, AV_LOG_INFO,
406                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
407                "specified vbv buffer is too large for the given bitrate!\n");
408     }
409
410     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
411         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
412         s->codec_id != AV_CODEC_ID_FLV1) {
413         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
414         return -1;
415     }
416
417     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
418         av_log(avctx, AV_LOG_ERROR,
419                "OBMC is only supported with simple mb decision\n");
420         return -1;
421     }
422
423     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
424         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
425         return -1;
426     }
427
428     if (s->max_b_frames                    &&
429         s->codec_id != AV_CODEC_ID_MPEG4      &&
430         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
431         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
432         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
433         return -1;
434     }
435
436     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
437          s->codec_id == AV_CODEC_ID_H263  ||
438          s->codec_id == AV_CODEC_ID_H263P) &&
439         (avctx->sample_aspect_ratio.num > 255 ||
440          avctx->sample_aspect_ratio.den > 255)) {
441         av_log(avctx, AV_LOG_ERROR,
442                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
443                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
444         return -1;
445     }
446
447     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
448         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
449         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
450         return -1;
451     }
452
453     // FIXME mpeg2 uses that too
454     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
455         av_log(avctx, AV_LOG_ERROR,
456                "mpeg2 style quantization not supported by codec\n");
457         return -1;
458     }
459
460 #if FF_API_MPV_GLOBAL_OPTS
461     if (s->flags & CODEC_FLAG_CBP_RD)
462         s->mpv_flags |= FF_MPV_FLAG_CBP_RD;
463 #endif
464
465     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
466         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
467         return -1;
468     }
469
470     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
471         s->avctx->mb_decision != FF_MB_DECISION_RD) {
472         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
473         return -1;
474     }
475
476     if (s->avctx->scenechange_threshold < 1000000000 &&
477         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
478         av_log(avctx, AV_LOG_ERROR,
479                "closed gop with scene change detection are not supported yet, "
480                "set threshold to 1000000000\n");
481         return -1;
482     }
483
484     if (s->flags & CODEC_FLAG_LOW_DELAY) {
485         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
486             av_log(avctx, AV_LOG_ERROR,
487                   "low delay forcing is only available for mpeg2\n");
488             return -1;
489         }
490         if (s->max_b_frames != 0) {
491             av_log(avctx, AV_LOG_ERROR,
492                    "b frames cannot be used with low delay\n");
493             return -1;
494         }
495     }
496
497     if (s->q_scale_type == 1) {
498         if (avctx->qmax > 12) {
499             av_log(avctx, AV_LOG_ERROR,
500                    "non linear quant only supports qmax <= 12 currently\n");
501             return -1;
502         }
503     }
504
505     if (s->avctx->thread_count > 1         &&
506         s->codec_id != AV_CODEC_ID_MPEG4      &&
507         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
508         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
509         (s->codec_id != AV_CODEC_ID_H263P)) {
510         av_log(avctx, AV_LOG_ERROR,
511                "multi threaded encoding not supported by codec\n");
512         return -1;
513     }
514
515     if (s->avctx->thread_count < 1) {
516         av_log(avctx, AV_LOG_ERROR,
517                "automatic thread number detection not supported by codec,"
518                "patch welcome\n");
519         return -1;
520     }
521
522     if (s->avctx->thread_count > 1)
523         s->rtp_mode = 1;
524
525     if (!avctx->time_base.den || !avctx->time_base.num) {
526         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
527         return -1;
528     }
529
530     i = (INT_MAX / 2 + 128) >> 8;
531     if (avctx->mb_threshold >= i) {
532         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
533                i - 1);
534         return -1;
535     }
536
537     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
538         av_log(avctx, AV_LOG_INFO,
539                "notice: b_frame_strategy only affects the first pass\n");
540         avctx->b_frame_strategy = 0;
541     }
542
543     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
544     if (i > 1) {
545         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
546         avctx->time_base.den /= i;
547         avctx->time_base.num /= i;
548         //return -1;
549     }
550
551     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
552         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
553         // (a + x * 3 / 8) / x
554         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
555         s->inter_quant_bias = 0;
556     } else {
557         s->intra_quant_bias = 0;
558         // (a - x / 4) / x
559         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
560     }
561
562     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
563         s->intra_quant_bias = avctx->intra_quant_bias;
564     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
565         s->inter_quant_bias = avctx->inter_quant_bias;
566
567     av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
568                                      &chroma_v_shift);
569
570     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
571         s->avctx->time_base.den > (1 << 16) - 1) {
572         av_log(avctx, AV_LOG_ERROR,
573                "timebase %d/%d not supported by MPEG 4 standard, "
574                "the maximum admitted value for the timebase denominator "
575                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
576                (1 << 16) - 1);
577         return -1;
578     }
579     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
580
581 #if FF_API_MPV_GLOBAL_OPTS
582     if (avctx->flags2 & CODEC_FLAG2_SKIP_RD)
583         s->mpv_flags |= FF_MPV_FLAG_SKIP_RD;
584     if (avctx->flags2 & CODEC_FLAG2_STRICT_GOP)
585         s->mpv_flags |= FF_MPV_FLAG_STRICT_GOP;
586     if (avctx->quantizer_noise_shaping)
587         s->quantizer_noise_shaping = avctx->quantizer_noise_shaping;
588 #endif
589
590     switch (avctx->codec->id) {
591     case AV_CODEC_ID_MPEG1VIDEO:
592         s->out_format = FMT_MPEG1;
593         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
594         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
595         break;
596     case AV_CODEC_ID_MPEG2VIDEO:
597         s->out_format = FMT_MPEG1;
598         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
599         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
600         s->rtp_mode   = 1;
601         break;
602     case AV_CODEC_ID_LJPEG:
603     case AV_CODEC_ID_MJPEG:
604         s->out_format = FMT_MJPEG;
605         s->intra_only = 1; /* force intra only for jpeg */
606         if (avctx->codec->id == AV_CODEC_ID_LJPEG &&
607             avctx->pix_fmt   == AV_PIX_FMT_BGRA) {
608             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
609             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
610             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
611         } else {
612             s->mjpeg_vsample[0] = 2;
613             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
614             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
615             s->mjpeg_hsample[0] = 2;
616             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
617             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
618         }
619         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
620             ff_mjpeg_encode_init(s) < 0)
621             return -1;
622         avctx->delay = 0;
623         s->low_delay = 1;
624         break;
625     case AV_CODEC_ID_H261:
626         if (!CONFIG_H261_ENCODER)
627             return -1;
628         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
629             av_log(avctx, AV_LOG_ERROR,
630                    "The specified picture size of %dx%d is not valid for the "
631                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
632                     s->width, s->height);
633             return -1;
634         }
635         s->out_format = FMT_H261;
636         avctx->delay  = 0;
637         s->low_delay  = 1;
638         break;
639     case AV_CODEC_ID_H263:
640         if (!CONFIG_H263_ENCODER)
641         return -1;
642         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
643                              s->width, s->height) == 8) {
644             av_log(avctx, AV_LOG_INFO,
645                    "The specified picture size of %dx%d is not valid for "
646                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
647                    "352x288, 704x576, and 1408x1152."
648                    "Try H.263+.\n", s->width, s->height);
649             return -1;
650         }
651         s->out_format = FMT_H263;
652         avctx->delay  = 0;
653         s->low_delay  = 1;
654         break;
655     case AV_CODEC_ID_H263P:
656         s->out_format = FMT_H263;
657         s->h263_plus  = 1;
658         /* Fx */
659         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
660         s->modified_quant  = s->h263_aic;
661         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
662         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
663
664         /* /Fx */
665         /* These are just to be sure */
666         avctx->delay = 0;
667         s->low_delay = 1;
668         break;
669     case AV_CODEC_ID_FLV1:
670         s->out_format      = FMT_H263;
671         s->h263_flv        = 2; /* format = 1; 11-bit codes */
672         s->unrestricted_mv = 1;
673         s->rtp_mode  = 0; /* don't allow GOB */
674         avctx->delay = 0;
675         s->low_delay = 1;
676         break;
677     case AV_CODEC_ID_RV10:
678         s->out_format = FMT_H263;
679         avctx->delay  = 0;
680         s->low_delay  = 1;
681         break;
682     case AV_CODEC_ID_RV20:
683         s->out_format      = FMT_H263;
684         avctx->delay       = 0;
685         s->low_delay       = 1;
686         s->modified_quant  = 1;
687         s->h263_aic        = 1;
688         s->h263_plus       = 1;
689         s->loop_filter     = 1;
690         s->unrestricted_mv = 0;
691         break;
692     case AV_CODEC_ID_MPEG4:
693         s->out_format      = FMT_H263;
694         s->h263_pred       = 1;
695         s->unrestricted_mv = 1;
696         s->low_delay       = s->max_b_frames ? 0 : 1;
697         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
698         break;
699     case AV_CODEC_ID_MSMPEG4V2:
700         s->out_format      = FMT_H263;
701         s->h263_pred       = 1;
702         s->unrestricted_mv = 1;
703         s->msmpeg4_version = 2;
704         avctx->delay       = 0;
705         s->low_delay       = 1;
706         break;
707     case AV_CODEC_ID_MSMPEG4V3:
708         s->out_format        = FMT_H263;
709         s->h263_pred         = 1;
710         s->unrestricted_mv   = 1;
711         s->msmpeg4_version   = 3;
712         s->flipflop_rounding = 1;
713         avctx->delay         = 0;
714         s->low_delay         = 1;
715         break;
716     case AV_CODEC_ID_WMV1:
717         s->out_format        = FMT_H263;
718         s->h263_pred         = 1;
719         s->unrestricted_mv   = 1;
720         s->msmpeg4_version   = 4;
721         s->flipflop_rounding = 1;
722         avctx->delay         = 0;
723         s->low_delay         = 1;
724         break;
725     case AV_CODEC_ID_WMV2:
726         s->out_format        = FMT_H263;
727         s->h263_pred         = 1;
728         s->unrestricted_mv   = 1;
729         s->msmpeg4_version   = 5;
730         s->flipflop_rounding = 1;
731         avctx->delay         = 0;
732         s->low_delay         = 1;
733         break;
734     default:
735         return -1;
736     }
737
738     avctx->has_b_frames = !s->low_delay;
739
740     s->encoding = 1;
741
742     s->progressive_frame    =
743     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
744                                                 CODEC_FLAG_INTERLACED_ME) ||
745                                 s->alternate_scan);
746
747     /* init */
748     if (ff_MPV_common_init(s) < 0)
749         return -1;
750
751     if (ARCH_X86)
752         ff_MPV_encode_init_x86(s);
753
754     if (!s->dct_quantize)
755         s->dct_quantize = ff_dct_quantize_c;
756     if (!s->denoise_dct)
757         s->denoise_dct  = denoise_dct_c;
758     s->fast_dct_quantize = s->dct_quantize;
759     if (avctx->trellis)
760         s->dct_quantize  = dct_quantize_trellis_c;
761
762     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
763         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
764
765     s->quant_precision = 5;
766
767     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
768     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
769
770     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
771         ff_h261_encode_init(s);
772     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
773         ff_h263_encode_init(s);
774     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
775         ff_msmpeg4_encode_init(s);
776     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
777         && s->out_format == FMT_MPEG1)
778         ff_mpeg1_encode_init(s);
779
780     /* init q matrix */
781     for (i = 0; i < 64; i++) {
782         int j = s->dsp.idct_permutation[i];
783         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
784             s->mpeg_quant) {
785             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
786             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
787         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
788             s->intra_matrix[j] =
789             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
790         } else {
791             /* mpeg1/2 */
792             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
793             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
794         }
795         if (s->avctx->intra_matrix)
796             s->intra_matrix[j] = s->avctx->intra_matrix[i];
797         if (s->avctx->inter_matrix)
798             s->inter_matrix[j] = s->avctx->inter_matrix[i];
799     }
800
801     /* precompute matrix */
802     /* for mjpeg, we do include qscale in the matrix */
803     if (s->out_format != FMT_MJPEG) {
804         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
805                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
806                           31, 1);
807         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
808                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
809                           31, 0);
810     }
811
812     if (ff_rate_control_init(s) < 0)
813         return -1;
814
815     return 0;
816 }
817
818 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
819 {
820     MpegEncContext *s = avctx->priv_data;
821
822     ff_rate_control_uninit(s);
823
824     ff_MPV_common_end(s);
825     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
826         s->out_format == FMT_MJPEG)
827         ff_mjpeg_encode_close(s);
828
829     av_freep(&avctx->extradata);
830
831     return 0;
832 }
833
/**
 * Sum of absolute differences between a 16x16 block and a constant.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared against
 * @param stride distance in bytes between two consecutive rows of src
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int row, col;
    int total = 0;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
847
848 static int get_intra_count(MpegEncContext *s, uint8_t *src,
849                            uint8_t *ref, int stride)
850 {
851     int x, y, w, h;
852     int acc = 0;
853
854     w = s->width  & ~15;
855     h = s->height & ~15;
856
857     for (y = 0; y < h; y += 16) {
858         for (x = 0; x < w; x += 16) {
859             int offset = x + y * stride;
860             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
861                                      16);
862             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
863             int sae  = get_sae(src + offset, mean, stride);
864
865             acc += sae + 500 < sad;
866         }
867     }
868     return acc;
869 }
870
871
872 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
873 {
874     Picture *pic = NULL;
875     int64_t pts;
876     int i, display_picture_number = 0, ret;
877     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
878                                                  (s->low_delay ? 0 : 1);
879     int direct = 1;
880
881     if (pic_arg) {
882         pts = pic_arg->pts;
883         display_picture_number = s->input_picture_number++;
884
885         if (pts != AV_NOPTS_VALUE) {
886             if (s->user_specified_pts != AV_NOPTS_VALUE) {
887                 int64_t time = pts;
888                 int64_t last = s->user_specified_pts;
889
890                 if (time <= last) {
891                     av_log(s->avctx, AV_LOG_ERROR,
892                            "Error, Invalid timestamp=%"PRId64", "
893                            "last=%"PRId64"\n", pts, s->user_specified_pts);
894                     return -1;
895                 }
896
897                 if (!s->low_delay && display_picture_number == 1)
898                     s->dts_delta = time - last;
899             }
900             s->user_specified_pts = pts;
901         } else {
902             if (s->user_specified_pts != AV_NOPTS_VALUE) {
903                 s->user_specified_pts =
904                 pts = s->user_specified_pts + 1;
905                 av_log(s->avctx, AV_LOG_INFO,
906                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
907                        pts);
908             } else {
909                 pts = display_picture_number;
910             }
911         }
912     }
913
914     if (pic_arg) {
915         if (!pic_arg->buf[0]);
916             direct = 0;
917         if (pic_arg->linesize[0] != s->linesize)
918             direct = 0;
919         if (pic_arg->linesize[1] != s->uvlinesize)
920             direct = 0;
921         if (pic_arg->linesize[2] != s->uvlinesize)
922             direct = 0;
923
924         av_dlog(s->avctx, "%d %d %d %d\n", pic_arg->linesize[0],
925                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
926
927         if (direct) {
928             i = ff_find_unused_picture(s, 1);
929             if (i < 0)
930                 return i;
931
932             pic = &s->picture[i];
933             pic->reference = 3;
934
935             if ((ret = av_frame_ref(&pic->f, pic_arg)) < 0)
936                 return ret;
937             if (ff_alloc_picture(s, pic, 1) < 0) {
938                 return -1;
939             }
940         } else {
941             i = ff_find_unused_picture(s, 0);
942             if (i < 0)
943                 return i;
944
945             pic = &s->picture[i];
946             pic->reference = 3;
947
948             if (ff_alloc_picture(s, pic, 0) < 0) {
949                 return -1;
950             }
951
952             if (pic->f.data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
953                 pic->f.data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
954                 pic->f.data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
955                 // empty
956             } else {
957                 int h_chroma_shift, v_chroma_shift;
958                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
959                                                  &h_chroma_shift,
960                                                  &v_chroma_shift);
961
962                 for (i = 0; i < 3; i++) {
963                     int src_stride = pic_arg->linesize[i];
964                     int dst_stride = i ? s->uvlinesize : s->linesize;
965                     int h_shift = i ? h_chroma_shift : 0;
966                     int v_shift = i ? v_chroma_shift : 0;
967                     int w = s->width  >> h_shift;
968                     int h = s->height >> v_shift;
969                     uint8_t *src = pic_arg->data[i];
970                     uint8_t *dst = pic->f.data[i];
971
972                     if (!s->avctx->rc_buffer_size)
973                         dst += INPLACE_OFFSET;
974
975                     if (src_stride == dst_stride)
976                         memcpy(dst, src, src_stride * h);
977                     else {
978                         while (h--) {
979                             memcpy(dst, src, w);
980                             dst += dst_stride;
981                             src += src_stride;
982                         }
983                     }
984                 }
985             }
986         }
987         copy_picture_attributes(s, &pic->f, pic_arg);
988         pic->f.display_picture_number = display_picture_number;
989         pic->f.pts = pts; // we set this here to avoid modifiying pic_arg
990     }
991
992     /* shift buffer entries */
993     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
994         s->input_picture[i - 1] = s->input_picture[i];
995
996     s->input_picture[encoding_delay] = (Picture*) pic;
997
998     return 0;
999 }
1000
1001 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1002 {
1003     int x, y, plane;
1004     int score = 0;
1005     int64_t score64 = 0;
1006
1007     for (plane = 0; plane < 3; plane++) {
1008         const int stride = p->f.linesize[plane];
1009         const int bw = plane ? 1 : 2;
1010         for (y = 0; y < s->mb_height * bw; y++) {
1011             for (x = 0; x < s->mb_width * bw; x++) {
1012                 int off = p->shared ? 0 : 16;
1013                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1014                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1015                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1016
1017                 switch (s->avctx->frame_skip_exp) {
1018                 case 0: score    =  FFMAX(score, v);          break;
1019                 case 1: score   += FFABS(v);                  break;
1020                 case 2: score   += v * v;                     break;
1021                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1022                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1023                 }
1024             }
1025         }
1026     }
1027
1028     if (score)
1029         score64 = score;
1030
1031     if (score64 < s->avctx->frame_skip_threshold)
1032         return 1;
1033     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1034         return 1;
1035     return 0;
1036 }
1037
1038 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1039 {
1040     AVPacket pkt = { 0 };
1041     int ret, got_output;
1042
1043     av_init_packet(&pkt);
1044     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1045     if (ret < 0)
1046         return ret;
1047
1048     ret = pkt.size;
1049     av_free_packet(&pkt);
1050     return ret;
1051 }
1052
1053 static int estimate_best_b_count(MpegEncContext *s)
1054 {
1055     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1056     AVCodecContext *c = avcodec_alloc_context3(NULL);
1057     AVFrame input[FF_MAX_B_FRAMES + 2];
1058     const int scale = s->avctx->brd_scale;
1059     int i, j, out_size, p_lambda, b_lambda, lambda2;
1060     int64_t best_rd  = INT64_MAX;
1061     int best_b_count = -1;
1062
1063     assert(scale >= 0 && scale <= 3);
1064
1065     //emms_c();
1066     //s->next_picture_ptr->quality;
1067     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1068     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1069     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1070     if (!b_lambda) // FIXME we should do this somewhere else
1071         b_lambda = p_lambda;
1072     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1073                FF_LAMBDA_SHIFT;
1074
1075     c->width        = s->width  >> scale;
1076     c->height       = s->height >> scale;
1077     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1078                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1079     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1080     c->mb_decision  = s->avctx->mb_decision;
1081     c->me_cmp       = s->avctx->me_cmp;
1082     c->mb_cmp       = s->avctx->mb_cmp;
1083     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1084     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1085     c->time_base    = s->avctx->time_base;
1086     c->max_b_frames = s->max_b_frames;
1087
1088     if (avcodec_open2(c, codec, NULL) < 0)
1089         return -1;
1090
1091     for (i = 0; i < s->max_b_frames + 2; i++) {
1092         int ysize = c->width * c->height;
1093         int csize = (c->width / 2) * (c->height / 2);
1094         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1095                                                 s->next_picture_ptr;
1096
1097         avcodec_get_frame_defaults(&input[i]);
1098         input[i].data[0]     = av_malloc(ysize + 2 * csize);
1099         input[i].data[1]     = input[i].data[0] + ysize;
1100         input[i].data[2]     = input[i].data[1] + csize;
1101         input[i].linesize[0] = c->width;
1102         input[i].linesize[1] =
1103         input[i].linesize[2] = c->width / 2;
1104
1105         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1106             pre_input = *pre_input_ptr;
1107
1108             if (!pre_input.shared && i) {
1109                 pre_input.f.data[0] += INPLACE_OFFSET;
1110                 pre_input.f.data[1] += INPLACE_OFFSET;
1111                 pre_input.f.data[2] += INPLACE_OFFSET;
1112             }
1113
1114             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
1115                                  pre_input.f.data[0], pre_input.f.linesize[0],
1116                                  c->width,      c->height);
1117             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
1118                                  pre_input.f.data[1], pre_input.f.linesize[1],
1119                                  c->width >> 1, c->height >> 1);
1120             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
1121                                  pre_input.f.data[2], pre_input.f.linesize[2],
1122                                  c->width >> 1, c->height >> 1);
1123         }
1124     }
1125
1126     for (j = 0; j < s->max_b_frames + 1; j++) {
1127         int64_t rd = 0;
1128
1129         if (!s->input_picture[j])
1130             break;
1131
1132         c->error[0] = c->error[1] = c->error[2] = 0;
1133
1134         input[0].pict_type = AV_PICTURE_TYPE_I;
1135         input[0].quality   = 1 * FF_QP2LAMBDA;
1136
1137         out_size = encode_frame(c, &input[0]);
1138
1139         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1140
1141         for (i = 0; i < s->max_b_frames + 1; i++) {
1142             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1143
1144             input[i + 1].pict_type = is_p ?
1145                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1146             input[i + 1].quality   = is_p ? p_lambda : b_lambda;
1147
1148             out_size = encode_frame(c, &input[i + 1]);
1149
1150             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1151         }
1152
1153         /* get the delayed frames */
1154         while (out_size) {
1155             out_size = encode_frame(c, NULL);
1156             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1157         }
1158
1159         rd += c->error[0] + c->error[1] + c->error[2];
1160
1161         if (rd < best_rd) {
1162             best_rd = rd;
1163             best_b_count = j;
1164         }
1165     }
1166
1167     avcodec_close(c);
1168     av_freep(&c);
1169
1170     for (i = 0; i < s->max_b_frames + 2; i++) {
1171         av_freep(&input[i].data[0]);
1172     }
1173
1174     return best_b_count;
1175 }
1176
1177 static int select_input_picture(MpegEncContext *s)
1178 {
1179     int i, ret;
1180
1181     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1182         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1183     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1184
1185     /* set next picture type & ordering */
1186     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1187         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1188             s->next_picture_ptr == NULL || s->intra_only) {
1189             s->reordered_input_picture[0] = s->input_picture[0];
1190             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1191             s->reordered_input_picture[0]->f.coded_picture_number =
1192                 s->coded_picture_number++;
1193         } else {
1194             int b_frames;
1195
1196             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1197                 if (s->picture_in_gop_number < s->gop_size &&
1198                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1199                     // FIXME check that te gop check above is +-1 correct
1200                     av_frame_unref(&s->input_picture[0]->f);
1201
1202                     emms_c();
1203                     ff_vbv_update(s, 0);
1204
1205                     goto no_output_pic;
1206                 }
1207             }
1208
1209             if (s->flags & CODEC_FLAG_PASS2) {
1210                 for (i = 0; i < s->max_b_frames + 1; i++) {
1211                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1212
1213                     if (pict_num >= s->rc_context.num_entries)
1214                         break;
1215                     if (!s->input_picture[i]) {
1216                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1217                         break;
1218                     }
1219
1220                     s->input_picture[i]->f.pict_type =
1221                         s->rc_context.entry[pict_num].new_pict_type;
1222                 }
1223             }
1224
1225             if (s->avctx->b_frame_strategy == 0) {
1226                 b_frames = s->max_b_frames;
1227                 while (b_frames && !s->input_picture[b_frames])
1228                     b_frames--;
1229             } else if (s->avctx->b_frame_strategy == 1) {
1230                 for (i = 1; i < s->max_b_frames + 1; i++) {
1231                     if (s->input_picture[i] &&
1232                         s->input_picture[i]->b_frame_score == 0) {
1233                         s->input_picture[i]->b_frame_score =
1234                             get_intra_count(s,
1235                                             s->input_picture[i    ]->f.data[0],
1236                                             s->input_picture[i - 1]->f.data[0],
1237                                             s->linesize) + 1;
1238                     }
1239                 }
1240                 for (i = 0; i < s->max_b_frames + 1; i++) {
1241                     if (s->input_picture[i] == NULL ||
1242                         s->input_picture[i]->b_frame_score - 1 >
1243                             s->mb_num / s->avctx->b_sensitivity)
1244                         break;
1245                 }
1246
1247                 b_frames = FFMAX(0, i - 1);
1248
1249                 /* reset scores */
1250                 for (i = 0; i < b_frames + 1; i++) {
1251                     s->input_picture[i]->b_frame_score = 0;
1252                 }
1253             } else if (s->avctx->b_frame_strategy == 2) {
1254                 b_frames = estimate_best_b_count(s);
1255             } else {
1256                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1257                 b_frames = 0;
1258             }
1259
1260             emms_c();
1261
1262             for (i = b_frames - 1; i >= 0; i--) {
1263                 int type = s->input_picture[i]->f.pict_type;
1264                 if (type && type != AV_PICTURE_TYPE_B)
1265                     b_frames = i;
1266             }
1267             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1268                 b_frames == s->max_b_frames) {
1269                 av_log(s->avctx, AV_LOG_ERROR,
1270                        "warning, too many b frames in a row\n");
1271             }
1272
1273             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1274                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1275                     s->gop_size > s->picture_in_gop_number) {
1276                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1277                 } else {
1278                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1279                         b_frames = 0;
1280                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1281                 }
1282             }
1283
1284             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1285                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1286                 b_frames--;
1287
1288             s->reordered_input_picture[0] = s->input_picture[b_frames];
1289             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1290                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1291             s->reordered_input_picture[0]->f.coded_picture_number =
1292                 s->coded_picture_number++;
1293             for (i = 0; i < b_frames; i++) {
1294                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1295                 s->reordered_input_picture[i + 1]->f.pict_type =
1296                     AV_PICTURE_TYPE_B;
1297                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1298                     s->coded_picture_number++;
1299             }
1300         }
1301     }
1302 no_output_pic:
1303     if (s->reordered_input_picture[0]) {
1304         s->reordered_input_picture[0]->reference =
1305            s->reordered_input_picture[0]->f.pict_type !=
1306                AV_PICTURE_TYPE_B ? 3 : 0;
1307
1308         ff_mpeg_unref_picture(s, &s->new_picture);
1309         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1310             return ret;
1311
1312         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1313             // input is a shared pix, so we can't modifiy it -> alloc a new
1314             // one & ensure that the shared one is reuseable
1315
1316             Picture *pic;
1317             int i = ff_find_unused_picture(s, 0);
1318             if (i < 0)
1319                 return i;
1320             pic = &s->picture[i];
1321
1322             pic->reference = s->reordered_input_picture[0]->reference;
1323             if (ff_alloc_picture(s, pic, 0) < 0) {
1324                 return -1;
1325             }
1326
1327             copy_picture_attributes(s, &pic->f,
1328                                     &s->reordered_input_picture[0]->f);
1329
1330             /* mark us unused / free shared pic */
1331             av_frame_unref(&s->reordered_input_picture[0]->f);
1332             s->reordered_input_picture[0]->shared = 0;
1333
1334             s->current_picture_ptr = pic;
1335         } else {
1336             // input is not a shared pix -> reuse buffer for current_pix
1337             s->current_picture_ptr = s->reordered_input_picture[0];
1338             for (i = 0; i < 4; i++) {
1339                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1340             }
1341         }
1342         ff_mpeg_unref_picture(s, &s->current_picture);
1343         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1344                                        s->current_picture_ptr)) < 0)
1345             return ret;
1346
1347         s->picture_number = s->new_picture.f.display_picture_number;
1348     } else {
1349         ff_mpeg_unref_picture(s, &s->new_picture);
1350     }
1351     return 0;
1352 }
1353
1354 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1355                           const AVFrame *pic_arg, int *got_packet)
1356 {
1357     MpegEncContext *s = avctx->priv_data;
1358     int i, stuffing_count, ret;
1359     int context_count = s->slice_context_count;
1360
1361     s->picture_in_gop_number++;
1362
1363     if (load_input_picture(s, pic_arg) < 0)
1364         return -1;
1365
1366     if (select_input_picture(s) < 0) {
1367         return -1;
1368     }
1369
1370     /* output? */
1371     if (s->new_picture.f.data[0]) {
1372         if (!pkt->data &&
1373             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1374             return ret;
1375         if (s->mb_info) {
1376             s->mb_info_ptr = av_packet_new_side_data(pkt,
1377                                  AV_PKT_DATA_H263_MB_INFO,
1378                                  s->mb_width*s->mb_height*12);
1379             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1380         }
1381
1382         for (i = 0; i < context_count; i++) {
1383             int start_y = s->thread_context[i]->start_mb_y;
1384             int   end_y = s->thread_context[i]->  end_mb_y;
1385             int h       = s->mb_height;
1386             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1387             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1388
1389             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1390         }
1391
1392         s->pict_type = s->new_picture.f.pict_type;
1393         //emms_c();
1394         ff_MPV_frame_start(s, avctx);
1395 vbv_retry:
1396         if (encode_picture(s, s->picture_number) < 0)
1397             return -1;
1398
1399         avctx->header_bits = s->header_bits;
1400         avctx->mv_bits     = s->mv_bits;
1401         avctx->misc_bits   = s->misc_bits;
1402         avctx->i_tex_bits  = s->i_tex_bits;
1403         avctx->p_tex_bits  = s->p_tex_bits;
1404         avctx->i_count     = s->i_count;
1405         // FIXME f/b_count in avctx
1406         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1407         avctx->skip_count  = s->skip_count;
1408
1409         ff_MPV_frame_end(s);
1410
1411         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1412             ff_mjpeg_encode_picture_trailer(s);
1413
1414         if (avctx->rc_buffer_size) {
1415             RateControlContext *rcc = &s->rc_context;
1416             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1417
1418             if (put_bits_count(&s->pb) > max_size &&
1419                 s->lambda < s->avctx->lmax) {
1420                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1421                                        (s->qscale + 1) / s->qscale);
1422                 if (s->adaptive_quant) {
1423                     int i;
1424                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1425                         s->lambda_table[i] =
1426                             FFMAX(s->lambda_table[i] + 1,
1427                                   s->lambda_table[i] * (s->qscale + 1) /
1428                                   s->qscale);
1429                 }
1430                 s->mb_skipped = 0;        // done in MPV_frame_start()
1431                 // done in encode_picture() so we must undo it
1432                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1433                     if (s->flipflop_rounding          ||
1434                         s->codec_id == AV_CODEC_ID_H263P ||
1435                         s->codec_id == AV_CODEC_ID_MPEG4)
1436                         s->no_rounding ^= 1;
1437                 }
1438                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1439                     s->time_base       = s->last_time_base;
1440                     s->last_non_b_time = s->time - s->pp_time;
1441                 }
1442                 for (i = 0; i < context_count; i++) {
1443                     PutBitContext *pb = &s->thread_context[i]->pb;
1444                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1445                 }
1446                 goto vbv_retry;
1447             }
1448
1449             assert(s->avctx->rc_max_rate);
1450         }
1451
1452         if (s->flags & CODEC_FLAG_PASS1)
1453             ff_write_pass1_stats(s);
1454
1455         for (i = 0; i < 4; i++) {
1456             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1457             avctx->error[i] += s->current_picture_ptr->f.error[i];
1458         }
1459
1460         if (s->flags & CODEC_FLAG_PASS1)
1461             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1462                    avctx->i_tex_bits + avctx->p_tex_bits ==
1463                        put_bits_count(&s->pb));
1464         flush_put_bits(&s->pb);
1465         s->frame_bits  = put_bits_count(&s->pb);
1466
1467         stuffing_count = ff_vbv_update(s, s->frame_bits);
1468         if (stuffing_count) {
1469             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1470                     stuffing_count + 50) {
1471                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1472                 return -1;
1473             }
1474
1475             switch (s->codec_id) {
1476             case AV_CODEC_ID_MPEG1VIDEO:
1477             case AV_CODEC_ID_MPEG2VIDEO:
1478                 while (stuffing_count--) {
1479                     put_bits(&s->pb, 8, 0);
1480                 }
1481             break;
1482             case AV_CODEC_ID_MPEG4:
1483                 put_bits(&s->pb, 16, 0);
1484                 put_bits(&s->pb, 16, 0x1C3);
1485                 stuffing_count -= 4;
1486                 while (stuffing_count--) {
1487                     put_bits(&s->pb, 8, 0xFF);
1488                 }
1489             break;
1490             default:
1491                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1492             }
1493             flush_put_bits(&s->pb);
1494             s->frame_bits  = put_bits_count(&s->pb);
1495         }
1496
1497         /* update mpeg1/2 vbv_delay for CBR */
1498         if (s->avctx->rc_max_rate                          &&
1499             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1500             s->out_format == FMT_MPEG1                     &&
1501             90000LL * (avctx->rc_buffer_size - 1) <=
1502                 s->avctx->rc_max_rate * 0xFFFFLL) {
1503             int vbv_delay, min_delay;
1504             double inbits  = s->avctx->rc_max_rate *
1505                              av_q2d(s->avctx->time_base);
1506             int    minbits = s->frame_bits - 8 *
1507                              (s->vbv_delay_ptr - s->pb.buf - 1);
1508             double bits    = s->rc_context.buffer_index + minbits - inbits;
1509
1510             if (bits < 0)
1511                 av_log(s->avctx, AV_LOG_ERROR,
1512                        "Internal error, negative bits\n");
1513
1514             assert(s->repeat_first_field == 0);
1515
1516             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1517             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1518                         s->avctx->rc_max_rate;
1519
1520             vbv_delay = FFMAX(vbv_delay, min_delay);
1521
1522             assert(vbv_delay < 0xFFFF);
1523
1524             s->vbv_delay_ptr[0] &= 0xF8;
1525             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1526             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1527             s->vbv_delay_ptr[2] &= 0x07;
1528             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1529             avctx->vbv_delay     = vbv_delay * 300;
1530         }
1531         s->total_bits     += s->frame_bits;
1532         avctx->frame_bits  = s->frame_bits;
1533
1534         pkt->pts = s->current_picture.f.pts;
1535         if (!s->low_delay) {
1536             if (!s->current_picture.f.coded_picture_number)
1537                 pkt->dts = pkt->pts - s->dts_delta;
1538             else
1539                 pkt->dts = s->reordered_pts;
1540             s->reordered_pts = s->input_picture[0]->f.pts;
1541         } else
1542             pkt->dts = pkt->pts;
1543         if (s->current_picture.f.key_frame)
1544             pkt->flags |= AV_PKT_FLAG_KEY;
1545         if (s->mb_info)
1546             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1547     } else {
1548         s->frame_bits = 0;
1549     }
1550     assert((s->frame_bits & 7) == 0);
1551
1552     pkt->size = s->frame_bits / 8;
1553     *got_packet = !!pkt->size;
1554     return 0;
1555 }
1556
1557 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1558                                                 int n, int threshold)
1559 {
1560     static const char tab[64] = {
1561         3, 2, 2, 1, 1, 1, 1, 1,
1562         1, 1, 1, 1, 1, 1, 1, 1,
1563         1, 1, 1, 1, 1, 1, 1, 1,
1564         0, 0, 0, 0, 0, 0, 0, 0,
1565         0, 0, 0, 0, 0, 0, 0, 0,
1566         0, 0, 0, 0, 0, 0, 0, 0,
1567         0, 0, 0, 0, 0, 0, 0, 0,
1568         0, 0, 0, 0, 0, 0, 0, 0
1569     };
1570     int score = 0;
1571     int run = 0;
1572     int i;
1573     int16_t *block = s->block[n];
1574     const int last_index = s->block_last_index[n];
1575     int skip_dc;
1576
1577     if (threshold < 0) {
1578         skip_dc = 0;
1579         threshold = -threshold;
1580     } else
1581         skip_dc = 1;
1582
1583     /* Are all we could set to zero already zero? */
1584     if (last_index <= skip_dc - 1)
1585         return;
1586
1587     for (i = 0; i <= last_index; i++) {
1588         const int j = s->intra_scantable.permutated[i];
1589         const int level = FFABS(block[j]);
1590         if (level == 1) {
1591             if (skip_dc && i == 0)
1592                 continue;
1593             score += tab[run];
1594             run = 0;
1595         } else if (level > 1) {
1596             return;
1597         } else {
1598             run++;
1599         }
1600     }
1601     if (score >= threshold)
1602         return;
1603     for (i = skip_dc; i <= last_index; i++) {
1604         const int j = s->intra_scantable.permutated[i];
1605         block[j] = 0;
1606     }
1607     if (block[0])
1608         s->block_last_index[n] = 0;
1609     else
1610         s->block_last_index[n] = -1;
1611 }
1612
1613 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1614                                int last_index)
1615 {
1616     int i;
1617     const int maxlevel = s->max_qcoeff;
1618     const int minlevel = s->min_qcoeff;
1619     int overflow = 0;
1620
1621     if (s->mb_intra) {
1622         i = 1; // skip clipping of intra dc
1623     } else
1624         i = 0;
1625
1626     for (; i <= last_index; i++) {
1627         const int j = s->intra_scantable.permutated[i];
1628         int level = block[j];
1629
1630         if (level > maxlevel) {
1631             level = maxlevel;
1632             overflow++;
1633         } else if (level < minlevel) {
1634             level = minlevel;
1635             overflow++;
1636         }
1637
1638         block[j] = level;
1639     }
1640
1641     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1642         av_log(s->avctx, AV_LOG_INFO,
1643                "warning, clipping %d dct coefficients to %d..%d\n",
1644                overflow, minlevel, maxlevel);
1645 }
1646
/**
 * Fill an 8x8 weight table from the local activity of an 8x8 pixel block.
 *
 * For every pixel the 3x3 neighbourhood (cropped at the block border) is
 * examined; the weight is 36 * sqrt(count * sum(v^2) - sum(v)^2) / count,
 * i.e. 36 times the standard deviation of the neighbourhood, computed with
 * the integer ff_sqrt().
 *
 * @param weight output table of 64 entries, stored row by row
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance in bytes between two rows of pixels
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int x, y;
    // FIXME optimize
    for (y = 0; y < 8; y++) {
        const int row_first = FFMAX(y - 1, 0);
        const int row_end   = FFMIN(8, y + 2);

        for (x = 0; x < 8; x++) {
            const int col_first = FFMAX(x - 1, 0);
            const int col_end   = FFMIN(8, x + 2);
            /* number of pixels inside the cropped window */
            const int count = (row_end - row_first) * (col_end - col_first);
            int sum = 0;
            int sqr = 0;
            int nx, ny;

            for (ny = row_first; ny < row_end; ny++) {
                const uint8_t *row = ptr + ny * stride;

                for (nx = col_first; nx < col_end; nx++) {
                    const int v = row[nx];

                    sum += v;
                    sqr += v * v;
                }
            }
            weight[x + 8 * y] = (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
1670
1671 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1672                                                 int motion_x, int motion_y,
1673                                                 int mb_block_height,
1674                                                 int mb_block_count)
1675 {
1676     int16_t weight[8][64];
1677     int16_t orig[8][64];
1678     const int mb_x = s->mb_x;
1679     const int mb_y = s->mb_y;
1680     int i;
1681     int skip_dct[8];
1682     int dct_offset = s->linesize * 8; // default for progressive frames
1683     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1684     int wrap_y, wrap_c;
1685
1686     for (i = 0; i < mb_block_count; i++)
1687         skip_dct[i] = s->skipdct;
1688
1689     if (s->adaptive_quant) {
1690         const int last_qp = s->qscale;
1691         const int mb_xy = mb_x + mb_y * s->mb_stride;
1692
1693         s->lambda = s->lambda_table[mb_xy];
1694         update_qscale(s);
1695
1696         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1697             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1698             s->dquant = s->qscale - last_qp;
1699
1700             if (s->out_format == FMT_H263) {
1701                 s->dquant = av_clip(s->dquant, -2, 2);
1702
1703                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1704                     if (!s->mb_intra) {
1705                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1706                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1707                                 s->dquant = 0;
1708                         }
1709                         if (s->mv_type == MV_TYPE_8X8)
1710                             s->dquant = 0;
1711                     }
1712                 }
1713             }
1714         }
1715         ff_set_qscale(s, last_qp + s->dquant);
1716     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1717         ff_set_qscale(s, s->qscale + s->dquant);
1718
1719     wrap_y = s->linesize;
1720     wrap_c = s->uvlinesize;
1721     ptr_y  = s->new_picture.f.data[0] +
1722              (mb_y * 16 * wrap_y)              + mb_x * 16;
1723     ptr_cb = s->new_picture.f.data[1] +
1724              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1725     ptr_cr = s->new_picture.f.data[2] +
1726              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1727
1728     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1729         uint8_t *ebuf = s->edge_emu_buffer + 32;
1730         s->vdsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
1731                                  mb_y * 16, s->width, s->height);
1732         ptr_y = ebuf;
1733         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, 8,
1734                                  mb_block_height, mb_x * 8, mb_y * 8,
1735                                  s->width >> 1, s->height >> 1);
1736         ptr_cb = ebuf + 18 * wrap_y;
1737         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr, wrap_c, 8,
1738                                  mb_block_height, mb_x * 8, mb_y * 8,
1739                                  s->width >> 1, s->height >> 1);
1740         ptr_cr = ebuf + 18 * wrap_y + 8;
1741     }
1742
1743     if (s->mb_intra) {
1744         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1745             int progressive_score, interlaced_score;
1746
1747             s->interlaced_dct = 0;
1748             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1749                                                     NULL, wrap_y, 8) +
1750                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1751                                                     NULL, wrap_y, 8) - 400;
1752
1753             if (progressive_score > 0) {
1754                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1755                                                        NULL, wrap_y * 2, 8) +
1756                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1757                                                        NULL, wrap_y * 2, 8);
1758                 if (progressive_score > interlaced_score) {
1759                     s->interlaced_dct = 1;
1760
1761                     dct_offset = wrap_y;
1762                     wrap_y <<= 1;
1763                     if (s->chroma_format == CHROMA_422)
1764                         wrap_c <<= 1;
1765                 }
1766             }
1767         }
1768
1769         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1770         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1771         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1772         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1773
1774         if (s->flags & CODEC_FLAG_GRAY) {
1775             skip_dct[4] = 1;
1776             skip_dct[5] = 1;
1777         } else {
1778             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1779             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1780             if (!s->chroma_y_shift) { /* 422 */
1781                 s->dsp.get_pixels(s->block[6],
1782                                   ptr_cb + (dct_offset >> 1), wrap_c);
1783                 s->dsp.get_pixels(s->block[7],
1784                                   ptr_cr + (dct_offset >> 1), wrap_c);
1785             }
1786         }
1787     } else {
1788         op_pixels_func (*op_pix)[4];
1789         qpel_mc_func (*op_qpix)[16];
1790         uint8_t *dest_y, *dest_cb, *dest_cr;
1791
1792         dest_y  = s->dest[0];
1793         dest_cb = s->dest[1];
1794         dest_cr = s->dest[2];
1795
1796         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1797             op_pix  = s->dsp.put_pixels_tab;
1798             op_qpix = s->dsp.put_qpel_pixels_tab;
1799         } else {
1800             op_pix  = s->dsp.put_no_rnd_pixels_tab;
1801             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1802         }
1803
1804         if (s->mv_dir & MV_DIR_FORWARD) {
1805             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
1806                           s->last_picture.f.data,
1807                           op_pix, op_qpix);
1808             op_pix  = s->dsp.avg_pixels_tab;
1809             op_qpix = s->dsp.avg_qpel_pixels_tab;
1810         }
1811         if (s->mv_dir & MV_DIR_BACKWARD) {
1812             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
1813                           s->next_picture.f.data,
1814                           op_pix, op_qpix);
1815         }
1816
1817         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1818             int progressive_score, interlaced_score;
1819
1820             s->interlaced_dct = 0;
1821             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1822                                                     ptr_y,              wrap_y,
1823                                                     8) +
1824                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1825                                                     ptr_y + wrap_y * 8, wrap_y,
1826                                                     8) - 400;
1827
1828             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1829                 progressive_score -= 400;
1830
1831             if (progressive_score > 0) {
1832                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1833                                                        ptr_y,
1834                                                        wrap_y * 2, 8) +
1835                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1836                                                        ptr_y + wrap_y,
1837                                                        wrap_y * 2, 8);
1838
1839                 if (progressive_score > interlaced_score) {
1840                     s->interlaced_dct = 1;
1841
1842                     dct_offset = wrap_y;
1843                     wrap_y <<= 1;
1844                     if (s->chroma_format == CHROMA_422)
1845                         wrap_c <<= 1;
1846                 }
1847             }
1848         }
1849
1850         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1851         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1852         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1853                            dest_y + dct_offset, wrap_y);
1854         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1855                            dest_y + dct_offset + 8, wrap_y);
1856
1857         if (s->flags & CODEC_FLAG_GRAY) {
1858             skip_dct[4] = 1;
1859             skip_dct[5] = 1;
1860         } else {
1861             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1862             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1863             if (!s->chroma_y_shift) { /* 422 */
1864                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1865                                    dest_cb + (dct_offset >> 1), wrap_c);
1866                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
1867                                    dest_cr + (dct_offset >> 1), wrap_c);
1868             }
1869         }
1870         /* pre quantization */
1871         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1872                 2 * s->qscale * s->qscale) {
1873             // FIXME optimize
1874             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1875                               wrap_y, 8) < 20 * s->qscale)
1876                 skip_dct[0] = 1;
1877             if (s->dsp.sad[1](NULL, ptr_y + 8,
1878                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1879                 skip_dct[1] = 1;
1880             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1881                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1882                 skip_dct[2] = 1;
1883             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1884                               dest_y + dct_offset + 8,
1885                               wrap_y, 8) < 20 * s->qscale)
1886                 skip_dct[3] = 1;
1887             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
1888                               wrap_c, 8) < 20 * s->qscale)
1889                 skip_dct[4] = 1;
1890             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
1891                               wrap_c, 8) < 20 * s->qscale)
1892                 skip_dct[5] = 1;
1893             if (!s->chroma_y_shift) { /* 422 */
1894                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
1895                                   dest_cb + (dct_offset >> 1),
1896                                   wrap_c, 8) < 20 * s->qscale)
1897                     skip_dct[6] = 1;
1898                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
1899                                   dest_cr + (dct_offset >> 1),
1900                                   wrap_c, 8) < 20 * s->qscale)
1901                     skip_dct[7] = 1;
1902             }
1903         }
1904     }
1905
1906     if (s->quantizer_noise_shaping) {
1907         if (!skip_dct[0])
1908             get_visual_weight(weight[0], ptr_y                 , wrap_y);
1909         if (!skip_dct[1])
1910             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
1911         if (!skip_dct[2])
1912             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
1913         if (!skip_dct[3])
1914             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
1915         if (!skip_dct[4])
1916             get_visual_weight(weight[4], ptr_cb                , wrap_c);
1917         if (!skip_dct[5])
1918             get_visual_weight(weight[5], ptr_cr                , wrap_c);
1919         if (!s->chroma_y_shift) { /* 422 */
1920             if (!skip_dct[6])
1921                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
1922                                   wrap_c);
1923             if (!skip_dct[7])
1924                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
1925                                   wrap_c);
1926         }
1927         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
1928     }
1929
1930     /* DCT & quantize */
1931     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
1932     {
1933         for (i = 0; i < mb_block_count; i++) {
1934             if (!skip_dct[i]) {
1935                 int overflow;
1936                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
1937                 // FIXME we could decide to change to quantizer instead of
1938                 // clipping
1939                 // JS: I don't think that would be a good idea it could lower
1940                 //     quality instead of improve it. Just INTRADC clipping
1941                 //     deserves changes in quantizer
1942                 if (overflow)
1943                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
1944             } else
1945                 s->block_last_index[i] = -1;
1946         }
1947         if (s->quantizer_noise_shaping) {
1948             for (i = 0; i < mb_block_count; i++) {
1949                 if (!skip_dct[i]) {
1950                     s->block_last_index[i] =
1951                         dct_quantize_refine(s, s->block[i], weight[i],
1952                                             orig[i], i, s->qscale);
1953                 }
1954             }
1955         }
1956
1957         if (s->luma_elim_threshold && !s->mb_intra)
1958             for (i = 0; i < 4; i++)
1959                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
1960         if (s->chroma_elim_threshold && !s->mb_intra)
1961             for (i = 4; i < mb_block_count; i++)
1962                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
1963
1964         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
1965             for (i = 0; i < mb_block_count; i++) {
1966                 if (s->block_last_index[i] == -1)
1967                     s->coded_score[i] = INT_MAX / 256;
1968             }
1969         }
1970     }
1971
1972     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
1973         s->block_last_index[4] =
1974         s->block_last_index[5] = 0;
1975         s->block[4][0] =
1976         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
1977     }
1978
1979     // non c quantize code returns incorrect block_last_index FIXME
1980     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
1981         for (i = 0; i < mb_block_count; i++) {
1982             int j;
1983             if (s->block_last_index[i] > 0) {
1984                 for (j = 63; j > 0; j--) {
1985                     if (s->block[i][s->intra_scantable.permutated[j]])
1986                         break;
1987                 }
1988                 s->block_last_index[i] = j;
1989             }
1990         }
1991     }
1992
1993     /* huffman encode */
1994     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
1995     case AV_CODEC_ID_MPEG1VIDEO:
1996     case AV_CODEC_ID_MPEG2VIDEO:
1997         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
1998             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
1999         break;
2000     case AV_CODEC_ID_MPEG4:
2001         if (CONFIG_MPEG4_ENCODER)
2002             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2003         break;
2004     case AV_CODEC_ID_MSMPEG4V2:
2005     case AV_CODEC_ID_MSMPEG4V3:
2006     case AV_CODEC_ID_WMV1:
2007         if (CONFIG_MSMPEG4_ENCODER)
2008             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2009         break;
2010     case AV_CODEC_ID_WMV2:
2011         if (CONFIG_WMV2_ENCODER)
2012             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2013         break;
2014     case AV_CODEC_ID_H261:
2015         if (CONFIG_H261_ENCODER)
2016             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2017         break;
2018     case AV_CODEC_ID_H263:
2019     case AV_CODEC_ID_H263P:
2020     case AV_CODEC_ID_FLV1:
2021     case AV_CODEC_ID_RV10:
2022     case AV_CODEC_ID_RV20:
2023         if (CONFIG_H263_ENCODER)
2024             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2025         break;
2026     case AV_CODEC_ID_MJPEG:
2027         if (CONFIG_MJPEG_ENCODER)
2028             ff_mjpeg_encode_mb(s, s->block);
2029         break;
2030     default:
2031         assert(0);
2032     }
2033 }
2034
2035 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2036 {
2037     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2038     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2039 }
2040
2041 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2042     int i;
2043
2044     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2045
2046     /* mpeg1 */
2047     d->mb_skip_run= s->mb_skip_run;
2048     for(i=0; i<3; i++)
2049         d->last_dc[i] = s->last_dc[i];
2050
2051     /* statistics */
2052     d->mv_bits= s->mv_bits;
2053     d->i_tex_bits= s->i_tex_bits;
2054     d->p_tex_bits= s->p_tex_bits;
2055     d->i_count= s->i_count;
2056     d->f_count= s->f_count;
2057     d->b_count= s->b_count;
2058     d->skip_count= s->skip_count;
2059     d->misc_bits= s->misc_bits;
2060     d->last_bits= 0;
2061
2062     d->mb_skipped= 0;
2063     d->qscale= s->qscale;
2064     d->dquant= s->dquant;
2065
2066     d->esc3_level_length= s->esc3_level_length;
2067 }
2068
2069 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2070     int i;
2071
2072     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2073     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2074
2075     /* mpeg1 */
2076     d->mb_skip_run= s->mb_skip_run;
2077     for(i=0; i<3; i++)
2078         d->last_dc[i] = s->last_dc[i];
2079
2080     /* statistics */
2081     d->mv_bits= s->mv_bits;
2082     d->i_tex_bits= s->i_tex_bits;
2083     d->p_tex_bits= s->p_tex_bits;
2084     d->i_count= s->i_count;
2085     d->f_count= s->f_count;
2086     d->b_count= s->b_count;
2087     d->skip_count= s->skip_count;
2088     d->misc_bits= s->misc_bits;
2089
2090     d->mb_intra= s->mb_intra;
2091     d->mb_skipped= s->mb_skipped;
2092     d->mv_type= s->mv_type;
2093     d->mv_dir= s->mv_dir;
2094     d->pb= s->pb;
2095     if(s->data_partitioning){
2096         d->pb2= s->pb2;
2097         d->tex_pb= s->tex_pb;
2098     }
2099     d->block= s->block;
2100     for(i=0; i<8; i++)
2101         d->block_last_index[i]= s->block_last_index[i];
2102     d->interlaced_dct= s->interlaced_dct;
2103     d->qscale= s->qscale;
2104
2105     d->esc3_level_length= s->esc3_level_length;
2106 }
2107
2108 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2109                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2110                            int *dmin, int *next_block, int motion_x, int motion_y)
2111 {
2112     int score;
2113     uint8_t *dest_backup[3];
2114
2115     copy_context_before_encode(s, backup, type);
2116
2117     s->block= s->blocks[*next_block];
2118     s->pb= pb[*next_block];
2119     if(s->data_partitioning){
2120         s->pb2   = pb2   [*next_block];
2121         s->tex_pb= tex_pb[*next_block];
2122     }
2123
2124     if(*next_block){
2125         memcpy(dest_backup, s->dest, sizeof(s->dest));
2126         s->dest[0] = s->rd_scratchpad;
2127         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2128         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2129         assert(s->linesize >= 32); //FIXME
2130     }
2131
2132     encode_mb(s, motion_x, motion_y);
2133
2134     score= put_bits_count(&s->pb);
2135     if(s->data_partitioning){
2136         score+= put_bits_count(&s->pb2);
2137         score+= put_bits_count(&s->tex_pb);
2138     }
2139
2140     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2141         ff_MPV_decode_mb(s, s->block);
2142
2143         score *= s->lambda2;
2144         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2145     }
2146
2147     if(*next_block){
2148         memcpy(s->dest, dest_backup, sizeof(s->dest));
2149     }
2150
2151     if(score<*dmin){
2152         *dmin= score;
2153         *next_block^=1;
2154
2155         copy_context_after_encode(best, s, type);
2156     }
2157 }
2158
2159 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2160     uint32_t *sq = ff_squareTbl + 256;
2161     int acc=0;
2162     int x,y;
2163
2164     if(w==16 && h==16)
2165         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2166     else if(w==8 && h==8)
2167         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2168
2169     for(y=0; y<h; y++){
2170         for(x=0; x<w; x++){
2171             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2172         }
2173     }
2174
2175     assert(acc>=0);
2176
2177     return acc;
2178 }
2179
2180 static int sse_mb(MpegEncContext *s){
2181     int w= 16;
2182     int h= 16;
2183
2184     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2185     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2186
2187     if(w==16 && h==16)
2188       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2189         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2190                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2191                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2192       }else{
2193         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2194                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2195                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2196       }
2197     else
2198         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2199                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2200                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2201 }
2202
2203 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2204     MpegEncContext *s= *(void**)arg;
2205
2206
2207     s->me.pre_pass=1;
2208     s->me.dia_size= s->avctx->pre_dia_size;
2209     s->first_slice_line=1;
2210     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2211         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2212             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2213         }
2214         s->first_slice_line=0;
2215     }
2216
2217     s->me.pre_pass=0;
2218
2219     return 0;
2220 }
2221
2222 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2223     MpegEncContext *s= *(void**)arg;
2224
2225     ff_check_alignment();
2226
2227     s->me.dia_size= s->avctx->dia_size;
2228     s->first_slice_line=1;
2229     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2230         s->mb_x=0; //for block init below
2231         ff_init_block_index(s);
2232         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2233             s->block_index[0]+=2;
2234             s->block_index[1]+=2;
2235             s->block_index[2]+=2;
2236             s->block_index[3]+=2;
2237
2238             /* compute motion vector & mb_type and store in context */
2239             if(s->pict_type==AV_PICTURE_TYPE_B)
2240                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2241             else
2242                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2243         }
2244         s->first_slice_line=0;
2245     }
2246     return 0;
2247 }
2248
2249 static int mb_var_thread(AVCodecContext *c, void *arg){
2250     MpegEncContext *s= *(void**)arg;
2251     int mb_x, mb_y;
2252
2253     ff_check_alignment();
2254
2255     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2256         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2257             int xx = mb_x * 16;
2258             int yy = mb_y * 16;
2259             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2260             int varc;
2261             int sum = s->dsp.pix_sum(pix, s->linesize);
2262
2263             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2264
2265             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2266             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2267             s->me.mb_var_sum_temp    += varc;
2268         }
2269     }
2270     return 0;
2271 }
2272
2273 static void write_slice_end(MpegEncContext *s){
2274     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2275         if(s->partitioned_frame){
2276             ff_mpeg4_merge_partitions(s);
2277         }
2278
2279         ff_mpeg4_stuffing(&s->pb);
2280     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2281         ff_mjpeg_encode_stuffing(&s->pb);
2282     }
2283
2284     avpriv_align_put_bits(&s->pb);
2285     flush_put_bits(&s->pb);
2286
2287     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2288         s->misc_bits+= get_bits_diff(s);
2289 }
2290
2291 static void write_mb_info(MpegEncContext *s)
2292 {
2293     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2294     int offset = put_bits_count(&s->pb);
2295     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2296     int gobn = s->mb_y / s->gob_index;
2297     int pred_x, pred_y;
2298     if (CONFIG_H263_ENCODER)
2299         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2300     bytestream_put_le32(&ptr, offset);
2301     bytestream_put_byte(&ptr, s->qscale);
2302     bytestream_put_byte(&ptr, gobn);
2303     bytestream_put_le16(&ptr, mba);
2304     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2305     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2306     /* 4MV not implemented */
2307     bytestream_put_byte(&ptr, 0); /* hmv2 */
2308     bytestream_put_byte(&ptr, 0); /* vmv2 */
2309 }
2310
2311 static void update_mb_info(MpegEncContext *s, int startcode)
2312 {
2313     if (!s->mb_info)
2314         return;
2315     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2316         s->mb_info_size += 12;
2317         s->prev_mb_info = s->last_mb_info;
2318     }
2319     if (startcode) {
2320         s->prev_mb_info = put_bits_count(&s->pb)/8;
2321         /* This might have incremented mb_info_size above, and we return without
2322          * actually writing any info into that slot yet. But in that case,
2323          * this will be called again at the start of the after writing the
2324          * start code, actually writing the mb info. */
2325         return;
2326     }
2327
2328     s->last_mb_info = put_bits_count(&s->pb)/8;
2329     if (!s->mb_info_size)
2330         s->mb_info_size += 12;
2331     write_mb_info(s);
2332 }
2333
2334 static int encode_thread(AVCodecContext *c, void *arg){
2335     MpegEncContext *s= *(void**)arg;
2336     int mb_x, mb_y, pdif = 0;
2337     int chr_h= 16>>s->chroma_y_shift;
2338     int i, j;
2339     MpegEncContext best_s, backup_s;
2340     uint8_t bit_buf[2][MAX_MB_BYTES];
2341     uint8_t bit_buf2[2][MAX_MB_BYTES];
2342     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2343     PutBitContext pb[2], pb2[2], tex_pb[2];
2344
2345     ff_check_alignment();
2346
2347     for(i=0; i<2; i++){
2348         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2349         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2350         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2351     }
2352
2353     s->last_bits= put_bits_count(&s->pb);
2354     s->mv_bits=0;
2355     s->misc_bits=0;
2356     s->i_tex_bits=0;
2357     s->p_tex_bits=0;
2358     s->i_count=0;
2359     s->f_count=0;
2360     s->b_count=0;
2361     s->skip_count=0;
2362
2363     for(i=0; i<3; i++){
2364         /* init last dc values */
2365         /* note: quant matrix value (8) is implied here */
2366         s->last_dc[i] = 128 << s->intra_dc_precision;
2367
2368         s->current_picture.f.error[i] = 0;
2369     }
2370     s->mb_skip_run = 0;
2371     memset(s->last_mv, 0, sizeof(s->last_mv));
2372
2373     s->last_mv_dir = 0;
2374
2375     switch(s->codec_id){
2376     case AV_CODEC_ID_H263:
2377     case AV_CODEC_ID_H263P:
2378     case AV_CODEC_ID_FLV1:
2379         if (CONFIG_H263_ENCODER)
2380             s->gob_index = ff_h263_get_gob_height(s);
2381         break;
2382     case AV_CODEC_ID_MPEG4:
2383         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2384             ff_mpeg4_init_partitions(s);
2385         break;
2386     }
2387
2388     s->resync_mb_x=0;
2389     s->resync_mb_y=0;
2390     s->first_slice_line = 1;
2391     s->ptr_lastgob = s->pb.buf;
2392     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2393         s->mb_x=0;
2394         s->mb_y= mb_y;
2395
2396         ff_set_qscale(s, s->qscale);
2397         ff_init_block_index(s);
2398
2399         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2400             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2401             int mb_type= s->mb_type[xy];
2402 //            int d;
2403             int dmin= INT_MAX;
2404             int dir;
2405
2406             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2407                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2408                 return -1;
2409             }
2410             if(s->data_partitioning){
2411                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2412                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2413                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2414                     return -1;
2415                 }
2416             }
2417
2418             s->mb_x = mb_x;
2419             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2420             ff_update_block_index(s);
2421
2422             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2423                 ff_h261_reorder_mb_index(s);
2424                 xy= s->mb_y*s->mb_stride + s->mb_x;
2425                 mb_type= s->mb_type[xy];
2426             }
2427
2428             /* write gob / video packet header  */
2429             if(s->rtp_mode){
2430                 int current_packet_size, is_gob_start;
2431
2432                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2433
2434                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2435
2436                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2437
2438                 switch(s->codec_id){
2439                 case AV_CODEC_ID_H263:
2440                 case AV_CODEC_ID_H263P:
2441                     if(!s->h263_slice_structured)
2442                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2443                     break;
2444                 case AV_CODEC_ID_MPEG2VIDEO:
2445                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2446                 case AV_CODEC_ID_MPEG1VIDEO:
2447                     if(s->mb_skip_run) is_gob_start=0;
2448                     break;
2449                 }
2450
2451                 if(is_gob_start){
2452                     if(s->start_mb_y != mb_y || mb_x!=0){
2453                         write_slice_end(s);
2454
2455                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2456                             ff_mpeg4_init_partitions(s);
2457                         }
2458                     }
2459
2460                     assert((put_bits_count(&s->pb)&7) == 0);
2461                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2462
2463                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2464                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2465                         int d= 100 / s->avctx->error_rate;
2466                         if(r % d == 0){
2467                             current_packet_size=0;
2468                             s->pb.buf_ptr= s->ptr_lastgob;
2469                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2470                         }
2471                     }
2472
2473                     if (s->avctx->rtp_callback){
2474                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2475                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2476                     }
2477                     update_mb_info(s, 1);
2478
2479                     switch(s->codec_id){
2480                     case AV_CODEC_ID_MPEG4:
2481                         if (CONFIG_MPEG4_ENCODER) {
2482                             ff_mpeg4_encode_video_packet_header(s);
2483                             ff_mpeg4_clean_buffers(s);
2484                         }
2485                     break;
2486                     case AV_CODEC_ID_MPEG1VIDEO:
2487                     case AV_CODEC_ID_MPEG2VIDEO:
2488                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2489                             ff_mpeg1_encode_slice_header(s);
2490                             ff_mpeg1_clean_buffers(s);
2491                         }
2492                     break;
2493                     case AV_CODEC_ID_H263:
2494                     case AV_CODEC_ID_H263P:
2495                         if (CONFIG_H263_ENCODER)
2496                             ff_h263_encode_gob_header(s, mb_y);
2497                     break;
2498                     }
2499
2500                     if(s->flags&CODEC_FLAG_PASS1){
2501                         int bits= put_bits_count(&s->pb);
2502                         s->misc_bits+= bits - s->last_bits;
2503                         s->last_bits= bits;
2504                     }
2505
2506                     s->ptr_lastgob += current_packet_size;
2507                     s->first_slice_line=1;
2508                     s->resync_mb_x=mb_x;
2509                     s->resync_mb_y=mb_y;
2510                 }
2511             }
2512
2513             if(  (s->resync_mb_x   == s->mb_x)
2514                && s->resync_mb_y+1 == s->mb_y){
2515                 s->first_slice_line=0;
2516             }
2517
2518             s->mb_skipped=0;
2519             s->dquant=0; //only for QP_RD
2520
2521             update_mb_info(s, 0);
2522
2523             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2524                 int next_block=0;
2525                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2526
2527                 copy_context_before_encode(&backup_s, s, -1);
2528                 backup_s.pb= s->pb;
2529                 best_s.data_partitioning= s->data_partitioning;
2530                 best_s.partitioned_frame= s->partitioned_frame;
2531                 if(s->data_partitioning){
2532                     backup_s.pb2= s->pb2;
2533                     backup_s.tex_pb= s->tex_pb;
2534                 }
2535
2536                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2537                     s->mv_dir = MV_DIR_FORWARD;
2538                     s->mv_type = MV_TYPE_16X16;
2539                     s->mb_intra= 0;
2540                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2541                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2542                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2543                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2544                 }
2545                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2546                     s->mv_dir = MV_DIR_FORWARD;
2547                     s->mv_type = MV_TYPE_FIELD;
2548                     s->mb_intra= 0;
2549                     for(i=0; i<2; i++){
2550                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2551                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2552                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2553                     }
2554                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2555                                  &dmin, &next_block, 0, 0);
2556                 }
2557                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2558                     s->mv_dir = MV_DIR_FORWARD;
2559                     s->mv_type = MV_TYPE_16X16;
2560                     s->mb_intra= 0;
2561                     s->mv[0][0][0] = 0;
2562                     s->mv[0][0][1] = 0;
2563                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2564                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2565                 }
2566                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2567                     s->mv_dir = MV_DIR_FORWARD;
2568                     s->mv_type = MV_TYPE_8X8;
2569                     s->mb_intra= 0;
2570                     for(i=0; i<4; i++){
2571                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2572                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2573                     }
2574                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2575                                  &dmin, &next_block, 0, 0);
2576                 }
2577                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2578                     s->mv_dir = MV_DIR_FORWARD;
2579                     s->mv_type = MV_TYPE_16X16;
2580                     s->mb_intra= 0;
2581                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2582                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2583                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2584                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2585                 }
2586                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2587                     s->mv_dir = MV_DIR_BACKWARD;
2588                     s->mv_type = MV_TYPE_16X16;
2589                     s->mb_intra= 0;
2590                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2591                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2592                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2593                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2594                 }
2595                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2596                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2597                     s->mv_type = MV_TYPE_16X16;
2598                     s->mb_intra= 0;
2599                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2600                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2601                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2602                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2603                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2604                                  &dmin, &next_block, 0, 0);
2605                 }
2606                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2607                     s->mv_dir = MV_DIR_FORWARD;
2608                     s->mv_type = MV_TYPE_FIELD;
2609                     s->mb_intra= 0;
2610                     for(i=0; i<2; i++){
2611                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2612                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2613                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2614                     }
2615                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2616                                  &dmin, &next_block, 0, 0);
2617                 }
2618                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2619                     s->mv_dir = MV_DIR_BACKWARD;
2620                     s->mv_type = MV_TYPE_FIELD;
2621                     s->mb_intra= 0;
2622                     for(i=0; i<2; i++){
2623                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2624                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2625                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2626                     }
2627                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2628                                  &dmin, &next_block, 0, 0);
2629                 }
2630                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2631                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2632                     s->mv_type = MV_TYPE_FIELD;
2633                     s->mb_intra= 0;
2634                     for(dir=0; dir<2; dir++){
2635                         for(i=0; i<2; i++){
2636                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2637                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2638                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2639                         }
2640                     }
2641                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2642                                  &dmin, &next_block, 0, 0);
2643                 }
2644                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2645                     s->mv_dir = 0;
2646                     s->mv_type = MV_TYPE_16X16;
2647                     s->mb_intra= 1;
2648                     s->mv[0][0][0] = 0;
2649                     s->mv[0][0][1] = 0;
2650                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2651                                  &dmin, &next_block, 0, 0);
2652                     if(s->h263_pred || s->h263_aic){
2653                         if(best_s.mb_intra)
2654                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2655                         else
2656                             ff_clean_intra_table_entries(s); //old mode?
2657                     }
2658                 }
2659
2660                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2661                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2662                         const int last_qp= backup_s.qscale;
2663                         int qpi, qp, dc[6];
2664                         int16_t ac[6][16];
2665                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2666                         static const int dquant_tab[4]={-1,1,-2,2};
2667
2668                         assert(backup_s.dquant == 0);
2669
2670                         //FIXME intra
2671                         s->mv_dir= best_s.mv_dir;
2672                         s->mv_type = MV_TYPE_16X16;
2673                         s->mb_intra= best_s.mb_intra;
2674                         s->mv[0][0][0] = best_s.mv[0][0][0];
2675                         s->mv[0][0][1] = best_s.mv[0][0][1];
2676                         s->mv[1][0][0] = best_s.mv[1][0][0];
2677                         s->mv[1][0][1] = best_s.mv[1][0][1];
2678
2679                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2680                         for(; qpi<4; qpi++){
2681                             int dquant= dquant_tab[qpi];
2682                             qp= last_qp + dquant;
2683                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2684                                 continue;
2685                             backup_s.dquant= dquant;
2686                             if(s->mb_intra && s->dc_val[0]){
2687                                 for(i=0; i<6; i++){
2688                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2689                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2690                                 }
2691                             }
2692
2693                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2694                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2695                             if(best_s.qscale != qp){
2696                                 if(s->mb_intra && s->dc_val[0]){
2697                                     for(i=0; i<6; i++){
2698                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2699                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2700                                     }
2701                                 }
2702                             }
2703                         }
2704                     }
2705                 }
2706                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2707                     int mx= s->b_direct_mv_table[xy][0];
2708                     int my= s->b_direct_mv_table[xy][1];
2709
2710                     backup_s.dquant = 0;
2711                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2712                     s->mb_intra= 0;
2713                     ff_mpeg4_set_direct_mv(s, mx, my);
2714                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2715                                  &dmin, &next_block, mx, my);
2716                 }
2717                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2718                     backup_s.dquant = 0;
2719                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2720                     s->mb_intra= 0;
2721                     ff_mpeg4_set_direct_mv(s, 0, 0);
2722                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2723                                  &dmin, &next_block, 0, 0);
2724                 }
2725                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2726                     int coded=0;
2727                     for(i=0; i<6; i++)
2728                         coded |= s->block_last_index[i];
2729                     if(coded){
2730                         int mx,my;
2731                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2732                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2733                             mx=my=0; //FIXME find the one we actually used
2734                             ff_mpeg4_set_direct_mv(s, mx, my);
2735                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2736                             mx= s->mv[1][0][0];
2737                             my= s->mv[1][0][1];
2738                         }else{
2739                             mx= s->mv[0][0][0];
2740                             my= s->mv[0][0][1];
2741                         }
2742
2743                         s->mv_dir= best_s.mv_dir;
2744                         s->mv_type = best_s.mv_type;
2745                         s->mb_intra= 0;
2746 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2747                         s->mv[0][0][1] = best_s.mv[0][0][1];
2748                         s->mv[1][0][0] = best_s.mv[1][0][0];
2749                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2750                         backup_s.dquant= 0;
2751                         s->skipdct=1;
2752                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2753                                         &dmin, &next_block, mx, my);
2754                         s->skipdct=0;
2755                     }
2756                 }
2757
2758                 s->current_picture.qscale_table[xy] = best_s.qscale;
2759
2760                 copy_context_after_encode(s, &best_s, -1);
2761
2762                 pb_bits_count= put_bits_count(&s->pb);
2763                 flush_put_bits(&s->pb);
2764                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2765                 s->pb= backup_s.pb;
2766
2767                 if(s->data_partitioning){
2768                     pb2_bits_count= put_bits_count(&s->pb2);
2769                     flush_put_bits(&s->pb2);
2770                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2771                     s->pb2= backup_s.pb2;
2772
2773                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2774                     flush_put_bits(&s->tex_pb);
2775                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2776                     s->tex_pb= backup_s.tex_pb;
2777                 }
2778                 s->last_bits= put_bits_count(&s->pb);
2779
2780                 if (CONFIG_H263_ENCODER &&
2781                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2782                     ff_h263_update_motion_val(s);
2783
2784                 if(next_block==0){ //FIXME 16 vs linesize16
2785                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2786                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2787                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2788                 }
2789
2790                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2791                     ff_MPV_decode_mb(s, s->block);
2792             } else {
2793                 int motion_x = 0, motion_y = 0;
2794                 s->mv_type=MV_TYPE_16X16;
2795                 // only one MB-Type possible
2796
2797                 switch(mb_type){
2798                 case CANDIDATE_MB_TYPE_INTRA:
2799                     s->mv_dir = 0;
2800                     s->mb_intra= 1;
2801                     motion_x= s->mv[0][0][0] = 0;
2802                     motion_y= s->mv[0][0][1] = 0;
2803                     break;
2804                 case CANDIDATE_MB_TYPE_INTER:
2805                     s->mv_dir = MV_DIR_FORWARD;
2806                     s->mb_intra= 0;
2807                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2808                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2809                     break;
2810                 case CANDIDATE_MB_TYPE_INTER_I:
2811                     s->mv_dir = MV_DIR_FORWARD;
2812                     s->mv_type = MV_TYPE_FIELD;
2813                     s->mb_intra= 0;
2814                     for(i=0; i<2; i++){
2815                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2816                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2817                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2818                     }
2819                     break;
2820                 case CANDIDATE_MB_TYPE_INTER4V:
2821                     s->mv_dir = MV_DIR_FORWARD;
2822                     s->mv_type = MV_TYPE_8X8;
2823                     s->mb_intra= 0;
2824                     for(i=0; i<4; i++){
2825                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2826                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2827                     }
2828                     break;
2829                 case CANDIDATE_MB_TYPE_DIRECT:
2830                     if (CONFIG_MPEG4_ENCODER) {
2831                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2832                         s->mb_intra= 0;
2833                         motion_x=s->b_direct_mv_table[xy][0];
2834                         motion_y=s->b_direct_mv_table[xy][1];
2835                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2836                     }
2837                     break;
2838                 case CANDIDATE_MB_TYPE_DIRECT0:
2839                     if (CONFIG_MPEG4_ENCODER) {
2840                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2841                         s->mb_intra= 0;
2842                         ff_mpeg4_set_direct_mv(s, 0, 0);
2843                     }
2844                     break;
2845                 case CANDIDATE_MB_TYPE_BIDIR:
2846                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2847                     s->mb_intra= 0;
2848                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2849                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2850                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2851                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2852                     break;
2853                 case CANDIDATE_MB_TYPE_BACKWARD:
2854                     s->mv_dir = MV_DIR_BACKWARD;
2855                     s->mb_intra= 0;
2856                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2857                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2858                     break;
2859                 case CANDIDATE_MB_TYPE_FORWARD:
2860                     s->mv_dir = MV_DIR_FORWARD;
2861                     s->mb_intra= 0;
2862                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2863                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2864                     break;
2865                 case CANDIDATE_MB_TYPE_FORWARD_I:
2866                     s->mv_dir = MV_DIR_FORWARD;
2867                     s->mv_type = MV_TYPE_FIELD;
2868                     s->mb_intra= 0;
2869                     for(i=0; i<2; i++){
2870                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2871                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2872                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2873                     }
2874                     break;
2875                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2876                     s->mv_dir = MV_DIR_BACKWARD;
2877                     s->mv_type = MV_TYPE_FIELD;
2878                     s->mb_intra= 0;
2879                     for(i=0; i<2; i++){
2880                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2881                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2882                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2883                     }
2884                     break;
2885                 case CANDIDATE_MB_TYPE_BIDIR_I:
2886                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2887                     s->mv_type = MV_TYPE_FIELD;
2888                     s->mb_intra= 0;
2889                     for(dir=0; dir<2; dir++){
2890                         for(i=0; i<2; i++){
2891                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2892                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2893                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2894                         }
2895                     }
2896                     break;
2897                 default:
2898                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
2899                 }
2900
2901                 encode_mb(s, motion_x, motion_y);
2902
2903                 // RAL: Update last macroblock type
2904                 s->last_mv_dir = s->mv_dir;
2905
2906                 if (CONFIG_H263_ENCODER &&
2907                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2908                     ff_h263_update_motion_val(s);
2909
2910                 ff_MPV_decode_mb(s, s->block);
2911             }
2912
2913             /* clean the MV table in IPS frames for direct mode in B frames */
2914             if(s->mb_intra /* && I,P,S_TYPE */){
2915                 s->p_mv_table[xy][0]=0;
2916                 s->p_mv_table[xy][1]=0;
2917             }
2918
2919             if(s->flags&CODEC_FLAG_PSNR){
2920                 int w= 16;
2921                 int h= 16;
2922
2923                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2924                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2925
2926                 s->current_picture.f.error[0] += sse(
2927                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
2928                     s->dest[0], w, h, s->linesize);
2929                 s->current_picture.f.error[1] += sse(
2930                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2931                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2932                 s->current_picture.f.error[2] += sse(
2933                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2934                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2935             }
2936             if(s->loop_filter){
2937                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
2938                     ff_h263_loop_filter(s);
2939             }
2940             av_dlog(s->avctx, "MB %d %d bits\n",
2941                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
2942         }
2943     }
2944
2945     //not beautiful here but we must write it before flushing so it has to be here
2946     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
2947         ff_msmpeg4_encode_ext_header(s);
2948
2949     write_slice_end(s);
2950
2951     /* Send the last GOB if RTP */
2952     if (s->avctx->rtp_callback) {
2953         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
2954         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
2955         /* Call the RTP callback to send the last GOB */
2956         emms_c();
2957         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
2958     }
2959
2960     return 0;
2961 }
2962
2963 #define MERGE(field) dst->field += src->field; src->field=0
2964 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
2965     MERGE(me.scene_change_score);
2966     MERGE(me.mc_mb_var_sum_temp);
2967     MERGE(me.mb_var_sum_temp);
2968 }
2969
2970 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
2971     int i;
2972
2973     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
2974     MERGE(dct_count[1]);
2975     MERGE(mv_bits);
2976     MERGE(i_tex_bits);
2977     MERGE(p_tex_bits);
2978     MERGE(i_count);
2979     MERGE(f_count);
2980     MERGE(b_count);
2981     MERGE(skip_count);
2982     MERGE(misc_bits);
2983     MERGE(er.error_count);
2984     MERGE(padding_bug_score);
2985     MERGE(current_picture.f.error[0]);
2986     MERGE(current_picture.f.error[1]);
2987     MERGE(current_picture.f.error[2]);
2988
2989     if(dst->avctx->noise_reduction){
2990         for(i=0; i<64; i++){
2991             MERGE(dct_error_sum[0][i]);
2992             MERGE(dct_error_sum[1][i]);
2993         }
2994     }
2995
2996     assert(put_bits_count(&src->pb) % 8 ==0);
2997     assert(put_bits_count(&dst->pb) % 8 ==0);
2998     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
2999     flush_put_bits(&dst->pb);
3000 }
3001
3002 static int estimate_qp(MpegEncContext *s, int dry_run){
3003     if (s->next_lambda){
3004         s->current_picture_ptr->f.quality =
3005         s->current_picture.f.quality = s->next_lambda;
3006         if(!dry_run) s->next_lambda= 0;
3007     } else if (!s->fixed_qscale) {
3008         s->current_picture_ptr->f.quality =
3009         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3010         if (s->current_picture.f.quality < 0)
3011             return -1;
3012     }
3013
3014     if(s->adaptive_quant){
3015         switch(s->codec_id){
3016         case AV_CODEC_ID_MPEG4:
3017             if (CONFIG_MPEG4_ENCODER)
3018                 ff_clean_mpeg4_qscales(s);
3019             break;
3020         case AV_CODEC_ID_H263:
3021         case AV_CODEC_ID_H263P:
3022         case AV_CODEC_ID_FLV1:
3023             if (CONFIG_H263_ENCODER)
3024                 ff_clean_h263_qscales(s);
3025             break;
3026         default:
3027             ff_init_qscale_tab(s);
3028         }
3029
3030         s->lambda= s->lambda_table[0];
3031         //FIXME broken
3032     }else
3033         s->lambda = s->current_picture.f.quality;
3034     update_qscale(s);
3035     return 0;
3036 }
3037
3038 /* must be called before writing the header */
3039 static void set_frame_distances(MpegEncContext * s){
3040     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3041     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3042
3043     if(s->pict_type==AV_PICTURE_TYPE_B){
3044         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3045         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3046     }else{
3047         s->pp_time= s->time - s->last_non_b_time;
3048         s->last_non_b_time= s->time;
3049         assert(s->picture_number==0 || s->pp_time > 0);
3050     }
3051 }
3052
/**
 * Encode the current picture of the encoder context.
 *
 * Steps, in order: set up timing and rounding state, seed lambda, copy the
 * main context into the other slice contexts, run motion estimation (or
 * mark every macroblock as an intra candidate for I pictures), merge the
 * per-context results, optionally turn a P picture into an I picture on a
 * scene change, choose f_code/b_code and fix over-long motion vectors,
 * estimate the final qp, write the picture header for the output format
 * and finally run encode_thread on all slice contexts and merge them.
 *
 * @param s              encoder context
 * @param picture_number stored in s->picture_number and handed to the
 *                       picture header writers
 * @return 0 on success; -1 if estimate_qp() or ff_init_me() fails; the
 *         negative value returned by ff_update_duplicate_context() if that
 *         call fails
 */
3053 static int encode_picture(MpegEncContext *s, int picture_number)
3054 {
3055     int i, ret;
3056     int bits;
3057     int context_count = s->slice_context_count;
3058
3059     s->picture_number = picture_number;
3060
3061     /* Reset the average MB variance */
3062     s->me.mb_var_sum_temp    =
3063     s->me.mc_mb_var_sum_temp = 0;
3064
3065     /* we need to initialize some time vars before we can encode b-frames */
3066     // RAL: Condition added for MPEG1VIDEO
3067     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3068         set_frame_distances(s);
3069     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3070         ff_set_mpeg4_time(s);
3071
3072     s->me.scene_change_score=0;
3073
3074 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3075
         /* I pictures reset no_rounding (1 only for msmpeg4_version >= 3);
          * other non-B pictures toggle it when flipflop_rounding is set or
          * the codec is H263P / MPEG4. B pictures leave it untouched. */
3076     if(s->pict_type==AV_PICTURE_TYPE_I){
3077         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3078         else                        s->no_rounding=0;
3079     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3080         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3081             s->no_rounding ^= 1;
3082     }
3083
         /* Second pass: dry-run qp estimate followed by ff_get_2pass_fcode().
          * Otherwise, unless CODEC_FLAG_QSCALE is set, seed lambda from the
          * last lambda recorded for the matching picture type. */
3084     if(s->flags & CODEC_FLAG_PASS2){
3085         if (estimate_qp(s,1) < 0)
3086             return -1;
3087         ff_get_2pass_fcode(s);
3088     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3089         if(s->pict_type==AV_PICTURE_TYPE_B)
3090             s->lambda= s->last_lambda_for[s->pict_type];
3091         else
3092             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3093         update_qscale(s);
3094     }
3095
3096     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* Copy the state set up above into every slice context except index 0
          * (presumably index 0 is the main context itself -- TODO confirm). */
3097     for(i=1; i<context_count; i++){
3098         ret = ff_update_duplicate_context(s->thread_context[i], s);
3099         if (ret < 0)
3100             return ret;
3101     }
3102
3103     if(ff_init_me(s)<0)
3104         return -1;
3105
3106     /* Estimate motion for every MB */
3107     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda and lambda2 by me_penalty_compensation/256, rounded */
3108         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3109         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3110         if (s->pict_type != AV_PICTURE_TYPE_B) {
                 /* optional pre-pass: pre_me==2 always, any other non-zero
                  * pre_me only right after an I picture */
3111             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3112                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3113             }
3114         }
3115
3116         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3117     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3118         /* I-Frame */
3119         for(i=0; i<s->mb_stride*s->mb_height; i++)
3120             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3121
3122         if(!s->fixed_qscale){
3123             /* finding spatial complexity for I-frame rate control */
3124             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3125         }
3126     }
         /* fold the results of the other slice contexts back into the main one */
3127     for(i=1; i<context_count; i++){
3128         merge_context_after_me(s, s->thread_context[i]);
3129     }
3130     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3131     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3132     emms_c();
3133
         /* Scene change: a P picture whose score exceeds the threshold is
          * re-typed as I and all macroblocks become intra candidates. */
3134     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3135         s->pict_type= AV_PICTURE_TYPE_I;
3136         for(i=0; i<s->mb_stride*s->mb_height; i++)
3137             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3138         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3139                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3140     }
3141
         /* f_code/b_code selection and motion vector fixing; skipped
          * entirely when umvplus is set. */
3142     if(!s->umvplus){
3143         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3144             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3145
3146             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3147                 int a,b;
3148                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3149                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3150                 s->f_code= FFMAX3(s->f_code, a, b);
3151             }
3152
3153             ff_fix_long_p_mvs(s);
3154             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3155             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3156                 int j;
3157                 for(i=0; i<2; i++){
3158                     for(j=0; j<2; j++)
3159                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3160                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3161                 }
3162             }
3163         }
3164
3165         if(s->pict_type==AV_PICTURE_TYPE_B){
3166             int a, b;
3167
                 /* f_code covers the forward tables, b_code the backward ones */
3168             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3169             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3170             s->f_code = FFMAX(a, b);
3171
3172             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3173             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3174             s->b_code = FFMAX(a, b);
3175
3176             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3177             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3178             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3179             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3180             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3181                 int dir, j;
3182                 for(dir=0; dir<2; dir++){
3183                     for(i=0; i<2; i++){
3184                         for(j=0; j<2; j++){
3185                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3186                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3187                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3188                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3189                         }
3190                     }
3191                 }
3192             }
3193         }
3194     }
3195
         /* final (non dry-run) qp estimate for this picture */
3196     if (estimate_qp(s, 0) < 0)
3197         return -1;
3198
3199     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3200         s->qscale= 3; //reduce clipping problems
3201
3202     if (s->out_format == FMT_MJPEG) {
3203         /* for mjpeg, we do include qscale in the matrix */
3204         for(i=1;i<64;i++){
3205             int j= s->dsp.idct_permutation[i];
3206
3207             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3208         }
3209         s->y_dc_scale_table=
3210         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3211         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3212         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3213                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
             /* qscale is already folded into the matrix, so use the fixed value 8 */
3214         s->qscale= 8;
3215     }
3216
3217     //FIXME var duplication
3218     s->current_picture_ptr->f.key_frame =
3219     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3220     s->current_picture_ptr->f.pict_type =
3221     s->current_picture.f.pict_type = s->pict_type;
3222
3223     if (s->current_picture.f.key_frame)
3224         s->picture_in_gop_number=0;
3225
         /* Write the picture header for the output format; the bit position
          * before and after is used to compute header_bits below. */
3226     s->last_bits= put_bits_count(&s->pb);
3227     switch(s->out_format) {
3228     case FMT_MJPEG:
3229         if (CONFIG_MJPEG_ENCODER)
3230             ff_mjpeg_encode_picture_header(s);
3231         break;
3232     case FMT_H261:
3233         if (CONFIG_H261_ENCODER)
3234             ff_h261_encode_picture_header(s, picture_number);
3235         break;
3236     case FMT_H263:
3237         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3238             ff_wmv2_encode_picture_header(s, picture_number);
3239         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3240             ff_msmpeg4_encode_picture_header(s, picture_number);
3241         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3242             ff_mpeg4_encode_picture_header(s, picture_number);
3243         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3244             ff_rv10_encode_picture_header(s, picture_number);
3245         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3246             ff_rv20_encode_picture_header(s, picture_number);
3247         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3248             ff_flv_encode_picture_header(s, picture_number);
3249         else if (CONFIG_H263_ENCODER)
3250             ff_h263_encode_picture_header(s, picture_number);
3251         break;
3252     case FMT_MPEG1:
3253         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3254             ff_mpeg1_encode_picture_header(s, picture_number);
3255         break;
3256     case FMT_H264:
3257         break;
3258     default:
3259         assert(0);
3260     }
3261     bits= put_bits_count(&s->pb);
3262     s->header_bits= bits - s->last_bits;
3263
         /* push the post-ME state to the other slice contexts, encode all
          * contexts, then merge the other contexts back into the main one */
3264     for(i=1; i<context_count; i++){
3265         update_duplicate_context_after_me(s->thread_context[i], s);
3266     }
3267     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3268     for(i=1; i<context_count; i++){
3269         merge_context_after_encode(s, s->thread_context[i]);
3270     }
3271     emms_c();
3272     return 0;
3273 }
3274
3275 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3276     const int intra= s->mb_intra;
3277     int i;
3278
3279     s->dct_count[intra]++;
3280
3281     for(i=0; i<64; i++){
3282         int level= block[i];
3283
3284         if(level){
3285             if(level>0){
3286                 s->dct_error_sum[intra][i] += level;
3287                 level -= s->dct_offset[intra][i];
3288                 if(level<0) level=0;
3289             }else{
3290                 s->dct_error_sum[intra][i] -= level;
3291                 level += s->dct_offset[intra][i];
3292                 if(level>0) level=0;
3293             }
3294             block[i]= level;
3295         }
3296     }
3297 }
3298
/**
 * Transform and quantize one block, choosing the coefficient levels with a
 * rate-distortion search.
 *
 * The block is passed through s->dsp.fdct (and s->denoise_dct when
 * s->dct_error_sum is set). For every scan position up to the last
 * coefficient that survives plain quantization, up to two candidate levels
 * are considered; a dynamic programme over scan positions then picks the
 * run/level sequence minimizing distortion + lambda * code length, using
 * the intra or inter VLC length tables of the context.
 *
 * @param s        encoder context
 * @param block    64 coefficients; input samples on entry, quantized levels
 *                 on return
 * @param n        block number; selects luma (n < 4) or chroma DC scale for
 *                 intra blocks and indexes s->coded_score
 * @param qscale   quantizer used to select the quantization matrix row and
 *                 to reconstruct candidate coefficients
 * @param overflow set to non-zero when a quantized magnitude may exceed
 *                 s->max_qcoeff
 * @return scan index of the last non-zero coefficient that is kept; a value
 *         below the first coded index (0 for intra, -1 for inter) when
 *         nothing is kept
 */
3299 static int dct_quantize_trellis_c(MpegEncContext *s,
3300                                   int16_t *block, int n,
3301                                   int qscale, int *overflow){
3302     const int *qmat;
3303     const uint8_t *scantable= s->intra_scantable.scantable;
3304     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3305     int max=0;
3306     unsigned int threshold1, threshold2;
3307     int bias=0;
3308     int run_tab[65];
3309     int level_tab[65];
3310     int score_tab[65];
3311     int survivor[65];
3312     int survivor_count;
3313     int last_run=0;
3314     int last_level=0;
3315     int last_score= 0;
3316     int last_i;
3317     int coeff[2][64];
3318     int coeff_count[64];
3319     int qmul, qadd, start_i, last_non_zero, i, dc;
3320     const int esc_length= s->ac_esc_length;
3321     uint8_t * length;
3322     uint8_t * last_length;
3323     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3324
3325     s->dsp.fdct (block);
3326
3327     if(s->dct_error_sum)
3328         s->denoise_dct(s, block);
3329     qmul= qscale*16;
3330     qadd= ((qscale-1)|1)*8;
3331
         /* Intra blocks quantize the DC coefficient here and start the search
          * at scan index 1; inter blocks search from index 0. */
3332     if (s->mb_intra) {
3333         int q;
3334         if (!s->h263_aic) {
3335             if (n < 4)
3336                 q = s->y_dc_scale;
3337             else
3338                 q = s->c_dc_scale;
3339             q = q << 3;
3340         } else{
3341             /* For AIC we skip quant/dequant of INTRADC */
3342             q = 1 << 3;
3343             qadd=0;
3344         }
3345
3346         /* note: block[0] is assumed to be positive */
3347         block[0] = (block[0] + (q >> 1)) / q;
3348         start_i = 1;
3349         last_non_zero = 0;
3350         qmat = s->q_intra_matrix[qscale];
3351         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3352             bias= 1<<(QMAT_SHIFT-1);
3353         length     = s->intra_ac_vlc_length;
3354         last_length= s->intra_ac_vlc_last_length;
3355     } else {
3356         start_i = 0;
3357         last_non_zero = -1;
3358         qmat = s->q_inter_matrix[qscale];
3359         length     = s->inter_ac_vlc_length;
3360         last_length= s->inter_ac_vlc_last_length;
3361     }
3362     last_i= start_i;
3363
3364     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3365     threshold2= (threshold1<<1);
3366
         /* Scan backwards for the last coefficient whose scaled magnitude
          * exceeds threshold1 (the unsigned compare tests both signs at once). */
3367     for(i=63; i>=start_i; i--) {
3368         const int j = scantable[i];
3369         int level = block[j] * qmat[j];
3370
3371         if(((unsigned)(level+threshold1))>threshold2){
3372             last_non_zero = i;
3373             break;
3374         }
3375     }
3376
         /* Build the candidate levels per position: above the threshold the
          * quantized level and the one closer to zero (a single candidate when
          * the magnitude is 1); below it just +-1 with the sign of the input. */
3377     for(i=start_i; i<=last_non_zero; i++) {
3378         const int j = scantable[i];
3379         int level = block[j] * qmat[j];
3380
3381 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3382 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3383         if(((unsigned)(level+threshold1))>threshold2){
3384             if(level>0){
3385                 level= (bias + level)>>QMAT_SHIFT;
3386                 coeff[0][i]= level;
3387                 coeff[1][i]= level-1;
3388 //                coeff[2][k]= level-2;
3389             }else{
3390                 level= (bias - level)>>QMAT_SHIFT;
3391                 coeff[0][i]= -level;
3392                 coeff[1][i]= -level+1;
3393 //                coeff[2][k]= -level+2;
3394             }
3395             coeff_count[i]= FFMIN(level, 2);
3396             assert(coeff_count[i]);
3397             max |=level;
3398         }else{
3399             coeff[0][i]= (level>>31)|1;
3400             coeff_count[i]= 1;
3401         }
3402     }
3403
3404     *overflow= s->max_qcoeff < max; //overflow might have happened
3405
         /* nothing survived quantization: clear the searched range and return */
3406     if(last_non_zero < start_i){
3407         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3408         return last_non_zero;
3409     }
3410
3411     score_tab[start_i]= 0;
3412     survivor[0]= start_i;
3413     survivor_count= 1;
3414
         /* Dynamic programme over scan positions. score_tab[i+1] receives the
          * best score found for coding a non-zero level at position i;
          * survivor[] lists the earlier end positions still worth extending
          * from. run_tab/level_tab record the choice for the backtrack below. */
3415     for(i=start_i; i<=last_non_zero; i++){
3416         int level_index, j, zero_distortion;
3417         int dct_coeff= FFABS(block[ scantable[i] ]);
3418         int best_score=256*256*256*120;
3419
3420         if (s->dsp.fdct == ff_fdct_ifast)
3421             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3422         zero_distortion= dct_coeff*dct_coeff;
3423
3424         for(level_index=0; level_index < coeff_count[i]; level_index++){
3425             int distortion;
3426             int level= coeff[level_index][i];
3427             const int alevel= FFABS(level);
3428             int unquant_coeff;
3429
3430             assert(level);
3431
                 /* reconstruct the value this candidate would dequantize to */
3432             if(s->out_format == FMT_H263){
3433                 unquant_coeff= alevel*qmul + qadd;
3434             }else{ //MPEG1
3435                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3436                 if(s->mb_intra){
3437                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3438                         unquant_coeff =   (unquant_coeff - 1) | 1;
3439                 }else{
3440                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3441                         unquant_coeff =   (unquant_coeff - 1) | 1;
3442                 }
3443                 unquant_coeff<<= 3;
3444             }
3445
                 /* squared error relative to coding this coefficient as zero */
3446             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3447             level+=64;
                 /* level+64 within 0..127: cost comes from the length tables;
                  * otherwise the escape length is charged */
3448             if((level&(~127)) == 0){
3449                 for(j=survivor_count-1; j>=0; j--){
3450                     int run= i - survivor[j];
3451                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3452                     score += score_tab[i-run];
3453
3454                     if(score < best_score){
3455                         best_score= score;
3456                         run_tab[i+1]= run;
3457                         level_tab[i+1]= level-64;
3458                     }
3459                 }
3460
                     /* for FMT_H263 the cost of ending the block here uses the
                      * separate last_length table and is tracked during the search */
3461                 if(s->out_format == FMT_H263){
3462                     for(j=survivor_count-1; j>=0; j--){
3463                         int run= i - survivor[j];
3464                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3465                         score += score_tab[i-run];
3466                         if(score < last_score){
3467                             last_score= score;
3468                             last_run= run;
3469                             last_level= level-64;
3470                             last_i= i+1;
3471                         }
3472                     }
3473                 }
3474             }else{
3475                 distortion += esc_length*lambda;
3476                 for(j=survivor_count-1; j>=0; j--){
3477                     int run= i - survivor[j];
3478                     int score= distortion + score_tab[i-run];
3479
3480                     if(score < best_score){
3481                         best_score= score;
3482                         run_tab[i+1]= run;
3483                         level_tab[i+1]= level-64;
3484                     }
3485                 }
3486
3487                 if(s->out_format == FMT_H263){
3488                   for(j=survivor_count-1; j>=0; j--){
3489                         int run= i - survivor[j];
3490                         int score= distortion + score_tab[i-run];
3491                         if(score < last_score){
3492                             last_score= score;
3493                             last_run= run;
3494                             last_level= level-64;
3495                             last_i= i+1;
3496                         }
3497                     }
3498                 }
3499             }
3500         }
3501
3502         score_tab[i+1]= best_score;
3503
             /* Drop survivors whose score is worse than the new best; for
              * longer blocks (last_non_zero > 27) a slack of lambda is allowed. */
3504         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3505         if(last_non_zero <= 27){
3506             for(; survivor_count; survivor_count--){
3507                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3508                     break;
3509             }
3510         }else{
3511             for(; survivor_count; survivor_count--){
3512                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3513                     break;
3514             }
3515         }
3516
3517         survivor[ survivor_count++ ]= i+1;
3518     }
3519
         /* For formats other than FMT_H263 the end position is chosen after the
          * search: lowest score_tab entry, with 2*lambda added for every
          * position except 0. */
3520     if(s->out_format != FMT_H263){
3521         last_score= 256*256*256*120;
3522         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3523             int score= score_tab[i];
3524             if(i) score += lambda*2; //FIXME exacter?
3525
3526             if(score < last_score){
3527                 last_score= score;
3528                 last_i= i;
3529                 last_level= level_tab[i];
3530                 last_run= run_tab[i];
3531             }
3532         }
3533     }
3534
3535     s->coded_score[n] = last_score;
3536
         /* remember |block[0]| before the searched range is cleared; the
          * single-coefficient case below needs it */
3537     dc= FFABS(block[0]);
3538     last_non_zero= last_i - 1;
3539     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3540
3541     if(last_non_zero < start_i)
3542         return last_non_zero;
3543
         /* Special case: inter block whose only kept coefficient is at scan
          * index 0. Re-evaluate its candidates, including dropping it. */
3544     if(last_non_zero == 0 && start_i == 0){
3545         int best_level= 0;
3546         int best_score= dc * dc;
3547
3548         for(i=0; i<coeff_count[0]; i++){
3549             int level= coeff[i][0];
3550             int alevel= FFABS(level);
3551             int unquant_coeff, score, distortion;
3552
3553             if(s->out_format == FMT_H263){
3554                     unquant_coeff= (alevel*qmul + qadd)>>3;
3555             }else{ //MPEG1
3556                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3557                     unquant_coeff =   (unquant_coeff - 1) | 1;
3558             }
3559             unquant_coeff = (unquant_coeff + 4) >> 3;
3560             unquant_coeff<<= 3 + 3;
3561
3562             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3563             level+=64;
3564             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3565             else                    score= distortion + esc_length*lambda;
3566
3567             if(score < best_score){
3568                 best_score= score;
3569                 best_level= level - 64;
3570             }
3571         }
3572         block[0]= best_level;
3573         s->coded_score[n] = best_score - dc*dc;
3574         if(best_level == 0) return -1;
3575         else                return last_non_zero;
3576     }
3577
         /* Backtrack: store the final level, then walk run_tab/level_tab
          * towards the start, writing levels in permuted scan order. */
3578     i= last_i;
3579     assert(last_level);
3580
3581     block[ perm_scantable[last_non_zero] ]= last_level;
3582     i -= last_run + 1;
3583
3584     for(; i>start_i; i -= run_tab[i] + 1){
3585         block[ perm_scantable[i-1] ]= level_tab[i];
3586     }
3587
3588     return last_non_zero;
3589 }
3590
3591 //#define REFINE_STATS 1
3592 static int16_t basis[64][64];
3593
3594 static void build_basis(uint8_t *perm){
3595     int i, j, x, y;
3596     emms_c();
3597     for(i=0; i<8; i++){
3598         for(j=0; j<8; j++){
3599             for(y=0; y<8; y++){
3600                 for(x=0; x<8; x++){
3601                     double s= 0.25*(1<<BASIS_SHIFT);
3602                     int index= 8*i + j;
3603                     int perm_index= perm[index];
3604                     if(i==0) s*= sqrt(0.5);
3605                     if(j==0) s*= sqrt(0.5);
3606                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3607                 }
3608             }
3609         }
3610     }
3611 }
3612
3613 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3614                         int16_t *block, int16_t *weight, int16_t *orig,
3615                         int n, int qscale){
3616     int16_t rem[64];
3617     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3618     const uint8_t *scantable= s->intra_scantable.scantable;
3619     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3620 //    unsigned int threshold1, threshold2;
3621 //    int bias=0;
3622     int run_tab[65];
3623     int prev_run=0;
3624     int prev_level=0;
3625     int qmul, qadd, start_i, last_non_zero, i, dc;
3626     uint8_t * length;
3627     uint8_t * last_length;
3628     int lambda;
3629     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3630 #ifdef REFINE_STATS
3631 static int count=0;
3632 static int after_last=0;
3633 static int to_zero=0;
3634 static int from_zero=0;
3635 static int raise=0;
3636 static int lower=0;
3637 static int messed_sign=0;
3638 #endif
3639
3640     if(basis[0][0] == 0)
3641         build_basis(s->dsp.idct_permutation);
3642
3643     qmul= qscale*2;
3644     qadd= (qscale-1)|1;
3645     if (s->mb_intra) {
3646         if (!s->h263_aic) {
3647             if (n < 4)
3648                 q = s->y_dc_scale;
3649             else
3650                 q = s->c_dc_scale;
3651         } else{
3652             /* For AIC we skip quant/dequant of INTRADC */
3653             q = 1;
3654             qadd=0;
3655         }
3656         q <<= RECON_SHIFT-3;
3657         /* note: block[0] is assumed to be positive */
3658         dc= block[0]*q;
3659 //        block[0] = (block[0] + (q >> 1)) / q;
3660         start_i = 1;
3661 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3662 //            bias= 1<<(QMAT_SHIFT-1);
3663         length     = s->intra_ac_vlc_length;
3664         last_length= s->intra_ac_vlc_last_length;
3665     } else {
3666         dc= 0;
3667         start_i = 0;
3668         length     = s->inter_ac_vlc_length;
3669         last_length= s->inter_ac_vlc_last_length;
3670     }
3671     last_non_zero = s->block_last_index[n];
3672
3673 #ifdef REFINE_STATS
3674 {START_TIMER
3675 #endif
3676     dc += (1<<(RECON_SHIFT-1));
3677     for(i=0; i<64; i++){
3678         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3679     }
3680 #ifdef REFINE_STATS
3681 STOP_TIMER("memset rem[]")}
3682 #endif
3683     sum=0;
3684     for(i=0; i<64; i++){
3685         int one= 36;
3686         int qns=4;
3687         int w;
3688
3689         w= FFABS(weight[i]) + qns*one;
3690         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3691
3692         weight[i] = w;
3693 //        w=weight[i] = (63*qns + (w/2)) / w;
3694
3695         assert(w>0);
3696         assert(w<(1<<6));
3697         sum += w*w;
3698     }
3699     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3700 #ifdef REFINE_STATS
3701 {START_TIMER
3702 #endif
3703     run=0;
3704     rle_index=0;
3705     for(i=start_i; i<=last_non_zero; i++){
3706         int j= perm_scantable[i];
3707         const int level= block[j];
3708         int coeff;
3709
3710         if(level){
3711             if(level<0) coeff= qmul*level - qadd;
3712             else        coeff= qmul*level + qadd;
3713             run_tab[rle_index++]=run;
3714             run=0;
3715
3716             s->dsp.add_8x8basis(rem, basis[j], coeff);
3717         }else{
3718             run++;
3719         }
3720     }
3721 #ifdef REFINE_STATS
3722 if(last_non_zero>0){
3723 STOP_TIMER("init rem[]")
3724 }
3725 }
3726
3727 {START_TIMER
3728 #endif
3729     for(;;){
3730         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3731         int best_coeff=0;
3732         int best_change=0;
3733         int run2, best_unquant_change=0, analyze_gradient;
3734 #ifdef REFINE_STATS
3735 {START_TIMER
3736 #endif
3737         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3738
3739         if(analyze_gradient){
3740 #ifdef REFINE_STATS
3741 {START_TIMER
3742 #endif
3743             for(i=0; i<64; i++){
3744                 int w= weight[i];
3745
3746                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3747             }
3748 #ifdef REFINE_STATS
3749 STOP_TIMER("rem*w*w")}
3750 {START_TIMER
3751 #endif
3752             s->dsp.fdct(d1);
3753 #ifdef REFINE_STATS
3754 STOP_TIMER("dct")}
3755 #endif
3756         }
3757
3758         if(start_i){
3759             const int level= block[0];
3760             int change, old_coeff;
3761
3762             assert(s->mb_intra);
3763
3764             old_coeff= q*level;
3765
3766             for(change=-1; change<=1; change+=2){
3767                 int new_level= level + change;
3768                 int score, new_coeff;
3769
3770                 new_coeff= q*new_level;
3771                 if(new_coeff >= 2048 || new_coeff < 0)
3772                     continue;
3773
3774                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3775                 if(score<best_score){
3776                     best_score= score;
3777                     best_coeff= 0;
3778                     best_change= change;
3779                     best_unquant_change= new_coeff - old_coeff;
3780                 }
3781             }
3782         }
3783
3784         run=0;
3785         rle_index=0;
3786         run2= run_tab[rle_index++];
3787         prev_level=0;
3788         prev_run=0;
3789
3790         for(i=start_i; i<64; i++){
3791             int j= perm_scantable[i];
3792             const int level= block[j];
3793             int change, old_coeff;
3794
3795             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3796                 break;
3797
3798             if(level){
3799                 if(level<0) old_coeff= qmul*level - qadd;
3800                 else        old_coeff= qmul*level + qadd;
3801                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3802             }else{
3803                 old_coeff=0;
3804                 run2--;
3805                 assert(run2>=0 || i >= last_non_zero );
3806             }
3807
3808             for(change=-1; change<=1; change+=2){
3809                 int new_level= level + change;
3810                 int score, new_coeff, unquant_change;
3811
3812                 score=0;
3813                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3814                    continue;
3815
3816                 if(new_level){
3817                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3818                     else            new_coeff= qmul*new_level + qadd;
3819                     if(new_coeff >= 2048 || new_coeff <= -2048)
3820                         continue;
3821                     //FIXME check for overflow
3822
3823                     if(level){
3824                         if(level < 63 && level > -63){
3825                             if(i < last_non_zero)
3826                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3827                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3828                             else
3829                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3830                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3831                         }
3832                     }else{
3833                         assert(FFABS(new_level)==1);
3834
3835                         if(analyze_gradient){
3836                             int g= d1[ scantable[i] ];
3837                             if(g && (g^new_level) >= 0)
3838                                 continue;
3839                         }
3840
3841                         if(i < last_non_zero){
3842                             int next_i= i + run2 + 1;
3843                             int next_level= block[ perm_scantable[next_i] ] + 64;
3844
3845                             if(next_level&(~127))
3846                                 next_level= 0;
3847
3848                             if(next_i < last_non_zero)
3849                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3850                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3851                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3852                             else
3853                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3854                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3855                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3856                         }else{
3857                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3858                             if(prev_level){
3859                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3860                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3861                             }
3862                         }
3863                     }
3864                 }else{
3865                     new_coeff=0;
3866                     assert(FFABS(level)==1);
3867
3868                     if(i < last_non_zero){
3869                         int next_i= i + run2 + 1;
3870                         int next_level= block[ perm_scantable[next_i] ] + 64;
3871
3872                         if(next_level&(~127))
3873                             next_level= 0;
3874
3875                         if(next_i < last_non_zero)
3876                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3877                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
3878                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3879                         else
3880                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3881                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3882                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3883                     }else{
3884                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
3885                         if(prev_level){
3886                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3887                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3888                         }
3889                     }
3890                 }
3891
3892                 score *= lambda;
3893
3894                 unquant_change= new_coeff - old_coeff;
3895                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
3896
3897                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
3898                 if(score<best_score){
3899                     best_score= score;
3900                     best_coeff= i;
3901                     best_change= change;
3902                     best_unquant_change= unquant_change;
3903                 }
3904             }
3905             if(level){
3906                 prev_level= level + 64;
3907                 if(prev_level&(~127))
3908                     prev_level= 0;
3909                 prev_run= run;
3910                 run=0;
3911             }else{
3912                 run++;
3913             }
3914         }
3915 #ifdef REFINE_STATS
3916 STOP_TIMER("iterative step")}
3917 #endif
3918
3919         if(best_change){
3920             int j= perm_scantable[ best_coeff ];
3921
3922             block[j] += best_change;
3923
3924             if(best_coeff > last_non_zero){
3925                 last_non_zero= best_coeff;
3926                 assert(block[j]);
3927 #ifdef REFINE_STATS
3928 after_last++;
3929 #endif
3930             }else{
3931 #ifdef REFINE_STATS
3932 if(block[j]){
3933     if(block[j] - best_change){
3934         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
3935             raise++;
3936         }else{
3937             lower++;
3938         }
3939     }else{
3940         from_zero++;
3941     }
3942 }else{
3943     to_zero++;
3944 }
3945 #endif
3946                 for(; last_non_zero>=start_i; last_non_zero--){
3947                     if(block[perm_scantable[last_non_zero]])
3948                         break;
3949                 }
3950             }
3951 #ifdef REFINE_STATS
3952 count++;
3953 if(256*256*256*64 % count == 0){
3954     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
3955 }
3956 #endif
3957             run=0;
3958             rle_index=0;
3959             for(i=start_i; i<=last_non_zero; i++){
3960                 int j= perm_scantable[i];
3961                 const int level= block[j];
3962
3963                  if(level){
3964                      run_tab[rle_index++]=run;
3965                      run=0;
3966                  }else{
3967                      run++;
3968                  }
3969             }
3970
3971             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
3972         }else{
3973             break;
3974         }
3975     }
3976 #ifdef REFINE_STATS
3977 if(last_non_zero>0){
3978 STOP_TIMER("iterative search")
3979 }
3980 }
3981 #endif
3982
3983     return last_non_zero;
3984 }
3985
3986 int ff_dct_quantize_c(MpegEncContext *s,
3987                         int16_t *block, int n,
3988                         int qscale, int *overflow)
3989 {
3990     int i, j, level, last_non_zero, q, start_i;
3991     const int *qmat;
3992     const uint8_t *scantable= s->intra_scantable.scantable;
3993     int bias;
3994     int max=0;
3995     unsigned int threshold1, threshold2;
3996
3997     s->dsp.fdct (block);
3998
3999     if(s->dct_error_sum)
4000         s->denoise_dct(s, block);
4001
4002     if (s->mb_intra) {
4003         if (!s->h263_aic) {
4004             if (n < 4)
4005                 q = s->y_dc_scale;
4006             else
4007                 q = s->c_dc_scale;
4008             q = q << 3;
4009         } else
4010             /* For AIC we skip quant/dequant of INTRADC */
4011             q = 1 << 3;
4012
4013         /* note: block[0] is assumed to be positive */
4014         block[0] = (block[0] + (q >> 1)) / q;
4015         start_i = 1;
4016         last_non_zero = 0;
4017         qmat = s->q_intra_matrix[qscale];
4018         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4019     } else {
4020         start_i = 0;
4021         last_non_zero = -1;
4022         qmat = s->q_inter_matrix[qscale];
4023         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4024     }
4025     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4026     threshold2= (threshold1<<1);
4027     for(i=63;i>=start_i;i--) {
4028         j = scantable[i];
4029         level = block[j] * qmat[j];
4030
4031         if(((unsigned)(level+threshold1))>threshold2){
4032             last_non_zero = i;
4033             break;
4034         }else{
4035             block[j]=0;
4036         }
4037     }
4038     for(i=start_i; i<=last_non_zero; i++) {
4039         j = scantable[i];
4040         level = block[j] * qmat[j];
4041
4042 //        if(   bias+level >= (1<<QMAT_SHIFT)
4043 //           || bias-level >= (1<<QMAT_SHIFT)){
4044         if(((unsigned)(level+threshold1))>threshold2){
4045             if(level>0){
4046                 level= (bias + level)>>QMAT_SHIFT;
4047                 block[j]= level;
4048             }else{
4049                 level= (bias - level)>>QMAT_SHIFT;
4050                 block[j]= -level;
4051             }
4052             max |=level;
4053         }else{
4054             block[j]=0;
4055         }
4056     }
4057     *overflow= s->max_qcoeff < max; //overflow might have happened
4058
4059     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4060     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4061         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4062
4063     return last_non_zero;
4064 }
4065
/* Byte offset of field x inside MpegEncContext, used by the AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Option flags for the AVOption tables: video parameter + encoding parameter.
 * The expansion is parenthesized so it behaves as a single operand wherever
 * the macro is used. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4068 static const AVOption h263_options[] = {
4069     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4070     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4071     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4072     FF_MPV_COMMON_OPTS
4073     { NULL },
4074 };
4075
4076 static const AVClass h263_class = {
4077     .class_name = "H.263 encoder",
4078     .item_name  = av_default_item_name,
4079     .option     = h263_options,
4080     .version    = LIBAVUTIL_VERSION_INT,
4081 };
4082
4083 AVCodec ff_h263_encoder = {
4084     .name           = "h263",
4085     .type           = AVMEDIA_TYPE_VIDEO,
4086     .id             = AV_CODEC_ID_H263,
4087     .priv_data_size = sizeof(MpegEncContext),
4088     .init           = ff_MPV_encode_init,
4089     .encode2        = ff_MPV_encode_picture,
4090     .close          = ff_MPV_encode_end,
4091     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4092     .long_name= NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4093     .priv_class     = &h263_class,
4094 };
4095
4096 static const AVOption h263p_options[] = {
4097     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4098     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4099     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4100     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4101     FF_MPV_COMMON_OPTS
4102     { NULL },
4103 };
4104 static const AVClass h263p_class = {
4105     .class_name = "H.263p encoder",
4106     .item_name  = av_default_item_name,
4107     .option     = h263p_options,
4108     .version    = LIBAVUTIL_VERSION_INT,
4109 };
4110
4111 AVCodec ff_h263p_encoder = {
4112     .name           = "h263p",
4113     .type           = AVMEDIA_TYPE_VIDEO,
4114     .id             = AV_CODEC_ID_H263P,
4115     .priv_data_size = sizeof(MpegEncContext),
4116     .init           = ff_MPV_encode_init,
4117     .encode2        = ff_MPV_encode_picture,
4118     .close          = ff_MPV_encode_end,
4119     .capabilities   = CODEC_CAP_SLICE_THREADS,
4120     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4121     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4122     .priv_class     = &h263p_class,
4123 };
4124
4125 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4126
4127 AVCodec ff_msmpeg4v2_encoder = {
4128     .name           = "msmpeg4v2",
4129     .type           = AVMEDIA_TYPE_VIDEO,
4130     .id             = AV_CODEC_ID_MSMPEG4V2,
4131     .priv_data_size = sizeof(MpegEncContext),
4132     .init           = ff_MPV_encode_init,
4133     .encode2        = ff_MPV_encode_picture,
4134     .close          = ff_MPV_encode_end,
4135     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4136     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4137     .priv_class     = &msmpeg4v2_class,
4138 };
4139
4140 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4141
4142 AVCodec ff_msmpeg4v3_encoder = {
4143     .name           = "msmpeg4",
4144     .type           = AVMEDIA_TYPE_VIDEO,
4145     .id             = AV_CODEC_ID_MSMPEG4V3,
4146     .priv_data_size = sizeof(MpegEncContext),
4147     .init           = ff_MPV_encode_init,
4148     .encode2        = ff_MPV_encode_picture,
4149     .close          = ff_MPV_encode_end,
4150     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4151     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4152     .priv_class     = &msmpeg4v3_class,
4153 };
4154
4155 FF_MPV_GENERIC_CLASS(wmv1)
4156
4157 AVCodec ff_wmv1_encoder = {
4158     .name           = "wmv1",
4159     .type           = AVMEDIA_TYPE_VIDEO,
4160     .id             = AV_CODEC_ID_WMV1,
4161     .priv_data_size = sizeof(MpegEncContext),
4162     .init           = ff_MPV_encode_init,
4163     .encode2        = ff_MPV_encode_picture,
4164     .close          = ff_MPV_encode_end,
4165     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4166     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4167     .priv_class     = &wmv1_class,
4168 };