1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/internal.h"
31 #include "libavutil/intmath.h"
32 #include "libavutil/mathematics.h"
33 #include "libavutil/pixdesc.h"
34 #include "libavutil/opt.h"
35 #include "avcodec.h"
36 #include "dct.h"
37 #include "dsputil.h"
38 #include "mpeg12.h"
39 #include "mpegvideo.h"
40 #include "h261.h"
41 #include "h263.h"
42 #include "mathops.h"
43 #include "mjpegenc.h"
44 #include "msmpeg4.h"
45 #include "faandct.h"
46 #include "thread.h"
47 #include "aandcttab.h"
48 #include "flv.h"
49 #include "mpeg4video.h"
50 #include "internal.h"
51 #include "bytestream.h"
52 #include <limits.h>
53
54 static int encode_picture(MpegEncContext *s, int picture_number);
55 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
56 static int sse_mb(MpegEncContext *s);
57 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
58 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
59
60 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
61 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
62
63 const AVOption ff_mpv_generic_options[] = {
64     FF_MPV_COMMON_OPTS
65     { NULL },
66 };
67
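/**
 * Precompute quantization tables for every qscale in [qmin, qmax].
 * Division by (qscale * quant_matrix[i]) is replaced by multiplication with
 * a precomputed reciprocal: qmat holds the 32-bit factors and qmat16 the
 * 16-bit factors plus a rounding bias for the SIMD quantizer. The factors
 * depend on which forward DCT is in use, since ff_fdct_ifast leaves the
 * AAN scale factors in its output.
 */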
68 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
69                        uint16_t (*qmat16)[2][64],
70                        const uint16_t *quant_matrix,
71                        int bias, int qmin, int qmax, int intra)
72 {
73     int qscale;
74     int shift = 0;
75
76     for (qscale = qmin; qscale <= qmax; qscale++) {
77         int i;
78         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
79             dsp->fdct == ff_jpeg_fdct_islow_10 ||
80             dsp->fdct == ff_faandct) {
81             for (i = 0; i < 64; i++) {
82                 const int j = dsp->idct_permutation[i];
83                 /* 16 <= qscale * quant_matrix[i] <= 7905
84                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
85                  *             19952 <=              x  <= 249205026
86                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
87                  *           3444240 >= (1 << 36) / (x) >= 275 */
88
89                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
90                                         (qscale * quant_matrix[j]));
91             }
92         } else if (dsp->fdct == ff_fdct_ifast) {
93             for (i = 0; i < 64; i++) {
94                 const int j = dsp->idct_permutation[i];
95                 /* 16 <= qscale * quant_matrix[i] <= 7905
96                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
97                  *             19952 <=              x  <= 249205026
98                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
99                  *           3444240 >= (1 << 36) / (x) >= 275 */
100
101                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
102                                         (ff_aanscales[i] * qscale *
103                                          quant_matrix[j]));
104             }
105         } else {
106             for (i = 0; i < 64; i++) {
107                 const int j = dsp->idct_permutation[i];
108                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
109                  * Assume x = qscale * quant_matrix[i]
110                  * So             16 <=              x  <= 7905
111                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
112                  * so          32768 >= (1 << 19) / (x) >= 67 */
113                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
114                                         (qscale * quant_matrix[j]));
115                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
116                 //                    (qscale * quant_matrix[i]);
117                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
118                                        (qscale * quant_matrix[j]);
119
120                 if (qmat16[qscale][0][i] == 0 ||
121                     qmat16[qscale][0][i] == 128 * 256)
122                     qmat16[qscale][0][i] = 128 * 256 - 1;
123                 qmat16[qscale][1][i] =
124                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
125                                 qmat16[qscale][0][i]);
126             }
127         }
128
129         for (i = intra; i < 64; i++) {
130             int64_t max = 8191;
131             if (dsp->fdct == ff_fdct_ifast) {
132                 max = (8191LL * ff_aanscales[i]) >> 14;
133             }
134             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
135                 shift++;
136             }
137         }
138     }
139     if (shift) {
140         av_log(NULL, AV_LOG_INFO,
141                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
142                QMAT_SHIFT - shift);
143     }
144 }
145
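/**
 * Derive the current qscale from the Lagrangian lambda and clip it to the
 * user-specified [qmin, qmax] range; also recompute lambda2.
 */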
146 static inline void update_qscale(MpegEncContext *s)
147 {
148     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
149                 (FF_LAMBDA_SHIFT + 7);
150     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
151
152     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
153                  FF_LAMBDA_SHIFT;
154 }
155
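/**
 * Write a custom quantization matrix in zigzag order, preceded by a '1'
 * flag bit; write a single '0' bit if no matrix is given.
 */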
156 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
157 {
158     int i;
159
160     if (matrix) {
161         put_bits(pb, 1, 1);
162         for (i = 0; i < 64; i++) {
163             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
164         }
165     } else
166         put_bits(pb, 1, 0);
167 }
168
169 /**
170  * init s->current_picture.qscale_table from s->lambda_table
171  */
172 void ff_init_qscale_tab(MpegEncContext *s)
173 {
174     int8_t * const qscale_table = s->current_picture.qscale_table;
175     int i;
176
177     for (i = 0; i < s->mb_num; i++) {
178         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
179         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
180         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
181                                                   s->avctx->qmax);
182     }
183 }
184
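/**
 * Copy the per-frame state decided around motion estimation (picture type,
 * f/b codes, qscale, lambda, ...) from one context (src) into its
 * duplicate (dst), as used by the slice threads.
 */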
185 static void update_duplicate_context_after_me(MpegEncContext *dst,
186                                               MpegEncContext *src)
187 {
188 #define COPY(a) dst->a= src->a
189     COPY(pict_type);
190     COPY(current_picture);
191     COPY(f_code);
192     COPY(b_code);
193     COPY(qscale);
194     COPY(lambda);
195     COPY(lambda2);
196     COPY(picture_in_gop_number);
197     COPY(gop_picture_number);
198     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
199     COPY(progressive_frame);    // FIXME don't set in encode_header
200     COPY(partitioned_frame);    // FIXME don't set in encode_header
201 #undef COPY
202 }
203
204 /**
205  * Set the given MpegEncContext to defaults for encoding.
206  * The changed fields will not depend upon the prior state of the MpegEncContext.
207  */
208 static void MPV_encode_defaults(MpegEncContext *s)
209 {
210     int i;
211     ff_MPV_common_defaults(s);
212
213     for (i = -16; i < 16; i++) {
214         default_fcode_tab[i + MAX_MV] = 1;
215     }
216     s->me.mv_penalty = default_mv_penalty;
217     s->fcode_tab     = default_fcode_tab;
218 }
219
220 /* init video encoder */
221 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
222 {
223     MpegEncContext *s = avctx->priv_data;
224     int i;
225     int chroma_h_shift, chroma_v_shift;
226
227     MPV_encode_defaults(s);
228
229     switch (avctx->codec_id) {
230     case AV_CODEC_ID_MPEG2VIDEO:
231         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
232             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
233             av_log(avctx, AV_LOG_ERROR,
234                    "only YUV420 and YUV422 are supported\n");
235             return -1;
236         }
237         break;
238     case AV_CODEC_ID_LJPEG:
239         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
240             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
241             avctx->pix_fmt != AV_PIX_FMT_YUVJ444P &&
242             avctx->pix_fmt != AV_PIX_FMT_BGRA     &&
243             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
244               avctx->pix_fmt != AV_PIX_FMT_YUV422P &&
245               avctx->pix_fmt != AV_PIX_FMT_YUV444P) ||
246              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
247             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
248             return -1;
249         }
250         break;
251     case AV_CODEC_ID_MJPEG:
252         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
253             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
254             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
255               avctx->pix_fmt != AV_PIX_FMT_YUV422P) ||
256              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
257             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
258             return -1;
259         }
260         break;
261     default:
262         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
263             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
264             return -1;
265         }
266     }
267
268     switch (avctx->pix_fmt) {
269     case AV_PIX_FMT_YUVJ422P:
270     case AV_PIX_FMT_YUV422P:
271         s->chroma_format = CHROMA_422;
272         break;
273     case AV_PIX_FMT_YUVJ420P:
274     case AV_PIX_FMT_YUV420P:
275     default:
276         s->chroma_format = CHROMA_420;
277         break;
278     }
279
280     s->bit_rate = avctx->bit_rate;
281     s->width    = avctx->width;
282     s->height   = avctx->height;
283     if (avctx->gop_size > 600 &&
284         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
285         av_log(avctx, AV_LOG_ERROR,
286                "Warning: keyframe interval too large, reducing it to 600\n");
287         avctx->gop_size = 600;
288     }
289     s->gop_size     = avctx->gop_size;
290     s->avctx        = avctx;
291     s->flags        = avctx->flags;
292     s->flags2       = avctx->flags2;
293     s->max_b_frames = avctx->max_b_frames;
294     s->codec_id     = avctx->codec->id;
295     s->strict_std_compliance = avctx->strict_std_compliance;
296     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
297     s->mpeg_quant         = avctx->mpeg_quant;
298     s->rtp_mode           = !!avctx->rtp_payload_size;
299     s->intra_dc_precision = avctx->intra_dc_precision;
300     s->user_specified_pts = AV_NOPTS_VALUE;
301
302     if (s->gop_size <= 1) {
303         s->intra_only = 1;
304         s->gop_size   = 12;
305     } else {
306         s->intra_only = 0;
307     }
308
309     s->me_method = avctx->me_method;
310
311     /* Fixed QSCALE */
312     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
313
314     s->adaptive_quant = (s->avctx->lumi_masking ||
315                          s->avctx->dark_masking ||
316                          s->avctx->temporal_cplx_masking ||
317                          s->avctx->spatial_cplx_masking  ||
318                          s->avctx->p_masking      ||
319                          s->avctx->border_masking ||
320                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
321                         !s->fixed_qscale;
322
323     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
324
325     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
326         av_log(avctx, AV_LOG_ERROR,
327                "a VBV buffer size is needed "
328                "for encoding with a maximum bitrate\n");
329         return -1;
330     }
331
332     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
333         av_log(avctx, AV_LOG_INFO,
334                "Warning: min_rate > 0 but min_rate != max_rate is not recommended!\n");
335     }
336
337     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
338         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
339         return -1;
340     }
341
342     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
343         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
344         return -1;
345     }
346
347     if (avctx->rc_max_rate &&
348         avctx->rc_max_rate == avctx->bit_rate &&
349         avctx->rc_max_rate != avctx->rc_min_rate) {
350         av_log(avctx, AV_LOG_INFO,
351                "impossible bitrate constraints, this will fail\n");
352     }
353
354     if (avctx->rc_buffer_size &&
355         avctx->bit_rate * (int64_t)avctx->time_base.num >
356             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
357         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
358         return -1;
359     }
360
361     if (!s->fixed_qscale &&
362         avctx->bit_rate * av_q2d(avctx->time_base) >
363             avctx->bit_rate_tolerance) {
364         av_log(avctx, AV_LOG_ERROR,
365                "bitrate tolerance too small for bitrate\n");
366         return -1;
367     }
368
369     if (s->avctx->rc_max_rate &&
370         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
371         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
372          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
373         90000LL * (avctx->rc_buffer_size - 1) >
374             s->avctx->rc_max_rate * 0xFFFFLL) {
375         av_log(avctx, AV_LOG_INFO,
376                "Warning: vbv_delay will be set to 0xFFFF (=VBR) as the "
377                "specified vbv buffer is too large for the given bitrate!\n");
378     }
379
380     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
381         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
382         s->codec_id != AV_CODEC_ID_FLV1) {
383         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
384         return -1;
385     }
386
387     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
388         av_log(avctx, AV_LOG_ERROR,
389                "OBMC is only supported with simple mb decision\n");
390         return -1;
391     }
392
393     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
394         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
395         return -1;
396     }
397
398     if (s->max_b_frames                    &&
399         s->codec_id != AV_CODEC_ID_MPEG4      &&
400         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
401         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
402         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
403         return -1;
404     }
405
406     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
407          s->codec_id == AV_CODEC_ID_H263  ||
408          s->codec_id == AV_CODEC_ID_H263P) &&
409         (avctx->sample_aspect_ratio.num > 255 ||
410          avctx->sample_aspect_ratio.den > 255)) {
411         av_log(avctx, AV_LOG_ERROR,
412                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
413                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
414         return -1;
415     }
416
417     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
418         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
419         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
420         return -1;
421     }
422
423     // FIXME mpeg2 uses that too
424     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
425         av_log(avctx, AV_LOG_ERROR,
426                "mpeg2 style quantization not supported by codec\n");
427         return -1;
428     }
429
430     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
431         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
432         return -1;
433     }
434
435     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
436         s->avctx->mb_decision != FF_MB_DECISION_RD) {
437         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
438         return -1;
439     }
440
441     if (s->avctx->scenechange_threshold < 1000000000 &&
442         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
443         av_log(avctx, AV_LOG_ERROR,
444                "closed GOP with scene change detection is not supported yet, "
445                "set the threshold to 1000000000\n");
446         return -1;
447     }
448
449     if (s->flags & CODEC_FLAG_LOW_DELAY) {
450         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
451             av_log(avctx, AV_LOG_ERROR,
452                   "low delay forcing is only available for mpeg2\n");
453             return -1;
454         }
455         if (s->max_b_frames != 0) {
456             av_log(avctx, AV_LOG_ERROR,
457                    "b frames cannot be used with low delay\n");
458             return -1;
459         }
460     }
461
462     if (s->q_scale_type == 1) {
463         if (avctx->qmax > 12) {
464             av_log(avctx, AV_LOG_ERROR,
465                    "non linear quant only supports qmax <= 12 currently\n");
466             return -1;
467         }
468     }
469
470     if (s->avctx->thread_count > 1         &&
471         s->codec_id != AV_CODEC_ID_MPEG4      &&
472         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
473         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
474         (s->codec_id != AV_CODEC_ID_H263P)) {
475         av_log(avctx, AV_LOG_ERROR,
476                "multi threaded encoding not supported by codec\n");
477         return -1;
478     }
479
480     if (s->avctx->thread_count < 1) {
481         av_log(avctx, AV_LOG_ERROR,
482                "automatic thread number detection not supported by codec, "
483                "patch welcome\n");
484         return -1;
485     }
486
487     if (s->avctx->thread_count > 1)
488         s->rtp_mode = 1;
489
490     if (!avctx->time_base.den || !avctx->time_base.num) {
491         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
492         return -1;
493     }
494
495     i = (INT_MAX / 2 + 128) >> 8;
496     if (avctx->mb_threshold >= i) {
497         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
498                i - 1);
499         return -1;
500     }
501
502     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
503         av_log(avctx, AV_LOG_INFO,
504                "notice: b_frame_strategy only affects the first pass\n");
505         avctx->b_frame_strategy = 0;
506     }
507
508     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
509     if (i > 1) {
510         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
511         avctx->time_base.den /= i;
512         avctx->time_base.num /= i;
513         //return -1;
514     }
515
516     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
517         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
518         // (a + x * 3 / 8) / x
519         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
520         s->inter_quant_bias = 0;
521     } else {
522         s->intra_quant_bias = 0;
523         // (a - x / 4) / x
524         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
525     }
526
527     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
528         s->intra_quant_bias = avctx->intra_quant_bias;
529     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
530         s->inter_quant_bias = avctx->inter_quant_bias;
531
532     av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
533                                      &chroma_v_shift);
534
535     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
536         s->avctx->time_base.den > (1 << 16) - 1) {
537         av_log(avctx, AV_LOG_ERROR,
538                "timebase %d/%d not supported by the MPEG-4 standard, "
539                "the maximum allowed value for the timebase denominator "
540                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
541                (1 << 16) - 1);
542         return -1;
543     }
544     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
545
546     switch (avctx->codec->id) {
547     case AV_CODEC_ID_MPEG1VIDEO:
548         s->out_format = FMT_MPEG1;
549         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
550         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
551         break;
552     case AV_CODEC_ID_MPEG2VIDEO:
553         s->out_format = FMT_MPEG1;
554         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
555         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
556         s->rtp_mode   = 1;
557         break;
558     case AV_CODEC_ID_LJPEG:
559     case AV_CODEC_ID_MJPEG:
560         s->out_format = FMT_MJPEG;
561         s->intra_only = 1; /* force intra only for jpeg */
562         if (avctx->codec->id == AV_CODEC_ID_LJPEG &&
563             avctx->pix_fmt   == AV_PIX_FMT_BGRA) {
564             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
565             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
566             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
567         } else {
568             s->mjpeg_vsample[0] = 2;
569             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
570             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
571             s->mjpeg_hsample[0] = 2;
572             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
573             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
574         }
575         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
576             ff_mjpeg_encode_init(s) < 0)
577             return -1;
578         avctx->delay = 0;
579         s->low_delay = 1;
580         break;
581     case AV_CODEC_ID_H261:
582         if (!CONFIG_H261_ENCODER)
583             return -1;
584         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
585             av_log(avctx, AV_LOG_ERROR,
586                    "The specified picture size of %dx%d is not valid for the "
587                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
588                     s->width, s->height);
589             return -1;
590         }
591         s->out_format = FMT_H261;
592         avctx->delay  = 0;
593         s->low_delay  = 1;
594         break;
595     case AV_CODEC_ID_H263:
596         if (!CONFIG_H263_ENCODER)
597             return -1;
598         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
599                              s->width, s->height) == 8) {
600             av_log(avctx, AV_LOG_ERROR,
601                    "The specified picture size of %dx%d is not valid for "
602                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
603                    "352x288, 704x576, and 1408x1152. "
604                    "Try H.263+.\n", s->width, s->height);
605             return -1;
606         }
607         s->out_format = FMT_H263;
608         avctx->delay  = 0;
609         s->low_delay  = 1;
610         break;
611     case AV_CODEC_ID_H263P:
612         s->out_format = FMT_H263;
613         s->h263_plus  = 1;
614         /* Fx */
615         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
616         s->modified_quant  = s->h263_aic;
617         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
618         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
619
620         /* /Fx */
621         /* These are just to be sure */
622         avctx->delay = 0;
623         s->low_delay = 1;
624         break;
625     case AV_CODEC_ID_FLV1:
626         s->out_format      = FMT_H263;
627         s->h263_flv        = 2; /* format = 1; 11-bit codes */
628         s->unrestricted_mv = 1;
629         s->rtp_mode  = 0; /* don't allow GOB */
630         avctx->delay = 0;
631         s->low_delay = 1;
632         break;
633     case AV_CODEC_ID_RV10:
634         s->out_format = FMT_H263;
635         avctx->delay  = 0;
636         s->low_delay  = 1;
637         break;
638     case AV_CODEC_ID_RV20:
639         s->out_format      = FMT_H263;
640         avctx->delay       = 0;
641         s->low_delay       = 1;
642         s->modified_quant  = 1;
643         s->h263_aic        = 1;
644         s->h263_plus       = 1;
645         s->loop_filter     = 1;
646         s->unrestricted_mv = 0;
647         break;
648     case AV_CODEC_ID_MPEG4:
649         s->out_format      = FMT_H263;
650         s->h263_pred       = 1;
651         s->unrestricted_mv = 1;
652         s->low_delay       = s->max_b_frames ? 0 : 1;
653         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
654         break;
655     case AV_CODEC_ID_MSMPEG4V2:
656         s->out_format      = FMT_H263;
657         s->h263_pred       = 1;
658         s->unrestricted_mv = 1;
659         s->msmpeg4_version = 2;
660         avctx->delay       = 0;
661         s->low_delay       = 1;
662         break;
663     case AV_CODEC_ID_MSMPEG4V3:
664         s->out_format        = FMT_H263;
665         s->h263_pred         = 1;
666         s->unrestricted_mv   = 1;
667         s->msmpeg4_version   = 3;
668         s->flipflop_rounding = 1;
669         avctx->delay         = 0;
670         s->low_delay         = 1;
671         break;
672     case AV_CODEC_ID_WMV1:
673         s->out_format        = FMT_H263;
674         s->h263_pred         = 1;
675         s->unrestricted_mv   = 1;
676         s->msmpeg4_version   = 4;
677         s->flipflop_rounding = 1;
678         avctx->delay         = 0;
679         s->low_delay         = 1;
680         break;
681     case AV_CODEC_ID_WMV2:
682         s->out_format        = FMT_H263;
683         s->h263_pred         = 1;
684         s->unrestricted_mv   = 1;
685         s->msmpeg4_version   = 5;
686         s->flipflop_rounding = 1;
687         avctx->delay         = 0;
688         s->low_delay         = 1;
689         break;
690     default:
691         return -1;
692     }
693
694     avctx->has_b_frames = !s->low_delay;
695
696     s->encoding = 1;
697
698     s->progressive_frame    =
699     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
700                                                 CODEC_FLAG_INTERLACED_ME) ||
701                                 s->alternate_scan);
702
703     /* init */
704     if (ff_MPV_common_init(s) < 0)
705         return -1;
706
707     if (ARCH_X86)
708         ff_MPV_encode_init_x86(s);
709
710     ff_h263dsp_init(&s->h263dsp);
711     if (!s->dct_quantize)
712         s->dct_quantize = ff_dct_quantize_c;
713     if (!s->denoise_dct)
714         s->denoise_dct  = denoise_dct_c;
715     s->fast_dct_quantize = s->dct_quantize;
716     if (avctx->trellis)
717         s->dct_quantize  = dct_quantize_trellis_c;
718
719     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
720         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
721
722     s->quant_precision = 5;
723
724     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
725     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
726
727     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
728         ff_h261_encode_init(s);
729     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
730         ff_h263_encode_init(s);
731     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
732         ff_msmpeg4_encode_init(s);
733     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
734         && s->out_format == FMT_MPEG1)
735         ff_mpeg1_encode_init(s);
736
737     /* init q matrix */
738     for (i = 0; i < 64; i++) {
739         int j = s->dsp.idct_permutation[i];
740         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
741             s->mpeg_quant) {
742             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
743             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
744         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
745             s->intra_matrix[j] =
746             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
747         } else {
748             /* mpeg1/2 */
749             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
750             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
751         }
752         if (s->avctx->intra_matrix)
753             s->intra_matrix[j] = s->avctx->intra_matrix[i];
754         if (s->avctx->inter_matrix)
755             s->inter_matrix[j] = s->avctx->inter_matrix[i];
756     }
757
758     /* precompute matrix */
759     /* for mjpeg, we do include qscale in the matrix */
760     if (s->out_format != FMT_MJPEG) {
761         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
762                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
763                           31, 1);
764         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
765                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
766                           31, 0);
767     }
768
769     if (ff_rate_control_init(s) < 0)
770         return -1;
771
772 #if FF_API_ERROR_RATE
773     FF_DISABLE_DEPRECATION_WARNINGS
774     if (avctx->error_rate)
775         s->error_rate = avctx->error_rate;
776     FF_ENABLE_DEPRECATION_WARNINGS;
777 #endif
778
779     return 0;
780 }
781
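/**
 * Free all encoder state: rate control, the common MPV context, the MJPEG
 * encoder tables where used, and any extradata allocated by the encoder.
 */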
782 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
783 {
784     MpegEncContext *s = avctx->priv_data;
785
786     ff_rate_control_uninit(s);
787
788     ff_MPV_common_end(s);
789     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
790         s->out_format == FMT_MJPEG)
791         ff_mjpeg_encode_close(s);
792
793     av_freep(&avctx->extradata);
794
795     return 0;
796 }
797
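/**
 * Return the sum of absolute differences between a 16x16 block and a
 * constant reference value (typically the block mean).
 */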
798 static int get_sae(uint8_t *src, int ref, int stride)
799 {
800     int x,y;
801     int acc = 0;
802
803     for (y = 0; y < 16; y++) {
804         for (x = 0; x < 16; x++) {
805             acc += FFABS(src[x + y * stride] - ref);
806         }
807     }
808
809     return acc;
810 }
811
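/**
 * Count the 16x16 blocks for which the SAD against the reference frame
 * clearly exceeds the SAE against the block's own mean, i.e. blocks that
 * look cheaper to code as intra. Used to score candidate B-frames for
 * b_frame_strategy 1.
 */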
812 static int get_intra_count(MpegEncContext *s, uint8_t *src,
813                            uint8_t *ref, int stride)
814 {
815     int x, y, w, h;
816     int acc = 0;
817
818     w = s->width  & ~15;
819     h = s->height & ~15;
820
821     for (y = 0; y < h; y += 16) {
822         for (x = 0; x < w; x += 16) {
823             int offset = x + y * stride;
824             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
825                                      16);
826             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
827             int sae  = get_sae(src + offset, mean, stride);
828
829             acc += sae + 500 < sad;
830         }
831     }
832     return acc;
833 }
834
835
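/**
 * Add a user-supplied frame to the encoder's input queue. The frame is
 * referenced directly when its buffers and strides allow it, otherwise it
 * is copied into an internally allocated picture; missing timestamps are
 * guessed from the previously supplied ones.
 */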
836 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
837 {
838     Picture *pic = NULL;
839     int64_t pts;
840     int i, display_picture_number = 0, ret;
841     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
842                                                  (s->low_delay ? 0 : 1);
843     int direct = 1;
844
845     if (pic_arg) {
846         pts = pic_arg->pts;
847         display_picture_number = s->input_picture_number++;
848
849         if (pts != AV_NOPTS_VALUE) {
850             if (s->user_specified_pts != AV_NOPTS_VALUE) {
851                 int64_t time = pts;
852                 int64_t last = s->user_specified_pts;
853
854                 if (time <= last) {
855                     av_log(s->avctx, AV_LOG_ERROR,
856                            "Error, Invalid timestamp=%"PRId64", "
857                            "last=%"PRId64"\n", pts, s->user_specified_pts);
858                     return -1;
859                 }
860
861                 if (!s->low_delay && display_picture_number == 1)
862                     s->dts_delta = time - last;
863             }
864             s->user_specified_pts = pts;
865         } else {
866             if (s->user_specified_pts != AV_NOPTS_VALUE) {
867                 s->user_specified_pts =
868                 pts = s->user_specified_pts + 1;
869                 av_log(s->avctx, AV_LOG_INFO,
870                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
871                        pts);
872             } else {
873                 pts = display_picture_number;
874             }
875         }
876     }
877
878     if (pic_arg) {
879         if (!pic_arg->buf[0])
880             direct = 0;
881         if (pic_arg->linesize[0] != s->linesize)
882             direct = 0;
883         if (pic_arg->linesize[1] != s->uvlinesize)
884             direct = 0;
885         if (pic_arg->linesize[2] != s->uvlinesize)
886             direct = 0;
887
888         av_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
889                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
890
891         if (direct) {
892             i = ff_find_unused_picture(s, 1);
893             if (i < 0)
894                 return i;
895
896             pic = &s->picture[i];
897             pic->reference = 3;
898
899             if ((ret = av_frame_ref(&pic->f, pic_arg)) < 0)
900                 return ret;
901             if (ff_alloc_picture(s, pic, 1) < 0) {
902                 return -1;
903             }
904         } else {
905             i = ff_find_unused_picture(s, 0);
906             if (i < 0)
907                 return i;
908
909             pic = &s->picture[i];
910             pic->reference = 3;
911
912             if (ff_alloc_picture(s, pic, 0) < 0) {
913                 return -1;
914             }
915
916             if (pic->f.data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
917                 pic->f.data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
918                 pic->f.data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
919                 // empty
920             } else {
921                 int h_chroma_shift, v_chroma_shift;
922                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
923                                                  &h_chroma_shift,
924                                                  &v_chroma_shift);
925
926                 for (i = 0; i < 3; i++) {
927                     int src_stride = pic_arg->linesize[i];
928                     int dst_stride = i ? s->uvlinesize : s->linesize;
929                     int h_shift = i ? h_chroma_shift : 0;
930                     int v_shift = i ? v_chroma_shift : 0;
931                     int w = s->width  >> h_shift;
932                     int h = s->height >> v_shift;
933                     uint8_t *src = pic_arg->data[i];
934                     uint8_t *dst = pic->f.data[i];
935
936                     if (!s->avctx->rc_buffer_size)
937                         dst += INPLACE_OFFSET;
938
939                     if (src_stride == dst_stride)
940                         memcpy(dst, src, src_stride * h);
941                     else {
942                         while (h--) {
943                             memcpy(dst, src, w);
944                             dst += dst_stride;
945                             src += src_stride;
946                         }
947                     }
948                 }
949             }
950         }
951         ret = av_frame_copy_props(&pic->f, pic_arg);
952         if (ret < 0)
953             return ret;
954
955         pic->f.display_picture_number = display_picture_number;
956         pic->f.pts = pts; // we set this here to avoid modifying pic_arg
957     }
958
959     /* shift buffer entries */
960     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
961         s->input_picture[i - 1] = s->input_picture[i];
962
963     s->input_picture[encoding_delay] = (Picture*) pic;
964
965     return 0;
966 }
967
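/**
 * Decide whether the current input picture is similar enough to the last
 * coded one to be skipped, based on the frame_skip_cmp metric accumulated
 * per 8x8 block and the frame_skip_threshold/factor/exp settings.
 */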
968 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
969 {
970     int x, y, plane;
971     int score = 0;
972     int64_t score64 = 0;
973
974     for (plane = 0; plane < 3; plane++) {
975         const int stride = p->f.linesize[plane];
976         const int bw = plane ? 1 : 2;
977         for (y = 0; y < s->mb_height * bw; y++) {
978             for (x = 0; x < s->mb_width * bw; x++) {
979                 int off = p->shared ? 0 : 16;
980                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
981                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
982                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
983
984                 switch (s->avctx->frame_skip_exp) {
985                 case 0: score    =  FFMAX(score, v);          break;
986                 case 1: score   += FFABS(v);                  break;
987                 case 2: score   += v * v;                     break;
988                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
989                 case 4: score64 += v * v * (int64_t)(v * v);  break;
990                 }
991             }
992         }
993     }
994
995     if (score)
996         score64 = score;
997
998     if (score64 < s->avctx->frame_skip_threshold)
999         return 1;
1000     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1001         return 1;
1002     return 0;
1003 }
1004
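/**
 * Encode one frame with the given scratch encoder context and return the
 * resulting packet size in bytes (used for B-frame strategy 2 probing).
 */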
1005 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1006 {
1007     AVPacket pkt = { 0 };
1008     int ret, got_output;
1009
1010     av_init_packet(&pkt);
1011     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1012     if (ret < 0)
1013         return ret;
1014
1015     ret = pkt.size;
1016     av_free_packet(&pkt);
1017     return ret;
1018 }
1019
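/**
 * Estimate the rate-distortion-best number of B-frames to insert by
 * test-encoding downscaled versions of the queued input pictures with a
 * throwaway encoder context and comparing rate plus distortion for each
 * candidate B-frame count.
 */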
1020 static int estimate_best_b_count(MpegEncContext *s)
1021 {
1022     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1023     AVCodecContext *c = avcodec_alloc_context3(NULL);
1024     AVFrame input[FF_MAX_B_FRAMES + 2];
1025     const int scale = s->avctx->brd_scale;
1026     int i, j, out_size, p_lambda, b_lambda, lambda2;
1027     int64_t best_rd  = INT64_MAX;
1028     int best_b_count = -1;
1029
1030     assert(scale >= 0 && scale <= 3);
1031
1032     //emms_c();
1033     //s->next_picture_ptr->quality;
1034     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1035     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1036     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1037     if (!b_lambda) // FIXME we should do this somewhere else
1038         b_lambda = p_lambda;
1039     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1040                FF_LAMBDA_SHIFT;
1041
1042     c->width        = s->width  >> scale;
1043     c->height       = s->height >> scale;
1044     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1045                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1046     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1047     c->mb_decision  = s->avctx->mb_decision;
1048     c->me_cmp       = s->avctx->me_cmp;
1049     c->mb_cmp       = s->avctx->mb_cmp;
1050     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1051     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1052     c->time_base    = s->avctx->time_base;
1053     c->max_b_frames = s->max_b_frames;
1054
1055     if (avcodec_open2(c, codec, NULL) < 0)
1056         return -1;
1057
1058     for (i = 0; i < s->max_b_frames + 2; i++) {
1059         int ysize = c->width * c->height;
1060         int csize = (c->width / 2) * (c->height / 2);
1061         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1062                                                 s->next_picture_ptr;
1063
1064         avcodec_get_frame_defaults(&input[i]);
1065         input[i].data[0]     = av_malloc(ysize + 2 * csize);
1066         input[i].data[1]     = input[i].data[0] + ysize;
1067         input[i].data[2]     = input[i].data[1] + csize;
1068         input[i].linesize[0] = c->width;
1069         input[i].linesize[1] =
1070         input[i].linesize[2] = c->width / 2;
1071
1072         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1073             pre_input = *pre_input_ptr;
1074
1075             if (!pre_input.shared && i) {
1076                 pre_input.f.data[0] += INPLACE_OFFSET;
1077                 pre_input.f.data[1] += INPLACE_OFFSET;
1078                 pre_input.f.data[2] += INPLACE_OFFSET;
1079             }
1080
1081             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
1082                                  pre_input.f.data[0], pre_input.f.linesize[0],
1083                                  c->width,      c->height);
1084             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
1085                                  pre_input.f.data[1], pre_input.f.linesize[1],
1086                                  c->width >> 1, c->height >> 1);
1087             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
1088                                  pre_input.f.data[2], pre_input.f.linesize[2],
1089                                  c->width >> 1, c->height >> 1);
1090         }
1091     }
1092
1093     for (j = 0; j < s->max_b_frames + 1; j++) {
1094         int64_t rd = 0;
1095
1096         if (!s->input_picture[j])
1097             break;
1098
1099         c->error[0] = c->error[1] = c->error[2] = 0;
1100
1101         input[0].pict_type = AV_PICTURE_TYPE_I;
1102         input[0].quality   = 1 * FF_QP2LAMBDA;
1103
1104         out_size = encode_frame(c, &input[0]);
1105
1106         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1107
1108         for (i = 0; i < s->max_b_frames + 1; i++) {
1109             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1110
1111             input[i + 1].pict_type = is_p ?
1112                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1113             input[i + 1].quality   = is_p ? p_lambda : b_lambda;
1114
1115             out_size = encode_frame(c, &input[i + 1]);
1116
1117             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1118         }
1119
1120         /* get the delayed frames */
1121         while (out_size) {
1122             out_size = encode_frame(c, NULL);
1123             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1124         }
1125
1126         rd += c->error[0] + c->error[1] + c->error[2];
1127
1128         if (rd < best_rd) {
1129             best_rd = rd;
1130             best_b_count = j;
1131         }
1132     }
1133
1134     avcodec_close(c);
1135     av_freep(&c);
1136
1137     for (i = 0; i < s->max_b_frames + 2; i++) {
1138         av_freep(&input[i].data[0]);
1139     }
1140
1141     return best_b_count;
1142 }
1143
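/**
 * Pick the next picture to be coded: run optional skip detection, decide
 * the picture type and the number of B-frames according to b_frame_strategy,
 * and move the chosen pictures from input order into coded order.
 */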
1144 static int select_input_picture(MpegEncContext *s)
1145 {
1146     int i, ret;
1147
1148     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1149         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1150     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1151
1152     /* set next picture type & ordering */
1153     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1154         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1155             s->next_picture_ptr == NULL || s->intra_only) {
1156             s->reordered_input_picture[0] = s->input_picture[0];
1157             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1158             s->reordered_input_picture[0]->f.coded_picture_number =
1159                 s->coded_picture_number++;
1160         } else {
1161             int b_frames;
1162
1163             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1164                 if (s->picture_in_gop_number < s->gop_size &&
1165                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1166                     // FIXME check that the GOP check above is +-1 correct
1167                     av_frame_unref(&s->input_picture[0]->f);
1168
1169                     emms_c();
1170                     ff_vbv_update(s, 0);
1171
1172                     goto no_output_pic;
1173                 }
1174             }
1175
1176             if (s->flags & CODEC_FLAG_PASS2) {
1177                 for (i = 0; i < s->max_b_frames + 1; i++) {
1178                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1179
1180                     if (pict_num >= s->rc_context.num_entries)
1181                         break;
1182                     if (!s->input_picture[i]) {
1183                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1184                         break;
1185                     }
1186
1187                     s->input_picture[i]->f.pict_type =
1188                         s->rc_context.entry[pict_num].new_pict_type;
1189                 }
1190             }
1191
1192             if (s->avctx->b_frame_strategy == 0) {
1193                 b_frames = s->max_b_frames;
1194                 while (b_frames && !s->input_picture[b_frames])
1195                     b_frames--;
1196             } else if (s->avctx->b_frame_strategy == 1) {
1197                 for (i = 1; i < s->max_b_frames + 1; i++) {
1198                     if (s->input_picture[i] &&
1199                         s->input_picture[i]->b_frame_score == 0) {
1200                         s->input_picture[i]->b_frame_score =
1201                             get_intra_count(s,
1202                                             s->input_picture[i    ]->f.data[0],
1203                                             s->input_picture[i - 1]->f.data[0],
1204                                             s->linesize) + 1;
1205                     }
1206                 }
1207                 for (i = 0; i < s->max_b_frames + 1; i++) {
1208                     if (s->input_picture[i] == NULL ||
1209                         s->input_picture[i]->b_frame_score - 1 >
1210                             s->mb_num / s->avctx->b_sensitivity)
1211                         break;
1212                 }
1213
1214                 b_frames = FFMAX(0, i - 1);
1215
1216                 /* reset scores */
1217                 for (i = 0; i < b_frames + 1; i++) {
1218                     s->input_picture[i]->b_frame_score = 0;
1219                 }
1220             } else if (s->avctx->b_frame_strategy == 2) {
1221                 b_frames = estimate_best_b_count(s);
1222             } else {
1223                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1224                 b_frames = 0;
1225             }
1226
1227             emms_c();
1228
1229             for (i = b_frames - 1; i >= 0; i--) {
1230                 int type = s->input_picture[i]->f.pict_type;
1231                 if (type && type != AV_PICTURE_TYPE_B)
1232                     b_frames = i;
1233             }
1234             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1235                 b_frames == s->max_b_frames) {
1236                 av_log(s->avctx, AV_LOG_ERROR,
1237                        "warning, too many b frames in a row\n");
1238             }
1239
1240             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1241                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1242                     s->gop_size > s->picture_in_gop_number) {
1243                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1244                 } else {
1245                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1246                         b_frames = 0;
1247                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1248                 }
1249             }
1250
1251             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1252                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1253                 b_frames--;
1254
1255             s->reordered_input_picture[0] = s->input_picture[b_frames];
1256             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1257                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1258             s->reordered_input_picture[0]->f.coded_picture_number =
1259                 s->coded_picture_number++;
1260             for (i = 0; i < b_frames; i++) {
1261                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1262                 s->reordered_input_picture[i + 1]->f.pict_type =
1263                     AV_PICTURE_TYPE_B;
1264                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1265                     s->coded_picture_number++;
1266             }
1267         }
1268     }
1269 no_output_pic:
1270     if (s->reordered_input_picture[0]) {
1271         s->reordered_input_picture[0]->reference =
1272            s->reordered_input_picture[0]->f.pict_type !=
1273                AV_PICTURE_TYPE_B ? 3 : 0;
1274
1275         ff_mpeg_unref_picture(s, &s->new_picture);
1276         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1277             return ret;
1278
1279         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1280             // input is a shared pix, so we can't modify it -> alloc a new
1281             // one & ensure that the shared one is reusable
1282
1283             Picture *pic;
1284             int i = ff_find_unused_picture(s, 0);
1285             if (i < 0)
1286                 return i;
1287             pic = &s->picture[i];
1288
1289             pic->reference = s->reordered_input_picture[0]->reference;
1290             if (ff_alloc_picture(s, pic, 0) < 0) {
1291                 return -1;
1292             }
1293
1294             ret = av_frame_copy_props(&pic->f, &s->reordered_input_picture[0]->f);
1295             if (ret < 0)
1296                 return ret;
1297
1298             /* mark us unused / free shared pic */
1299             av_frame_unref(&s->reordered_input_picture[0]->f);
1300             s->reordered_input_picture[0]->shared = 0;
1301
1302             s->current_picture_ptr = pic;
1303         } else {
1304             // input is not a shared pix -> reuse buffer for current_pix
1305             s->current_picture_ptr = s->reordered_input_picture[0];
1306             for (i = 0; i < 4; i++) {
1307                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1308             }
1309         }
1310         ff_mpeg_unref_picture(s, &s->current_picture);
1311         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1312                                        s->current_picture_ptr)) < 0)
1313             return ret;
1314
1315         s->picture_number = s->new_picture.f.display_picture_number;
1316     } else {
1317         ff_mpeg_unref_picture(s, &s->new_picture);
1318     }
1319     return 0;
1320 }
1321
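/**
 * Main encoding entry point: queue the new input picture, pick the next
 * picture in coded order, encode it (retrying with a higher qscale on VBV
 * overflow), apply bit stuffing and vbv_delay for CBR, and fill the packet.
 */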
1322 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1323                           const AVFrame *pic_arg, int *got_packet)
1324 {
1325     MpegEncContext *s = avctx->priv_data;
1326     int i, stuffing_count, ret;
1327     int context_count = s->slice_context_count;
1328
1329     s->picture_in_gop_number++;
1330
1331     if (load_input_picture(s, pic_arg) < 0)
1332         return -1;
1333
1334     if (select_input_picture(s) < 0) {
1335         return -1;
1336     }
1337
1338     /* output? */
1339     if (s->new_picture.f.data[0]) {
1340         if (!pkt->data &&
1341             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1342             return ret;
1343         if (s->mb_info) {
1344             s->mb_info_ptr = av_packet_new_side_data(pkt,
1345                                  AV_PKT_DATA_H263_MB_INFO,
1346                                  s->mb_width*s->mb_height*12);
1347             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1348         }
1349
1350         for (i = 0; i < context_count; i++) {
1351             int start_y = s->thread_context[i]->start_mb_y;
1352             int   end_y = s->thread_context[i]->  end_mb_y;
1353             int h       = s->mb_height;
1354             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1355             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1356
1357             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1358         }
1359
1360         s->pict_type = s->new_picture.f.pict_type;
1361         //emms_c();
1362         ff_MPV_frame_start(s, avctx);
1363 vbv_retry:
1364         if (encode_picture(s, s->picture_number) < 0)
1365             return -1;
1366
1367         avctx->header_bits = s->header_bits;
1368         avctx->mv_bits     = s->mv_bits;
1369         avctx->misc_bits   = s->misc_bits;
1370         avctx->i_tex_bits  = s->i_tex_bits;
1371         avctx->p_tex_bits  = s->p_tex_bits;
1372         avctx->i_count     = s->i_count;
1373         // FIXME f/b_count in avctx
1374         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1375         avctx->skip_count  = s->skip_count;
1376
1377         ff_MPV_frame_end(s);
1378
1379         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1380             ff_mjpeg_encode_picture_trailer(s);
1381
1382         if (avctx->rc_buffer_size) {
1383             RateControlContext *rcc = &s->rc_context;
1384             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1385
1386             if (put_bits_count(&s->pb) > max_size &&
1387                 s->lambda < s->avctx->lmax) {
1388                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1389                                        (s->qscale + 1) / s->qscale);
1390                 if (s->adaptive_quant) {
1391                     int i;
1392                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1393                         s->lambda_table[i] =
1394                             FFMAX(s->lambda_table[i] + 1,
1395                                   s->lambda_table[i] * (s->qscale + 1) /
1396                                   s->qscale);
1397                 }
1398                 s->mb_skipped = 0;        // done in MPV_frame_start()
1399                 // done in encode_picture() so we must undo it
1400                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1401                     if (s->flipflop_rounding          ||
1402                         s->codec_id == AV_CODEC_ID_H263P ||
1403                         s->codec_id == AV_CODEC_ID_MPEG4)
1404                         s->no_rounding ^= 1;
1405                 }
1406                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1407                     s->time_base       = s->last_time_base;
1408                     s->last_non_b_time = s->time - s->pp_time;
1409                 }
1410                 for (i = 0; i < context_count; i++) {
1411                     PutBitContext *pb = &s->thread_context[i]->pb;
1412                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1413                 }
1414                 goto vbv_retry;
1415             }
1416
1417             assert(s->avctx->rc_max_rate);
1418         }
1419
1420         if (s->flags & CODEC_FLAG_PASS1)
1421             ff_write_pass1_stats(s);
1422
1423         for (i = 0; i < 4; i++) {
1424             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1425             avctx->error[i] += s->current_picture_ptr->f.error[i];
1426         }
1427
1428         if (s->flags & CODEC_FLAG_PASS1)
1429             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1430                    avctx->i_tex_bits + avctx->p_tex_bits ==
1431                        put_bits_count(&s->pb));
1432         flush_put_bits(&s->pb);
1433         s->frame_bits  = put_bits_count(&s->pb);
1434
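        /* ff_vbv_update() returns how many stuffing bytes are needed to keep
         * the VBV model on track; append them as zero bytes for MPEG-1/2 or
         * as a start code followed by 0xFF filler bytes for MPEG-4. */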
1435         stuffing_count = ff_vbv_update(s, s->frame_bits);
1436         if (stuffing_count) {
1437             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1438                     stuffing_count + 50) {
1439                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1440                 return -1;
1441             }
1442
1443             switch (s->codec_id) {
1444             case AV_CODEC_ID_MPEG1VIDEO:
1445             case AV_CODEC_ID_MPEG2VIDEO:
1446                 while (stuffing_count--) {
1447                     put_bits(&s->pb, 8, 0);
1448                 }
1449             break;
1450             case AV_CODEC_ID_MPEG4:
1451                 put_bits(&s->pb, 16, 0);
1452                 put_bits(&s->pb, 16, 0x1C3);
1453                 stuffing_count -= 4;
1454                 while (stuffing_count--) {
1455                     put_bits(&s->pb, 8, 0xFF);
1456                 }
1457             break;
1458             default:
1459                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1460             }
1461             flush_put_bits(&s->pb);
1462             s->frame_bits  = put_bits_count(&s->pb);
1463         }
1464
1465         /* update mpeg1/2 vbv_delay for CBR */
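        /* The picture header has already been written, so patch its 16-bit
         * vbv_delay field in place through vbv_delay_ptr: the value is the
         * decoder buffer occupancy in 90 kHz ticks, clamped to a minimum
         * derived from the bits that follow the field.  avctx->vbv_delay is
         * reported in 27 MHz units (x300). */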
1466         if (s->avctx->rc_max_rate                          &&
1467             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1468             s->out_format == FMT_MPEG1                     &&
1469             90000LL * (avctx->rc_buffer_size - 1) <=
1470                 s->avctx->rc_max_rate * 0xFFFFLL) {
1471             int vbv_delay, min_delay;
1472             double inbits  = s->avctx->rc_max_rate *
1473                              av_q2d(s->avctx->time_base);
1474             int    minbits = s->frame_bits - 8 *
1475                              (s->vbv_delay_ptr - s->pb.buf - 1);
1476             double bits    = s->rc_context.buffer_index + minbits - inbits;
1477
1478             if (bits < 0)
1479                 av_log(s->avctx, AV_LOG_ERROR,
1480                        "Internal error, negative bits\n");
1481
1482             assert(s->repeat_first_field == 0);
1483
1484             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1485             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1486                         s->avctx->rc_max_rate;
1487
1488             vbv_delay = FFMAX(vbv_delay, min_delay);
1489
1490             assert(vbv_delay < 0xFFFF);
1491
1492             s->vbv_delay_ptr[0] &= 0xF8;
1493             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1494             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1495             s->vbv_delay_ptr[2] &= 0x07;
1496             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1497             avctx->vbv_delay     = vbv_delay * 300;
1498         }
1499         s->total_bits     += s->frame_bits;
1500         avctx->frame_bits  = s->frame_bits;
1501
1502         pkt->pts = s->current_picture.f.pts;
1503         if (!s->low_delay) {
1504             if (!s->current_picture.f.coded_picture_number)
1505                 pkt->dts = pkt->pts - s->dts_delta;
1506             else
1507                 pkt->dts = s->reordered_pts;
1508             s->reordered_pts = s->input_picture[0]->f.pts;
1509         } else
1510             pkt->dts = pkt->pts;
1511         if (s->current_picture.f.key_frame)
1512             pkt->flags |= AV_PKT_FLAG_KEY;
1513         if (s->mb_info)
1514             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1515     } else {
1516         s->frame_bits = 0;
1517     }
1518     assert((s->frame_bits & 7) == 0);
1519
1520     pkt->size = s->frame_bits / 8;
1521     *got_packet = !!pkt->size;
1522     return 0;
1523 }
1524
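/* Zero out a block whose only nonzero coefficients are a few isolated +-1s:
 * each such coefficient is scored by tab[] according to the zero run that
 * precedes it, and if the total score stays below the threshold the whole
 * block is eliminated.  A negative threshold additionally allows the DC
 * coefficient to be eliminated. */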
1525 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1526                                                 int n, int threshold)
1527 {
1528     static const char tab[64] = {
1529         3, 2, 2, 1, 1, 1, 1, 1,
1530         1, 1, 1, 1, 1, 1, 1, 1,
1531         1, 1, 1, 1, 1, 1, 1, 1,
1532         0, 0, 0, 0, 0, 0, 0, 0,
1533         0, 0, 0, 0, 0, 0, 0, 0,
1534         0, 0, 0, 0, 0, 0, 0, 0,
1535         0, 0, 0, 0, 0, 0, 0, 0,
1536         0, 0, 0, 0, 0, 0, 0, 0
1537     };
1538     int score = 0;
1539     int run = 0;
1540     int i;
1541     int16_t *block = s->block[n];
1542     const int last_index = s->block_last_index[n];
1543     int skip_dc;
1544
1545     if (threshold < 0) {
1546         skip_dc = 0;
1547         threshold = -threshold;
1548     } else
1549         skip_dc = 1;
1550
1551     /* Is everything we could set to zero already zero? */
1552     if (last_index <= skip_dc - 1)
1553         return;
1554
1555     for (i = 0; i <= last_index; i++) {
1556         const int j = s->intra_scantable.permutated[i];
1557         const int level = FFABS(block[j]);
1558         if (level == 1) {
1559             if (skip_dc && i == 0)
1560                 continue;
1561             score += tab[run];
1562             run = 0;
1563         } else if (level > 1) {
1564             return;
1565         } else {
1566             run++;
1567         }
1568     }
1569     if (score >= threshold)
1570         return;
1571     for (i = skip_dc; i <= last_index; i++) {
1572         const int j = s->intra_scantable.permutated[i];
1573         block[j] = 0;
1574     }
1575     if (block[0])
1576         s->block_last_index[n] = 0;
1577     else
1578         s->block_last_index[n] = -1;
1579 }
1580
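/* Clip quantized coefficients to the [min_qcoeff, max_qcoeff] range allowed
 * by the target syntax (intra DC is left untouched) and, with simple
 * macroblock decision, log how many coefficients were clipped. */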
1581 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1582                                int last_index)
1583 {
1584     int i;
1585     const int maxlevel = s->max_qcoeff;
1586     const int minlevel = s->min_qcoeff;
1587     int overflow = 0;
1588
1589     if (s->mb_intra) {
1590         i = 1; // skip clipping of intra dc
1591     } else
1592         i = 0;
1593
1594     for (; i <= last_index; i++) {
1595         const int j = s->intra_scantable.permutated[i];
1596         int level = block[j];
1597
1598         if (level > maxlevel) {
1599             level = maxlevel;
1600             overflow++;
1601         } else if (level < minlevel) {
1602             level = minlevel;
1603             overflow++;
1604         }
1605
1606         block[j] = level;
1607     }
1608
1609     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1610         av_log(s->avctx, AV_LOG_INFO,
1611                "warning, clipping %d dct coefficients to %d..%d\n",
1612                overflow, minlevel, maxlevel);
1613 }
1614
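/* Activity weights for the noise-shaping quantizer (dct_quantize_refine):
 * each weight is 36 times the standard deviation of the pixel's 3x3
 * neighbourhood, clipped at the block border. */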
1615 static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
1616 {
1617     int x, y;
1618     // FIXME optimize
1619     for (y = 0; y < 8; y++) {
1620         for (x = 0; x < 8; x++) {
1621             int x2, y2;
1622             int sum = 0;
1623             int sqr = 0;
1624             int count = 0;
1625
1626             for (y2 = FFMAX(y - 1, 0); y2 < FFMIN(8, y + 2); y2++) {
1627                 for (x2= FFMAX(x - 1, 0); x2 < FFMIN(8, x + 2); x2++) {
1628                     int v = ptr[x2 + y2 * stride];
1629                     sum += v;
1630                     sqr += v * v;
1631                     count++;
1632                 }
1633             }
1634             weight[x + 8 * y]= (36 * ff_sqrt(count * sqr - sum * sum)) / count;
1635         }
1636     }
1637 }
1638
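/* Encode one macroblock: fetch the source pixels (emulating edges at the
 * picture border), pick frame vs. field DCT if interlaced DCT is enabled,
 * take intra pixels or the difference against the motion-compensated
 * prediction, skip nearly empty blocks, quantize (with optional noise
 * shaping and single-coefficient elimination), and hand the blocks to the
 * codec-specific bitstream writer. */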
1639 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1640                                                 int motion_x, int motion_y,
1641                                                 int mb_block_height,
1642                                                 int mb_block_count)
1643 {
1644     int16_t weight[8][64];
1645     int16_t orig[8][64];
1646     const int mb_x = s->mb_x;
1647     const int mb_y = s->mb_y;
1648     int i;
1649     int skip_dct[8];
1650     int dct_offset = s->linesize * 8; // default for progressive frames
1651     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1652     ptrdiff_t wrap_y, wrap_c;
1653
1654     for (i = 0; i < mb_block_count; i++)
1655         skip_dct[i] = s->skipdct;
1656
1657     if (s->adaptive_quant) {
1658         const int last_qp = s->qscale;
1659         const int mb_xy = mb_x + mb_y * s->mb_stride;
1660
1661         s->lambda = s->lambda_table[mb_xy];
1662         update_qscale(s);
1663
1664         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1665             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1666             s->dquant = s->qscale - last_qp;
1667
1668             if (s->out_format == FMT_H263) {
1669                 s->dquant = av_clip(s->dquant, -2, 2);
1670
1671                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1672                     if (!s->mb_intra) {
1673                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1674                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1675                                 s->dquant = 0;
1676                         }
1677                         if (s->mv_type == MV_TYPE_8X8)
1678                             s->dquant = 0;
1679                     }
1680                 }
1681             }
1682         }
1683         ff_set_qscale(s, last_qp + s->dquant);
1684     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1685         ff_set_qscale(s, s->qscale + s->dquant);
1686
1687     wrap_y = s->linesize;
1688     wrap_c = s->uvlinesize;
1689     ptr_y  = s->new_picture.f.data[0] +
1690              (mb_y * 16 * wrap_y)              + mb_x * 16;
1691     ptr_cb = s->new_picture.f.data[1] +
1692              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1693     ptr_cr = s->new_picture.f.data[2] +
1694              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1695
1696     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1697         uint8_t *ebuf = s->edge_emu_buffer + 32;
1698         s->vdsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
1699                                  mb_y * 16, s->width, s->height);
1700         ptr_y = ebuf;
1701         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, 8,
1702                                  mb_block_height, mb_x * 8, mb_y * 8,
1703                                  s->width >> 1, s->height >> 1);
1704         ptr_cb = ebuf + 18 * wrap_y;
1705         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr, wrap_c, 8,
1706                                  mb_block_height, mb_x * 8, mb_y * 8,
1707                                  s->width >> 1, s->height >> 1);
1708         ptr_cr = ebuf + 18 * wrap_y + 8;
1709     }
1710
1711     if (s->mb_intra) {
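        /* Frame/field DCT decision: compare the interlace metric on frame
         * lines against field lines; the 400 bias favours progressive DCT. */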
1712         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1713             int progressive_score, interlaced_score;
1714
1715             s->interlaced_dct = 0;
1716             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1717                                                     NULL, wrap_y, 8) +
1718                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1719                                                     NULL, wrap_y, 8) - 400;
1720
1721             if (progressive_score > 0) {
1722                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1723                                                        NULL, wrap_y * 2, 8) +
1724                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1725                                                        NULL, wrap_y * 2, 8);
1726                 if (progressive_score > interlaced_score) {
1727                     s->interlaced_dct = 1;
1728
1729                     dct_offset = wrap_y;
1730                     wrap_y <<= 1;
1731                     if (s->chroma_format == CHROMA_422)
1732                         wrap_c <<= 1;
1733                 }
1734             }
1735         }
1736
1737         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1738         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1739         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1740         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1741
1742         if (s->flags & CODEC_FLAG_GRAY) {
1743             skip_dct[4] = 1;
1744             skip_dct[5] = 1;
1745         } else {
1746             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1747             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1748             if (!s->chroma_y_shift) { /* 422 */
1749                 s->dsp.get_pixels(s->block[6],
1750                                   ptr_cb + (dct_offset >> 1), wrap_c);
1751                 s->dsp.get_pixels(s->block[7],
1752                                   ptr_cr + (dct_offset >> 1), wrap_c);
1753             }
1754         }
1755     } else {
1756         op_pixels_func (*op_pix)[4];
1757         qpel_mc_func (*op_qpix)[16];
1758         uint8_t *dest_y, *dest_cb, *dest_cr;
1759
1760         dest_y  = s->dest[0];
1761         dest_cb = s->dest[1];
1762         dest_cr = s->dest[2];
1763
1764         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1765             op_pix  = s->hdsp.put_pixels_tab;
1766             op_qpix = s->dsp.put_qpel_pixels_tab;
1767         } else {
1768             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
1769             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1770         }
1771
1772         if (s->mv_dir & MV_DIR_FORWARD) {
1773             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
1774                           s->last_picture.f.data,
1775                           op_pix, op_qpix);
1776             op_pix  = s->hdsp.avg_pixels_tab;
1777             op_qpix = s->dsp.avg_qpel_pixels_tab;
1778         }
1779         if (s->mv_dir & MV_DIR_BACKWARD) {
1780             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
1781                           s->next_picture.f.data,
1782                           op_pix, op_qpix);
1783         }
1784
1785         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1786             int progressive_score, interlaced_score;
1787
1788             s->interlaced_dct = 0;
1789             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1790                                                     ptr_y,              wrap_y,
1791                                                     8) +
1792                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1793                                                     ptr_y + wrap_y * 8, wrap_y,
1794                                                     8) - 400;
1795
1796             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1797                 progressive_score -= 400;
1798
1799             if (progressive_score > 0) {
1800                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1801                                                        ptr_y,
1802                                                        wrap_y * 2, 8) +
1803                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1804                                                        ptr_y + wrap_y,
1805                                                        wrap_y * 2, 8);
1806
1807                 if (progressive_score > interlaced_score) {
1808                     s->interlaced_dct = 1;
1809
1810                     dct_offset = wrap_y;
1811                     wrap_y <<= 1;
1812                     if (s->chroma_format == CHROMA_422)
1813                         wrap_c <<= 1;
1814                 }
1815             }
1816         }
1817
1818         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1819         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1820         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1821                            dest_y + dct_offset, wrap_y);
1822         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1823                            dest_y + dct_offset + 8, wrap_y);
1824
1825         if (s->flags & CODEC_FLAG_GRAY) {
1826             skip_dct[4] = 1;
1827             skip_dct[5] = 1;
1828         } else {
1829             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1830             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1831             if (!s->chroma_y_shift) { /* 422 */
1832                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1833                                    dest_cb + (dct_offset >> 1), wrap_c);
1834                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
1835                                    dest_cr + (dct_offset >> 1), wrap_c);
1836             }
1837         }
1838         /* pre quantization */
1839         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1840                 2 * s->qscale * s->qscale) {
1841             // FIXME optimize
1842             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1843                               wrap_y, 8) < 20 * s->qscale)
1844                 skip_dct[0] = 1;
1845             if (s->dsp.sad[1](NULL, ptr_y + 8,
1846                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1847                 skip_dct[1] = 1;
1848             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1849                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1850                 skip_dct[2] = 1;
1851             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1852                               dest_y + dct_offset + 8,
1853                               wrap_y, 8) < 20 * s->qscale)
1854                 skip_dct[3] = 1;
1855             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
1856                               wrap_c, 8) < 20 * s->qscale)
1857                 skip_dct[4] = 1;
1858             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
1859                               wrap_c, 8) < 20 * s->qscale)
1860                 skip_dct[5] = 1;
1861             if (!s->chroma_y_shift) { /* 422 */
1862                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
1863                                   dest_cb + (dct_offset >> 1),
1864                                   wrap_c, 8) < 20 * s->qscale)
1865                     skip_dct[6] = 1;
1866                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
1867                                   dest_cr + (dct_offset >> 1),
1868                                   wrap_c, 8) < 20 * s->qscale)
1869                     skip_dct[7] = 1;
1870             }
1871         }
1872     }
1873
1874     if (s->quantizer_noise_shaping) {
1875         if (!skip_dct[0])
1876             get_visual_weight(weight[0], ptr_y                 , wrap_y);
1877         if (!skip_dct[1])
1878             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
1879         if (!skip_dct[2])
1880             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
1881         if (!skip_dct[3])
1882             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
1883         if (!skip_dct[4])
1884             get_visual_weight(weight[4], ptr_cb                , wrap_c);
1885         if (!skip_dct[5])
1886             get_visual_weight(weight[5], ptr_cr                , wrap_c);
1887         if (!s->chroma_y_shift) { /* 422 */
1888             if (!skip_dct[6])
1889                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
1890                                   wrap_c);
1891             if (!skip_dct[7])
1892                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
1893                                   wrap_c);
1894         }
1895         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
1896     }
1897
1898     /* DCT & quantize */
1899     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
1900     {
1901         for (i = 0; i < mb_block_count; i++) {
1902             if (!skip_dct[i]) {
1903                 int overflow;
1904                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
1905                 // FIXME we could decide to change to quantizer instead of
1906                 // clipping
1907                 // JS: I don't think that would be a good idea; it could lower
1908                 //     quality instead of improving it. Only INTRADC clipping
1909                 //     deserves changes in the quantizer.
1910                 if (overflow)
1911                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
1912             } else
1913                 s->block_last_index[i] = -1;
1914         }
1915         if (s->quantizer_noise_shaping) {
1916             for (i = 0; i < mb_block_count; i++) {
1917                 if (!skip_dct[i]) {
1918                     s->block_last_index[i] =
1919                         dct_quantize_refine(s, s->block[i], weight[i],
1920                                             orig[i], i, s->qscale);
1921                 }
1922             }
1923         }
1924
1925         if (s->luma_elim_threshold && !s->mb_intra)
1926             for (i = 0; i < 4; i++)
1927                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
1928         if (s->chroma_elim_threshold && !s->mb_intra)
1929             for (i = 4; i < mb_block_count; i++)
1930                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
1931
1932         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
1933             for (i = 0; i < mb_block_count; i++) {
1934                 if (s->block_last_index[i] == -1)
1935                     s->coded_score[i] = INT_MAX / 256;
1936             }
1937         }
1938     }
1939
1940     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
1941         s->block_last_index[4] =
1942         s->block_last_index[5] = 0;
1943         s->block[4][0] =
1944         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
1945     }
1946
1947     // FIXME: the non-C quantize code returns an incorrect block_last_index
1948     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
1949         for (i = 0; i < mb_block_count; i++) {
1950             int j;
1951             if (s->block_last_index[i] > 0) {
1952                 for (j = 63; j > 0; j--) {
1953                     if (s->block[i][s->intra_scantable.permutated[j]])
1954                         break;
1955                 }
1956                 s->block_last_index[i] = j;
1957             }
1958         }
1959     }
1960
1961     /* huffman encode */
1962     switch(s->codec_id){ //FIXME a function pointer could be slightly faster
1963     case AV_CODEC_ID_MPEG1VIDEO:
1964     case AV_CODEC_ID_MPEG2VIDEO:
1965         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
1966             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
1967         break;
1968     case AV_CODEC_ID_MPEG4:
1969         if (CONFIG_MPEG4_ENCODER)
1970             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
1971         break;
1972     case AV_CODEC_ID_MSMPEG4V2:
1973     case AV_CODEC_ID_MSMPEG4V3:
1974     case AV_CODEC_ID_WMV1:
1975         if (CONFIG_MSMPEG4_ENCODER)
1976             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
1977         break;
1978     case AV_CODEC_ID_WMV2:
1979         if (CONFIG_WMV2_ENCODER)
1980             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
1981         break;
1982     case AV_CODEC_ID_H261:
1983         if (CONFIG_H261_ENCODER)
1984             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
1985         break;
1986     case AV_CODEC_ID_H263:
1987     case AV_CODEC_ID_H263P:
1988     case AV_CODEC_ID_FLV1:
1989     case AV_CODEC_ID_RV10:
1990     case AV_CODEC_ID_RV20:
1991         if (CONFIG_H263_ENCODER)
1992             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
1993         break;
1994     case AV_CODEC_ID_MJPEG:
1995         if (CONFIG_MJPEG_ENCODER)
1996             ff_mjpeg_encode_mb(s, s->block);
1997         break;
1998     default:
1999         assert(0);
2000     }
2001 }
2002
2003 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2004 {
2005     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2006     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2007 }
2008
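/* Save/restore the per-macroblock encoder state (predictors, statistics,
 * bit writers, block data) so that encode_mb_hq() can try several macroblock
 * types and keep only the best one. */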
2009 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2010     int i;
2011
2012     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2013
2014     /* mpeg1 */
2015     d->mb_skip_run= s->mb_skip_run;
2016     for(i=0; i<3; i++)
2017         d->last_dc[i] = s->last_dc[i];
2018
2019     /* statistics */
2020     d->mv_bits= s->mv_bits;
2021     d->i_tex_bits= s->i_tex_bits;
2022     d->p_tex_bits= s->p_tex_bits;
2023     d->i_count= s->i_count;
2024     d->f_count= s->f_count;
2025     d->b_count= s->b_count;
2026     d->skip_count= s->skip_count;
2027     d->misc_bits= s->misc_bits;
2028     d->last_bits= 0;
2029
2030     d->mb_skipped= 0;
2031     d->qscale= s->qscale;
2032     d->dquant= s->dquant;
2033
2034     d->esc3_level_length= s->esc3_level_length;
2035 }
2036
2037 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2038     int i;
2039
2040     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2041     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2042
2043     /* mpeg1 */
2044     d->mb_skip_run= s->mb_skip_run;
2045     for(i=0; i<3; i++)
2046         d->last_dc[i] = s->last_dc[i];
2047
2048     /* statistics */
2049     d->mv_bits= s->mv_bits;
2050     d->i_tex_bits= s->i_tex_bits;
2051     d->p_tex_bits= s->p_tex_bits;
2052     d->i_count= s->i_count;
2053     d->f_count= s->f_count;
2054     d->b_count= s->b_count;
2055     d->skip_count= s->skip_count;
2056     d->misc_bits= s->misc_bits;
2057
2058     d->mb_intra= s->mb_intra;
2059     d->mb_skipped= s->mb_skipped;
2060     d->mv_type= s->mv_type;
2061     d->mv_dir= s->mv_dir;
2062     d->pb= s->pb;
2063     if(s->data_partitioning){
2064         d->pb2= s->pb2;
2065         d->tex_pb= s->tex_pb;
2066     }
2067     d->block= s->block;
2068     for(i=0; i<8; i++)
2069         d->block_last_index[i]= s->block_last_index[i];
2070     d->interlaced_dct= s->interlaced_dct;
2071     d->qscale= s->qscale;
2072
2073     d->esc3_level_length= s->esc3_level_length;
2074 }
2075
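/* Encode the macroblock with the given candidate type into one of two
 * scratch bit buffers, score it by bit count (plus lambda2-weighted SSE for
 * full RD macroblock decision), and if it beats *dmin keep it as the new
 * best: flip *next_block and save the encoder state into *best. */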
2076 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2077                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2078                            int *dmin, int *next_block, int motion_x, int motion_y)
2079 {
2080     int score;
2081     uint8_t *dest_backup[3];
2082
2083     copy_context_before_encode(s, backup, type);
2084
2085     s->block= s->blocks[*next_block];
2086     s->pb= pb[*next_block];
2087     if(s->data_partitioning){
2088         s->pb2   = pb2   [*next_block];
2089         s->tex_pb= tex_pb[*next_block];
2090     }
2091
2092     if(*next_block){
2093         memcpy(dest_backup, s->dest, sizeof(s->dest));
2094         s->dest[0] = s->rd_scratchpad;
2095         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2096         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2097         assert(s->linesize >= 32); //FIXME
2098     }
2099
2100     encode_mb(s, motion_x, motion_y);
2101
2102     score= put_bits_count(&s->pb);
2103     if(s->data_partitioning){
2104         score+= put_bits_count(&s->pb2);
2105         score+= put_bits_count(&s->tex_pb);
2106     }
2107
2108     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2109         ff_MPV_decode_mb(s, s->block);
2110
2111         score *= s->lambda2;
2112         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2113     }
2114
2115     if(*next_block){
2116         memcpy(s->dest, dest_backup, sizeof(s->dest));
2117     }
2118
2119     if(score<*dmin){
2120         *dmin= score;
2121         *next_block^=1;
2122
2123         copy_context_after_encode(best, s, type);
2124     }
2125 }
2126
2127 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2128     uint32_t *sq = ff_squareTbl + 256;
2129     int acc=0;
2130     int x,y;
2131
2132     if(w==16 && h==16)
2133         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2134     else if(w==8 && h==8)
2135         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2136
2137     for(y=0; y<h; y++){
2138         for(x=0; x<w; x++){
2139             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2140         }
2141     }
2142
2143     assert(acc>=0);
2144
2145     return acc;
2146 }
2147
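/* Sum of squared (or noise-preserving, with FF_CMP_NSSE) errors between the
 * source and the reconstructed macroblock, luma plus both chroma planes,
 * handling partial macroblocks at the right and bottom picture border. */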
2148 static int sse_mb(MpegEncContext *s){
2149     int w= 16;
2150     int h= 16;
2151
2152     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2153     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2154
2155     if(w==16 && h==16)
2156       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2157         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2158                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2159                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2160       }else{
2161         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2162                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2163                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2164       }
2165     else
2166         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2167                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2168                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2169 }
2170
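/* Optional motion-estimation pre-pass over one slice of rows, run bottom-up
 * and right-to-left with the pre_dia_size search diamond. */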
2171 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2172     MpegEncContext *s= *(void**)arg;
2173
2174
2175     s->me.pre_pass=1;
2176     s->me.dia_size= s->avctx->pre_dia_size;
2177     s->first_slice_line=1;
2178     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2179         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2180             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2181         }
2182         s->first_slice_line=0;
2183     }
2184
2185     s->me.pre_pass=0;
2186
2187     return 0;
2188 }
2189
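/* Full motion estimation for one slice of rows: per macroblock, estimate the
 * motion vectors and candidate MB types (P- or B-frame variant) and store
 * them in the context for the encoding pass. */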
2190 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2191     MpegEncContext *s= *(void**)arg;
2192
2193     ff_check_alignment();
2194
2195     s->me.dia_size= s->avctx->dia_size;
2196     s->first_slice_line=1;
2197     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2198         s->mb_x=0; //for block init below
2199         ff_init_block_index(s);
2200         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2201             s->block_index[0]+=2;
2202             s->block_index[1]+=2;
2203             s->block_index[2]+=2;
2204             s->block_index[3]+=2;
2205
2206             /* compute motion vector & mb_type and store in context */
2207             if(s->pict_type==AV_PICTURE_TYPE_B)
2208                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2209             else
2210                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2211         }
2212         s->first_slice_line=0;
2213     }
2214     return 0;
2215 }
2216
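/* Per-macroblock luma variance and mean of the source picture, used by
 * adaptive quantization and rate control; mb_var_sum_temp accumulates the
 * total for this slice of rows. */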
2217 static int mb_var_thread(AVCodecContext *c, void *arg){
2218     MpegEncContext *s= *(void**)arg;
2219     int mb_x, mb_y;
2220
2221     ff_check_alignment();
2222
2223     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2224         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2225             int xx = mb_x * 16;
2226             int yy = mb_y * 16;
2227             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2228             int varc;
2229             int sum = s->dsp.pix_sum(pix, s->linesize);
2230
2231             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2232
2233             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2234             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2235             s->me.mb_var_sum_temp    += varc;
2236         }
2237     }
2238     return 0;
2239 }
2240
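/* Terminate the current slice / video packet: merge MPEG-4 data partitions
 * if needed, write stuffing, and byte-align the bitstream. */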
2241 static void write_slice_end(MpegEncContext *s){
2242     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2243         if(s->partitioned_frame){
2244             ff_mpeg4_merge_partitions(s);
2245         }
2246
2247         ff_mpeg4_stuffing(&s->pb);
2248     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2249         ff_mjpeg_encode_stuffing(&s->pb);
2250     }
2251
2252     avpriv_align_put_bits(&s->pb);
2253     flush_put_bits(&s->pb);
2254
2255     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2256         s->misc_bits+= get_bits_diff(s);
2257 }
2258
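/* Fill one 12-byte AV_PKT_DATA_H263_MB_INFO record: bit offset of the
 * current position, quantizer, GOB number, macroblock address and the
 * motion-vector predictor (4MV is not covered). */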
2259 static void write_mb_info(MpegEncContext *s)
2260 {
2261     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2262     int offset = put_bits_count(&s->pb);
2263     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2264     int gobn = s->mb_y / s->gob_index;
2265     int pred_x, pred_y;
2266     if (CONFIG_H263_ENCODER)
2267         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2268     bytestream_put_le32(&ptr, offset);
2269     bytestream_put_byte(&ptr, s->qscale);
2270     bytestream_put_byte(&ptr, gobn);
2271     bytestream_put_le16(&ptr, mba);
2272     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2273     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2274     /* 4MV not implemented */
2275     bytestream_put_byte(&ptr, 0); /* hmv2 */
2276     bytestream_put_byte(&ptr, 0); /* vmv2 */
2277 }
2278
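/* Grow the H.263 mb_info side data by one 12-byte record for roughly every
 * s->mb_info bytes of coded data and fill it via write_mb_info(); when called
 * for a start code, only the position is recorded. */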
2279 static void update_mb_info(MpegEncContext *s, int startcode)
2280 {
2281     if (!s->mb_info)
2282         return;
2283     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2284         s->mb_info_size += 12;
2285         s->prev_mb_info = s->last_mb_info;
2286     }
2287     if (startcode) {
2288         s->prev_mb_info = put_bits_count(&s->pb)/8;
2289         /* This might have incremented mb_info_size above, and we return without
2290          * actually writing any info into that slot yet. But in that case,
2291          * this will be called again after the start code has been written, and
2292          * the mb info will actually be written then. */
2293         return;
2294     }
2295
2296     s->last_mb_info = put_bits_count(&s->pb)/8;
2297     if (!s->mb_info_size)
2298         s->mb_info_size += 12;
2299     write_mb_info(s);
2300 }
2301
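/* Encode one slice of macroblock rows.  For each MB, either try every
 * candidate type through encode_mb_hq() (rate-distortion / QP_RD decision)
 * or encode the single possible type directly; resync markers, GOB headers
 * and video-packet headers are emitted when rtp_mode is set. */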
2302 static int encode_thread(AVCodecContext *c, void *arg){
2303     MpegEncContext *s= *(void**)arg;
2304     int mb_x, mb_y, pdif = 0;
2305     int chr_h= 16>>s->chroma_y_shift;
2306     int i, j;
2307     MpegEncContext best_s, backup_s;
2308     uint8_t bit_buf[2][MAX_MB_BYTES];
2309     uint8_t bit_buf2[2][MAX_MB_BYTES];
2310     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2311     PutBitContext pb[2], pb2[2], tex_pb[2];
2312
2313     ff_check_alignment();
2314
2315     for(i=0; i<2; i++){
2316         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2317         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2318         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2319     }
2320
2321     s->last_bits= put_bits_count(&s->pb);
2322     s->mv_bits=0;
2323     s->misc_bits=0;
2324     s->i_tex_bits=0;
2325     s->p_tex_bits=0;
2326     s->i_count=0;
2327     s->f_count=0;
2328     s->b_count=0;
2329     s->skip_count=0;
2330
2331     for(i=0; i<3; i++){
2332         /* init last dc values */
2333         /* note: quant matrix value (8) is implied here */
2334         s->last_dc[i] = 128 << s->intra_dc_precision;
2335
2336         s->current_picture.f.error[i] = 0;
2337     }
2338     s->mb_skip_run = 0;
2339     memset(s->last_mv, 0, sizeof(s->last_mv));
2340
2341     s->last_mv_dir = 0;
2342
2343     switch(s->codec_id){
2344     case AV_CODEC_ID_H263:
2345     case AV_CODEC_ID_H263P:
2346     case AV_CODEC_ID_FLV1:
2347         if (CONFIG_H263_ENCODER)
2348             s->gob_index = ff_h263_get_gob_height(s);
2349         break;
2350     case AV_CODEC_ID_MPEG4:
2351         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2352             ff_mpeg4_init_partitions(s);
2353         break;
2354     }
2355
2356     s->resync_mb_x=0;
2357     s->resync_mb_y=0;
2358     s->first_slice_line = 1;
2359     s->ptr_lastgob = s->pb.buf;
2360     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2361         s->mb_x=0;
2362         s->mb_y= mb_y;
2363
2364         ff_set_qscale(s, s->qscale);
2365         ff_init_block_index(s);
2366
2367         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2368             int xy= mb_y*s->mb_stride + mb_x; // not const: H.261 may adjust it below
2369             int mb_type= s->mb_type[xy];
2370 //            int d;
2371             int dmin= INT_MAX;
2372             int dir;
2373
2374             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2375                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2376                 return -1;
2377             }
2378             if(s->data_partitioning){
2379                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2380                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2381                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2382                     return -1;
2383                 }
2384             }
2385
2386             s->mb_x = mb_x;
2387             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2388             ff_update_block_index(s);
2389
2390             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2391                 ff_h261_reorder_mb_index(s);
2392                 xy= s->mb_y*s->mb_stride + s->mb_x;
2393                 mb_type= s->mb_type[xy];
2394             }
2395
2396             /* write gob / video packet header  */
2397             if(s->rtp_mode){
2398                 int current_packet_size, is_gob_start;
2399
2400                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2401
2402                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2403
2404                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2405
2406                 switch(s->codec_id){
2407                 case AV_CODEC_ID_H263:
2408                 case AV_CODEC_ID_H263P:
2409                     if(!s->h263_slice_structured)
2410                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2411                     break;
2412                 case AV_CODEC_ID_MPEG2VIDEO:
2413                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
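                    /* fall through: MPEG-2 also applies the MPEG-1 skip-run check */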
2414                 case AV_CODEC_ID_MPEG1VIDEO:
2415                     if(s->mb_skip_run) is_gob_start=0;
2416                     break;
2417                 }
2418
2419                 if(is_gob_start){
2420                     if(s->start_mb_y != mb_y || mb_x!=0){
2421                         write_slice_end(s);
2422
2423                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2424                             ff_mpeg4_init_partitions(s);
2425                         }
2426                     }
2427
2428                     assert((put_bits_count(&s->pb)&7) == 0);
2429                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2430
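                    /* error_rate: pseudo-randomly rewind the bitstream to the
                     * start of the last GOB, discarding roughly error_rate% of
                     * the slices to simulate transmission errors. */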
2431                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2432                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2433                         int d = 100 / s->error_rate;
2434                         if(r % d == 0){
2435                             current_packet_size=0;
2436                             s->pb.buf_ptr= s->ptr_lastgob;
2437                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2438                         }
2439                     }
2440
2441                     if (s->avctx->rtp_callback){
2442                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2443                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2444                     }
2445                     update_mb_info(s, 1);
2446
2447                     switch(s->codec_id){
2448                     case AV_CODEC_ID_MPEG4:
2449                         if (CONFIG_MPEG4_ENCODER) {
2450                             ff_mpeg4_encode_video_packet_header(s);
2451                             ff_mpeg4_clean_buffers(s);
2452                         }
2453                     break;
2454                     case AV_CODEC_ID_MPEG1VIDEO:
2455                     case AV_CODEC_ID_MPEG2VIDEO:
2456                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2457                             ff_mpeg1_encode_slice_header(s);
2458                             ff_mpeg1_clean_buffers(s);
2459                         }
2460                     break;
2461                     case AV_CODEC_ID_H263:
2462                     case AV_CODEC_ID_H263P:
2463                         if (CONFIG_H263_ENCODER)
2464                             ff_h263_encode_gob_header(s, mb_y);
2465                     break;
2466                     }
2467
2468                     if(s->flags&CODEC_FLAG_PASS1){
2469                         int bits= put_bits_count(&s->pb);
2470                         s->misc_bits+= bits - s->last_bits;
2471                         s->last_bits= bits;
2472                     }
2473
2474                     s->ptr_lastgob += current_packet_size;
2475                     s->first_slice_line=1;
2476                     s->resync_mb_x=mb_x;
2477                     s->resync_mb_y=mb_y;
2478                 }
2479             }
2480
2481             if(  (s->resync_mb_x   == s->mb_x)
2482                && s->resync_mb_y+1 == s->mb_y){
2483                 s->first_slice_line=0;
2484             }
2485
2486             s->mb_skipped=0;
2487             s->dquant=0; //only for QP_RD
2488
2489             update_mb_info(s, 0);
2490
2491             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2492                 int next_block=0;
2493                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2494
2495                 copy_context_before_encode(&backup_s, s, -1);
2496                 backup_s.pb= s->pb;
2497                 best_s.data_partitioning= s->data_partitioning;
2498                 best_s.partitioned_frame= s->partitioned_frame;
2499                 if(s->data_partitioning){
2500                     backup_s.pb2= s->pb2;
2501                     backup_s.tex_pb= s->tex_pb;
2502                 }
2503
2504                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2505                     s->mv_dir = MV_DIR_FORWARD;
2506                     s->mv_type = MV_TYPE_16X16;
2507                     s->mb_intra= 0;
2508                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2509                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2510                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2511                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2512                 }
2513                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2514                     s->mv_dir = MV_DIR_FORWARD;
2515                     s->mv_type = MV_TYPE_FIELD;
2516                     s->mb_intra= 0;
2517                     for(i=0; i<2; i++){
2518                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2519                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2520                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2521                     }
2522                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2523                                  &dmin, &next_block, 0, 0);
2524                 }
2525                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2526                     s->mv_dir = MV_DIR_FORWARD;
2527                     s->mv_type = MV_TYPE_16X16;
2528                     s->mb_intra= 0;
2529                     s->mv[0][0][0] = 0;
2530                     s->mv[0][0][1] = 0;
2531                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2532                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2533                 }
2534                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2535                     s->mv_dir = MV_DIR_FORWARD;
2536                     s->mv_type = MV_TYPE_8X8;
2537                     s->mb_intra= 0;
2538                     for(i=0; i<4; i++){
2539                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2540                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2541                     }
2542                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2543                                  &dmin, &next_block, 0, 0);
2544                 }
2545                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2546                     s->mv_dir = MV_DIR_FORWARD;
2547                     s->mv_type = MV_TYPE_16X16;
2548                     s->mb_intra= 0;
2549                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2550                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2551                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2552                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2553                 }
2554                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2555                     s->mv_dir = MV_DIR_BACKWARD;
2556                     s->mv_type = MV_TYPE_16X16;
2557                     s->mb_intra= 0;
2558                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2559                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2560                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2561                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2562                 }
2563                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2564                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2565                     s->mv_type = MV_TYPE_16X16;
2566                     s->mb_intra= 0;
2567                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2568                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2569                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2570                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2571                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2572                                  &dmin, &next_block, 0, 0);
2573                 }
2574                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2575                     s->mv_dir = MV_DIR_FORWARD;
2576                     s->mv_type = MV_TYPE_FIELD;
2577                     s->mb_intra= 0;
2578                     for(i=0; i<2; i++){
2579                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2580                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2581                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2582                     }
2583                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2584                                  &dmin, &next_block, 0, 0);
2585                 }
2586                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2587                     s->mv_dir = MV_DIR_BACKWARD;
2588                     s->mv_type = MV_TYPE_FIELD;
2589                     s->mb_intra= 0;
2590                     for(i=0; i<2; i++){
2591                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2592                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2593                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2594                     }
2595                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2596                                  &dmin, &next_block, 0, 0);
2597                 }
2598                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2599                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2600                     s->mv_type = MV_TYPE_FIELD;
2601                     s->mb_intra= 0;
2602                     for(dir=0; dir<2; dir++){
2603                         for(i=0; i<2; i++){
2604                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2605                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2606                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2607                         }
2608                     }
2609                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2610                                  &dmin, &next_block, 0, 0);
2611                 }
2612                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2613                     s->mv_dir = 0;
2614                     s->mv_type = MV_TYPE_16X16;
2615                     s->mb_intra= 1;
2616                     s->mv[0][0][0] = 0;
2617                     s->mv[0][0][1] = 0;
2618                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2619                                  &dmin, &next_block, 0, 0);
2620                     if(s->h263_pred || s->h263_aic){
2621                         if(best_s.mb_intra)
2622                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2623                         else
2624                             ff_clean_intra_table_entries(s); //old mode?
2625                     }
2626                 }
2627
2628                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2629                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2630                         const int last_qp= backup_s.qscale;
2631                         int qpi, qp, dc[6];
2632                         int16_t ac[6][16];
2633                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2634                         static const int dquant_tab[4]={-1,1,-2,2};
2635
2636                         assert(backup_s.dquant == 0);
2637
2638                         //FIXME intra
2639                         s->mv_dir= best_s.mv_dir;
2640                         s->mv_type = MV_TYPE_16X16;
2641                         s->mb_intra= best_s.mb_intra;
2642                         s->mv[0][0][0] = best_s.mv[0][0][0];
2643                         s->mv[0][0][1] = best_s.mv[0][0][1];
2644                         s->mv[1][0][0] = best_s.mv[1][0][0];
2645                         s->mv[1][0][1] = best_s.mv[1][0][1];
2646
2647                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2648                         for(; qpi<4; qpi++){
2649                             int dquant= dquant_tab[qpi];
2650                             qp= last_qp + dquant;
2651                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2652                                 continue;
2653                             backup_s.dquant= dquant;
2654                             if(s->mb_intra && s->dc_val[0]){
2655                                 for(i=0; i<6; i++){
2656                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2657                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2658                                 }
2659                             }
2660
2661                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2662                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2663                             if(best_s.qscale != qp){
2664                                 if(s->mb_intra && s->dc_val[0]){
2665                                     for(i=0; i<6; i++){
2666                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2667                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2668                                     }
2669                                 }
2670                             }
2671                         }
2672                     }
2673                 }
2674                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2675                     int mx= s->b_direct_mv_table[xy][0];
2676                     int my= s->b_direct_mv_table[xy][1];
2677
2678                     backup_s.dquant = 0;
2679                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2680                     s->mb_intra= 0;
2681                     ff_mpeg4_set_direct_mv(s, mx, my);
2682                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2683                                  &dmin, &next_block, mx, my);
2684                 }
2685                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2686                     backup_s.dquant = 0;
2687                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2688                     s->mb_intra= 0;
2689                     ff_mpeg4_set_direct_mv(s, 0, 0);
2690                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2691                                  &dmin, &next_block, 0, 0);
2692                 }
2693                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2694                     int coded=0;
2695                     for(i=0; i<6; i++)
2696                         coded |= s->block_last_index[i];
2697                     if(coded){
2698                         int mx,my;
2699                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2700                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2701                             mx=my=0; //FIXME find the one we actually used
2702                             ff_mpeg4_set_direct_mv(s, mx, my);
2703                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2704                             mx= s->mv[1][0][0];
2705                             my= s->mv[1][0][1];
2706                         }else{
2707                             mx= s->mv[0][0][0];
2708                             my= s->mv[0][0][1];
2709                         }
2710
2711                         s->mv_dir= best_s.mv_dir;
2712                         s->mv_type = best_s.mv_type;
2713                         s->mb_intra= 0;
2714 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2715                         s->mv[0][0][1] = best_s.mv[0][0][1];
2716                         s->mv[1][0][0] = best_s.mv[1][0][0];
2717                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2718                         backup_s.dquant= 0;
2719                         s->skipdct=1;
2720                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2721                                         &dmin, &next_block, mx, my);
2722                         s->skipdct=0;
2723                     }
2724                 }
2725
2726                 s->current_picture.qscale_table[xy] = best_s.qscale;
2727
2728                 copy_context_after_encode(s, &best_s, -1);
2729
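                /* The winning candidate's bits were written into bit_buf[next_block^1];
                 * copy them into the real bitstream writer (and the partitioned
                 * writers below, if data partitioning is in use). */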
2730                 pb_bits_count= put_bits_count(&s->pb);
2731                 flush_put_bits(&s->pb);
2732                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2733                 s->pb= backup_s.pb;
2734
2735                 if(s->data_partitioning){
2736                     pb2_bits_count= put_bits_count(&s->pb2);
2737                     flush_put_bits(&s->pb2);
2738                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2739                     s->pb2= backup_s.pb2;
2740
2741                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2742                     flush_put_bits(&s->tex_pb);
2743                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2744                     s->tex_pb= backup_s.tex_pb;
2745                 }
2746                 s->last_bits= put_bits_count(&s->pb);
2747
2748                 if (CONFIG_H263_ENCODER &&
2749                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2750                     ff_h263_update_motion_val(s);
2751
2752                 if(next_block==0){ //FIXME 16 vs linesize16
2753                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2754                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2755                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2756                 }
2757
2758                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2759                     ff_MPV_decode_mb(s, s->block);
2760             } else {
2761                 int motion_x = 0, motion_y = 0;
2762                 s->mv_type=MV_TYPE_16X16;
2763                 // only one MB-Type possible
2764
2765                 switch(mb_type){
2766                 case CANDIDATE_MB_TYPE_INTRA:
2767                     s->mv_dir = 0;
2768                     s->mb_intra= 1;
2769                     motion_x= s->mv[0][0][0] = 0;
2770                     motion_y= s->mv[0][0][1] = 0;
2771                     break;
2772                 case CANDIDATE_MB_TYPE_INTER:
2773                     s->mv_dir = MV_DIR_FORWARD;
2774                     s->mb_intra= 0;
2775                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2776                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2777                     break;
2778                 case CANDIDATE_MB_TYPE_INTER_I:
2779                     s->mv_dir = MV_DIR_FORWARD;
2780                     s->mv_type = MV_TYPE_FIELD;
2781                     s->mb_intra= 0;
2782                     for(i=0; i<2; i++){
2783                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2784                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2785                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2786                     }
2787                     break;
2788                 case CANDIDATE_MB_TYPE_INTER4V:
2789                     s->mv_dir = MV_DIR_FORWARD;
2790                     s->mv_type = MV_TYPE_8X8;
2791                     s->mb_intra= 0;
2792                     for(i=0; i<4; i++){
2793                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2794                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2795                     }
2796                     break;
2797                 case CANDIDATE_MB_TYPE_DIRECT:
2798                     if (CONFIG_MPEG4_ENCODER) {
2799                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2800                         s->mb_intra= 0;
2801                         motion_x=s->b_direct_mv_table[xy][0];
2802                         motion_y=s->b_direct_mv_table[xy][1];
2803                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2804                     }
2805                     break;
2806                 case CANDIDATE_MB_TYPE_DIRECT0:
2807                     if (CONFIG_MPEG4_ENCODER) {
2808                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2809                         s->mb_intra= 0;
2810                         ff_mpeg4_set_direct_mv(s, 0, 0);
2811                     }
2812                     break;
2813                 case CANDIDATE_MB_TYPE_BIDIR:
2814                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2815                     s->mb_intra= 0;
2816                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2817                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2818                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2819                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2820                     break;
2821                 case CANDIDATE_MB_TYPE_BACKWARD:
2822                     s->mv_dir = MV_DIR_BACKWARD;
2823                     s->mb_intra= 0;
2824                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2825                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2826                     break;
2827                 case CANDIDATE_MB_TYPE_FORWARD:
2828                     s->mv_dir = MV_DIR_FORWARD;
2829                     s->mb_intra= 0;
2830                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2831                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2832                     break;
2833                 case CANDIDATE_MB_TYPE_FORWARD_I:
2834                     s->mv_dir = MV_DIR_FORWARD;
2835                     s->mv_type = MV_TYPE_FIELD;
2836                     s->mb_intra= 0;
2837                     for(i=0; i<2; i++){
2838                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2839                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2840                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2841                     }
2842                     break;
2843                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2844                     s->mv_dir = MV_DIR_BACKWARD;
2845                     s->mv_type = MV_TYPE_FIELD;
2846                     s->mb_intra= 0;
2847                     for(i=0; i<2; i++){
2848                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2849                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2850                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2851                     }
2852                     break;
2853                 case CANDIDATE_MB_TYPE_BIDIR_I:
2854                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2855                     s->mv_type = MV_TYPE_FIELD;
2856                     s->mb_intra= 0;
2857                     for(dir=0; dir<2; dir++){
2858                         for(i=0; i<2; i++){
2859                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2860                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2861                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2862                         }
2863                     }
2864                     break;
2865                 default:
2866                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
2867                 }
2868
2869                 encode_mb(s, motion_x, motion_y);
2870
2871                 // RAL: Update last macroblock type
2872                 s->last_mv_dir = s->mv_dir;
2873
2874                 if (CONFIG_H263_ENCODER &&
2875                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2876                     ff_h263_update_motion_val(s);
2877
2878                 ff_MPV_decode_mb(s, s->block);
2879             }
2880
2881             /* Zero the MV table entry for intra MBs in I/P/S frames; direct mode in B-frames reads this table. */
2882             if(s->mb_intra /* && I,P,S_TYPE */){
2883                 s->p_mv_table[xy][0]=0;
2884                 s->p_mv_table[xy][1]=0;
2885             }
2886
2887             if(s->flags&CODEC_FLAG_PSNR){
2888                 int w= 16;
2889                 int h= 16;
2890
2891                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2892                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2893
2894                 s->current_picture.f.error[0] += sse(
2895                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
2896                     s->dest[0], w, h, s->linesize);
2897                 s->current_picture.f.error[1] += sse(
2898                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2899                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2900                 s->current_picture.f.error[2] += sse(
2901                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2902                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2903             }
2904             if(s->loop_filter){
2905                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
2906                     ff_h263_loop_filter(s);
2907             }
2908             av_dlog(s->avctx, "MB %d %d bits\n",
2909                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
2910         }
2911     }
2912
2913     // Not pretty, but the ext header must be written before flushing, so it has to be here.
2914     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
2915         ff_msmpeg4_encode_ext_header(s);
2916
2917     write_slice_end(s);
2918
2919     /* Send the last GOB if RTP */
2920     if (s->avctx->rtp_callback) {
2921         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
2922         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
2923         /* Call the RTP callback to send the last GOB */
2924         emms_c();
2925         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
2926     }
2927
2928     return 0;
2929 }
2930
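/* Merge statistics from a slice-thread context back into the main one:
 * MERGE() adds src->field onto dst->field and clears the source, so after
 * merging all totals live in the first slice context. */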
2931 #define MERGE(field) dst->field += src->field; src->field=0
2932 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
2933     MERGE(me.scene_change_score);
2934     MERGE(me.mc_mb_var_sum_temp);
2935     MERGE(me.mb_var_sum_temp);
2936 }
2937
2938 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
2939     int i;
2940
2941     MERGE(dct_count[0]); // note: the other DCT variables are not part of the context
2942     MERGE(dct_count[1]);
2943     MERGE(mv_bits);
2944     MERGE(i_tex_bits);
2945     MERGE(p_tex_bits);
2946     MERGE(i_count);
2947     MERGE(f_count);
2948     MERGE(b_count);
2949     MERGE(skip_count);
2950     MERGE(misc_bits);
2951     MERGE(er.error_count);
2952     MERGE(padding_bug_score);
2953     MERGE(current_picture.f.error[0]);
2954     MERGE(current_picture.f.error[1]);
2955     MERGE(current_picture.f.error[2]);
2956
2957     if(dst->avctx->noise_reduction){
2958         for(i=0; i<64; i++){
2959             MERGE(dct_error_sum[0][i]);
2960             MERGE(dct_error_sum[1][i]);
2961         }
2962     }
2963
2964     assert(put_bits_count(&src->pb) % 8 ==0);
2965     assert(put_bits_count(&dst->pb) % 8 ==0);
2966     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
2967     flush_put_bits(&dst->pb);
2968 }
2969
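/* Pick the quantizer for the current picture: use a pending next_lambda if
 * one is set, otherwise ask the rate controller unless a fixed qscale was
 * requested; with adaptive quantization the per-MB qscale table is then
 * cleaned up according to the codec's restrictions. */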
2970 static int estimate_qp(MpegEncContext *s, int dry_run){
2971     if (s->next_lambda){
2972         s->current_picture_ptr->f.quality =
2973         s->current_picture.f.quality = s->next_lambda;
2974         if(!dry_run) s->next_lambda= 0;
2975     } else if (!s->fixed_qscale) {
2976         s->current_picture_ptr->f.quality =
2977         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
2978         if (s->current_picture.f.quality < 0)
2979             return -1;
2980     }
2981
2982     if(s->adaptive_quant){
2983         switch(s->codec_id){
2984         case AV_CODEC_ID_MPEG4:
2985             if (CONFIG_MPEG4_ENCODER)
2986                 ff_clean_mpeg4_qscales(s);
2987             break;
2988         case AV_CODEC_ID_H263:
2989         case AV_CODEC_ID_H263P:
2990         case AV_CODEC_ID_FLV1:
2991             if (CONFIG_H263_ENCODER)
2992                 ff_clean_h263_qscales(s);
2993             break;
2994         default:
2995             ff_init_qscale_tab(s);
2996         }
2997
2998         s->lambda= s->lambda_table[0];
2999         //FIXME broken
3000     }else
3001         s->lambda = s->current_picture.f.quality;
3002     update_qscale(s);
3003     return 0;
3004 }
3005
3006 /* must be called before writing the header */
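/* pp_time is the distance between the two most recent non-B frames and
 * pb_time the distance from the older of them to the current B-frame;
 * MPEG-4 direct mode scales its motion vectors by pb_time/pp_time. */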
3007 static void set_frame_distances(MpegEncContext * s){
3008     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3009     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3010
3011     if(s->pict_type==AV_PICTURE_TYPE_B){
3012         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3013         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3014     }else{
3015         s->pp_time= s->time - s->last_non_b_time;
3016         s->last_non_b_time= s->time;
3017         assert(s->picture_number==0 || s->pp_time > 0);
3018     }
3019 }
3020
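/* Encode one picture: set up frame distances, run motion estimation (or MB
 * variance analysis for I-frames) on all slice contexts, choose f_code/b_code
 * and fix over-long vectors, estimate the quantizer, write the format
 * specific picture header and finally run encode_thread() per slice. */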
3021 static int encode_picture(MpegEncContext *s, int picture_number)
3022 {
3023     int i, ret;
3024     int bits;
3025     int context_count = s->slice_context_count;
3026
3027     s->picture_number = picture_number;
3028
3029     /* Reset the average MB variance */
3030     s->me.mb_var_sum_temp    =
3031     s->me.mc_mb_var_sum_temp = 0;
3032
3033     /* we need to initialize some time variables before we can encode B-frames */
3034     // RAL: Condition added for MPEG1VIDEO
3035     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3036         set_frame_distances(s);
3037     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3038         ff_set_mpeg4_time(s);
3039
3040     s->me.scene_change_score=0;
3041
3042 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3043
3044     if(s->pict_type==AV_PICTURE_TYPE_I){
3045         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3046         else                        s->no_rounding=0;
3047     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3048         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3049             s->no_rounding ^= 1;
3050     }
3051
3052     if(s->flags & CODEC_FLAG_PASS2){
3053         if (estimate_qp(s,1) < 0)
3054             return -1;
3055         ff_get_2pass_fcode(s);
3056     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3057         if(s->pict_type==AV_PICTURE_TYPE_B)
3058             s->lambda= s->last_lambda_for[s->pict_type];
3059         else
3060             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3061         update_qscale(s);
3062     }
3063
3064     s->mb_intra=0; //for the rate distortion & bit compare functions
3065     for(i=1; i<context_count; i++){
3066         ret = ff_update_duplicate_context(s->thread_context[i], s);
3067         if (ret < 0)
3068             return ret;
3069     }
3070
3071     if(ff_init_me(s)<0)
3072         return -1;
3073
3074     /* Estimate motion for every MB */
3075     if(s->pict_type != AV_PICTURE_TYPE_I){
3076         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3077         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3078         if (s->pict_type != AV_PICTURE_TYPE_B) {
3079             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3080                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3081             }
3082         }
3083
3084         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3085     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3086         /* I-Frame */
3087         for(i=0; i<s->mb_stride*s->mb_height; i++)
3088             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3089
3090         if(!s->fixed_qscale){
3091             /* finding spatial complexity for I-frame rate control */
3092             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3093         }
3094     }
3095     for(i=1; i<context_count; i++){
3096         merge_context_after_me(s, s->thread_context[i]);
3097     }
3098     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3099     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3100     emms_c();
3101
3102     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3103         s->pict_type= AV_PICTURE_TYPE_I;
3104         for(i=0; i<s->mb_stride*s->mb_height; i++)
3105             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3106         av_dlog(s->avctx, "Scene change detected, encoding as I Frame %d %d\n",
3107                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3108     }
3109
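    /* Choose f_code/b_code from the estimated vectors and clip or demote MBs
     * whose vectors cannot be coded with the selected range. */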
3110     if(!s->umvplus){
3111         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3112             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3113
3114             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3115                 int a,b;
3116                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3117                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3118                 s->f_code= FFMAX3(s->f_code, a, b);
3119             }
3120
3121             ff_fix_long_p_mvs(s);
3122             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3123             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3124                 int j;
3125                 for(i=0; i<2; i++){
3126                     for(j=0; j<2; j++)
3127                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3128                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3129                 }
3130             }
3131         }
3132
3133         if(s->pict_type==AV_PICTURE_TYPE_B){
3134             int a, b;
3135
3136             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3137             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3138             s->f_code = FFMAX(a, b);
3139
3140             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3141             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3142             s->b_code = FFMAX(a, b);
3143
3144             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3145             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3146             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3147             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3148             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3149                 int dir, j;
3150                 for(dir=0; dir<2; dir++){
3151                     for(i=0; i<2; i++){
3152                         for(j=0; j<2; j++){
3153                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3154                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3155                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3156                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3157                         }
3158                     }
3159                 }
3160             }
3161         }
3162     }
3163
3164     if (estimate_qp(s, 0) < 0)
3165         return -1;
3166
3167     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3168         s->qscale= 3; //reduce clipping problems
3169
3170     if (s->out_format == FMT_MJPEG) {
3171         /* for mjpeg, we do include qscale in the matrix */
3172         for(i=1;i<64;i++){
3173             int j= s->dsp.idct_permutation[i];
3174
3175             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3176         }
3177         s->y_dc_scale_table=
3178         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3179         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3180         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3181                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3182         s->qscale= 8;
3183     }
3184
3185     //FIXME var duplication
3186     s->current_picture_ptr->f.key_frame =
3187     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3188     s->current_picture_ptr->f.pict_type =
3189     s->current_picture.f.pict_type = s->pict_type;
3190
3191     if (s->current_picture.f.key_frame)
3192         s->picture_in_gop_number=0;
3193
3194     s->last_bits= put_bits_count(&s->pb);
3195     switch(s->out_format) {
3196     case FMT_MJPEG:
3197         if (CONFIG_MJPEG_ENCODER)
3198             ff_mjpeg_encode_picture_header(s);
3199         break;
3200     case FMT_H261:
3201         if (CONFIG_H261_ENCODER)
3202             ff_h261_encode_picture_header(s, picture_number);
3203         break;
3204     case FMT_H263:
3205         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3206             ff_wmv2_encode_picture_header(s, picture_number);
3207         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3208             ff_msmpeg4_encode_picture_header(s, picture_number);
3209         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3210             ff_mpeg4_encode_picture_header(s, picture_number);
3211         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3212             ff_rv10_encode_picture_header(s, picture_number);
3213         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3214             ff_rv20_encode_picture_header(s, picture_number);
3215         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3216             ff_flv_encode_picture_header(s, picture_number);
3217         else if (CONFIG_H263_ENCODER)
3218             ff_h263_encode_picture_header(s, picture_number);
3219         break;
3220     case FMT_MPEG1:
3221         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3222             ff_mpeg1_encode_picture_header(s, picture_number);
3223         break;
3224     default:
3225         assert(0);
3226     }
3227     bits= put_bits_count(&s->pb);
3228     s->header_bits= bits - s->last_bits;
3229
3230     for(i=1; i<context_count; i++){
3231         update_duplicate_context_after_me(s->thread_context[i], s);
3232     }
3233     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3234     for(i=1; i<context_count; i++){
3235         merge_context_after_encode(s, s->thread_context[i]);
3236     }
3237     emms_c();
3238     return 0;
3239 }
3240
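/* Noise reduction: accumulate the magnitude of every DCT coefficient in
 * dct_error_sum[] and pull each coefficient towards zero by the matching
 * dct_offset[] value (clamping at zero rather than changing sign). */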
3241 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3242     const int intra= s->mb_intra;
3243     int i;
3244
3245     s->dct_count[intra]++;
3246
3247     for(i=0; i<64; i++){
3248         int level= block[i];
3249
3250         if(level){
3251             if(level>0){
3252                 s->dct_error_sum[intra][i] += level;
3253                 level -= s->dct_offset[intra][i];
3254                 if(level<0) level=0;
3255             }else{
3256                 s->dct_error_sum[intra][i] -= level;
3257                 level += s->dct_offset[intra][i];
3258                 if(level>0) level=0;
3259             }
3260             block[i]= level;
3261         }
3262     }
3263 }
3264
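/* Trellis quantization: for every scan position keep up to two candidate
 * levels, then run a dynamic-programming search over run/level pairs that
 * minimizes distortion + lambda * VLC bits, and write back the best path. */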
3265 static int dct_quantize_trellis_c(MpegEncContext *s,
3266                                   int16_t *block, int n,
3267                                   int qscale, int *overflow){
3268     const int *qmat;
3269     const uint8_t *scantable= s->intra_scantable.scantable;
3270     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3271     int max=0;
3272     unsigned int threshold1, threshold2;
3273     int bias=0;
3274     int run_tab[65];
3275     int level_tab[65];
3276     int score_tab[65];
3277     int survivor[65];
3278     int survivor_count;
3279     int last_run=0;
3280     int last_level=0;
3281     int last_score= 0;
3282     int last_i;
3283     int coeff[2][64];
3284     int coeff_count[64];
3285     int qmul, qadd, start_i, last_non_zero, i, dc;
3286     const int esc_length= s->ac_esc_length;
3287     uint8_t * length;
3288     uint8_t * last_length;
3289     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3290
3291     s->dsp.fdct (block);
3292
3293     if(s->dct_error_sum)
3294         s->denoise_dct(s, block);
3295     qmul= qscale*16;
3296     qadd= ((qscale-1)|1)*8;
3297
3298     if (s->mb_intra) {
3299         int q;
3300         if (!s->h263_aic) {
3301             if (n < 4)
3302                 q = s->y_dc_scale;
3303             else
3304                 q = s->c_dc_scale;
3305             q = q << 3;
3306         } else{
3307             /* For AIC we skip quant/dequant of INTRADC */
3308             q = 1 << 3;
3309             qadd=0;
3310         }
3311
3312         /* note: block[0] is assumed to be positive */
3313         block[0] = (block[0] + (q >> 1)) / q;
3314         start_i = 1;
3315         last_non_zero = 0;
3316         qmat = s->q_intra_matrix[qscale];
3317         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3318             bias= 1<<(QMAT_SHIFT-1);
3319         length     = s->intra_ac_vlc_length;
3320         last_length= s->intra_ac_vlc_last_length;
3321     } else {
3322         start_i = 0;
3323         last_non_zero = -1;
3324         qmat = s->q_inter_matrix[qscale];
3325         length     = s->inter_ac_vlc_length;
3326         last_length= s->inter_ac_vlc_last_length;
3327     }
3328     last_i= start_i;
3329
3330     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3331     threshold2= (threshold1<<1);
3332
3333     for(i=63; i>=start_i; i--) {
3334         const int j = scantable[i];
3335         int level = block[j] * qmat[j];
3336
3337         if(((unsigned)(level+threshold1))>threshold2){
3338             last_non_zero = i;
3339             break;
3340         }
3341     }
3342
3343     for(i=start_i; i<=last_non_zero; i++) {
3344         const int j = scantable[i];
3345         int level = block[j] * qmat[j];
3346
3347 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3348 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3349         if(((unsigned)(level+threshold1))>threshold2){
3350             if(level>0){
3351                 level= (bias + level)>>QMAT_SHIFT;
3352                 coeff[0][i]= level;
3353                 coeff[1][i]= level-1;
3354 //                coeff[2][k]= level-2;
3355             }else{
3356                 level= (bias - level)>>QMAT_SHIFT;
3357                 coeff[0][i]= -level;
3358                 coeff[1][i]= -level+1;
3359 //                coeff[2][k]= -level+2;
3360             }
3361             coeff_count[i]= FFMIN(level, 2);
3362             assert(coeff_count[i]);
3363             max |=level;
3364         }else{
3365             coeff[0][i]= (level>>31)|1;
3366             coeff_count[i]= 1;
3367         }
3368     }
3369
3370     *overflow= s->max_qcoeff < max; //overflow might have happened
3371
3372     if(last_non_zero < start_i){
3373         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3374         return last_non_zero;
3375     }
3376
3377     score_tab[start_i]= 0;
3378     survivor[0]= start_i;
3379     survivor_count= 1;
3380
3381     for(i=start_i; i<=last_non_zero; i++){
3382         int level_index, j, zero_distortion;
3383         int dct_coeff= FFABS(block[ scantable[i] ]);
3384         int best_score=256*256*256*120;
3385
3386         if (s->dsp.fdct == ff_fdct_ifast)
3387             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3388         zero_distortion= dct_coeff*dct_coeff;
3389
3390         for(level_index=0; level_index < coeff_count[i]; level_index++){
3391             int distortion;
3392             int level= coeff[level_index][i];
3393             const int alevel= FFABS(level);
3394             int unquant_coeff;
3395
3396             assert(level);
3397
3398             if(s->out_format == FMT_H263){
3399                 unquant_coeff= alevel*qmul + qadd;
3400             }else{ //MPEG1
3401                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3402                 if(s->mb_intra){
3403                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3404                         unquant_coeff =   (unquant_coeff - 1) | 1;
3405                 }else{
3406                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3407                         unquant_coeff =   (unquant_coeff - 1) | 1;
3408                 }
3409                 unquant_coeff<<= 3;
3410             }
3411
3412             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3413             level+=64;
3414             if((level&(~127)) == 0){
3415                 for(j=survivor_count-1; j>=0; j--){
3416                     int run= i - survivor[j];
3417                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3418                     score += score_tab[i-run];
3419
3420                     if(score < best_score){
3421                         best_score= score;
3422                         run_tab[i+1]= run;
3423                         level_tab[i+1]= level-64;
3424                     }
3425                 }
3426
3427                 if(s->out_format == FMT_H263){
3428                     for(j=survivor_count-1; j>=0; j--){
3429                         int run= i - survivor[j];
3430                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3431                         score += score_tab[i-run];
3432                         if(score < last_score){
3433                             last_score= score;
3434                             last_run= run;
3435                             last_level= level-64;
3436                             last_i= i+1;
3437                         }
3438                     }
3439                 }
3440             }else{
3441                 distortion += esc_length*lambda;
3442                 for(j=survivor_count-1; j>=0; j--){
3443                     int run= i - survivor[j];
3444                     int score= distortion + score_tab[i-run];
3445
3446                     if(score < best_score){
3447                         best_score= score;
3448                         run_tab[i+1]= run;
3449                         level_tab[i+1]= level-64;
3450                     }
3451                 }
3452
3453                 if(s->out_format == FMT_H263){
3454                   for(j=survivor_count-1; j>=0; j--){
3455                         int run= i - survivor[j];
3456                         int score= distortion + score_tab[i-run];
3457                         if(score < last_score){
3458                             last_score= score;
3459                             last_run= run;
3460                             last_level= level-64;
3461                             last_i= i+1;
3462                         }
3463                     }
3464                 }
3465             }
3466         }
3467
3468         score_tab[i+1]= best_score;
3469
3470         // Note: MPEG-4 has a VLC code that is 1 bit shorter than another one with a shorter run and the same level
3471         if(last_non_zero <= 27){
3472             for(; survivor_count; survivor_count--){
3473                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3474                     break;
3475             }
3476         }else{
3477             for(; survivor_count; survivor_count--){
3478                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3479                     break;
3480             }
3481         }
3482
3483         survivor[ survivor_count++ ]= i+1;
3484     }
3485
3486     if(s->out_format != FMT_H263){
3487         last_score= 256*256*256*120;
3488         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3489             int score= score_tab[i];
3490             if(i) score += lambda*2; //FIXME be more exact?
3491
3492             if(score < last_score){
3493                 last_score= score;
3494                 last_i= i;
3495                 last_level= level_tab[i];
3496                 last_run= run_tab[i];
3497             }
3498         }
3499     }
3500
3501     s->coded_score[n] = last_score;
3502
3503     dc= FFABS(block[0]);
3504     last_non_zero= last_i - 1;
3505     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3506
3507     if(last_non_zero < start_i)
3508         return last_non_zero;
3509
3510     if(last_non_zero == 0 && start_i == 0){
3511         int best_level= 0;
3512         int best_score= dc * dc;
3513
3514         for(i=0; i<coeff_count[0]; i++){
3515             int level= coeff[i][0];
3516             int alevel= FFABS(level);
3517             int unquant_coeff, score, distortion;
3518
3519             if(s->out_format == FMT_H263){
3520                     unquant_coeff= (alevel*qmul + qadd)>>3;
3521             }else{ //MPEG1
3522                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3523                     unquant_coeff =   (unquant_coeff - 1) | 1;
3524             }
3525             unquant_coeff = (unquant_coeff + 4) >> 3;
3526             unquant_coeff<<= 3 + 3;
3527
3528             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3529             level+=64;
3530             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3531             else                    score= distortion + esc_length*lambda;
3532
3533             if(score < best_score){
3534                 best_score= score;
3535                 best_level= level - 64;
3536             }
3537         }
3538         block[0]= best_level;
3539         s->coded_score[n] = best_score - dc*dc;
3540         if(best_level == 0) return -1;
3541         else                return last_non_zero;
3542     }
3543
3544     i= last_i;
3545     assert(last_level);
3546
3547     block[ perm_scantable[last_non_zero] ]= last_level;
3548     i -= last_run + 1;
3549
3550     for(; i>start_i; i -= run_tab[i] + 1){
3551         block[ perm_scantable[i-1] ]= level_tab[i];
3552     }
3553
3554     return last_non_zero;
3555 }
3556
3557 //#define REFINE_STATS 1
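/* basis[][] caches the 64 8x8 DCT basis functions, scaled by BASIS_SHIFT and
 * indexed in IDCT permutation order; dct_quantize_refine() uses it with
 * try_8x8basis()/add_8x8basis() to evaluate coefficient changes in the pixel
 * domain. */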
3558 static int16_t basis[64][64];
3559
3560 static void build_basis(uint8_t *perm){
3561     int i, j, x, y;
3562     emms_c();
3563     for(i=0; i<8; i++){
3564         for(j=0; j<8; j++){
3565             for(y=0; y<8; y++){
3566                 for(x=0; x<8; x++){
3567                     double s= 0.25*(1<<BASIS_SHIFT);
3568                     int index= 8*i + j;
3569                     int perm_index= perm[index];
3570                     if(i==0) s*= sqrt(0.5);
3571                     if(j==0) s*= sqrt(0.5);
3572                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3573                 }
3574             }
3575         }
3576     }
3577 }
3578
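/* Iterative refinement of an already quantized block: repeatedly try +-1
 * changes on individual coefficients and keep the change that most reduces
 * the weighted pixel-domain error plus lambda times the VLC bit cost,
 * stopping once no single change improves the score. */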
3579 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3580                         int16_t *block, int16_t *weight, int16_t *orig,
3581                         int n, int qscale){
3582     int16_t rem[64];
3583     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3584     const uint8_t *scantable= s->intra_scantable.scantable;
3585     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3586 //    unsigned int threshold1, threshold2;
3587 //    int bias=0;
3588     int run_tab[65];
3589     int prev_run=0;
3590     int prev_level=0;
3591     int qmul, qadd, start_i, last_non_zero, i, dc;
3592     uint8_t * length;
3593     uint8_t * last_length;
3594     int lambda;
3595     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3596 #ifdef REFINE_STATS
3597 static int count=0;
3598 static int after_last=0;
3599 static int to_zero=0;
3600 static int from_zero=0;
3601 static int raise=0;
3602 static int lower=0;
3603 static int messed_sign=0;
3604 #endif
3605
3606     if(basis[0][0] == 0)
3607         build_basis(s->dsp.idct_permutation);
3608
3609     qmul= qscale*2;
3610     qadd= (qscale-1)|1;
3611     if (s->mb_intra) {
3612         if (!s->h263_aic) {
3613             if (n < 4)
3614                 q = s->y_dc_scale;
3615             else
3616                 q = s->c_dc_scale;
3617         } else{
3618             /* For AIC we skip quant/dequant of INTRADC */
3619             q = 1;
3620             qadd=0;
3621         }
3622         q <<= RECON_SHIFT-3;
3623         /* note: block[0] is assumed to be positive */
3624         dc= block[0]*q;
3625 //        block[0] = (block[0] + (q >> 1)) / q;
3626         start_i = 1;
3627 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3628 //            bias= 1<<(QMAT_SHIFT-1);
3629         length     = s->intra_ac_vlc_length;
3630         last_length= s->intra_ac_vlc_last_length;
3631     } else {
3632         dc= 0;
3633         start_i = 0;
3634         length     = s->inter_ac_vlc_length;
3635         last_length= s->inter_ac_vlc_last_length;
3636     }
3637     last_non_zero = s->block_last_index[n];
3638
3639 #ifdef REFINE_STATS
3640 {START_TIMER
3641 #endif
3642     dc += (1<<(RECON_SHIFT-1));
3643     for(i=0; i<64; i++){
3644         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME use orig directly instead of copying to rem[]
3645     }
3646 #ifdef REFINE_STATS
3647 STOP_TIMER("memset rem[]")}
3648 #endif
3649     sum=0;
3650     for(i=0; i<64; i++){
3651         int one= 36;
3652         int qns=4;
3653         int w;
3654
3655         w= FFABS(weight[i]) + qns*one;
3656         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3657
3658         weight[i] = w;
3659 //        w=weight[i] = (63*qns + (w/2)) / w;
3660
3661         assert(w>0);
3662         assert(w<(1<<6));
3663         sum += w*w;
3664     }
3665     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3666 #ifdef REFINE_STATS
3667 {START_TIMER
3668 #endif
3669     run=0;
3670     rle_index=0;
3671     for(i=start_i; i<=last_non_zero; i++){
3672         int j= perm_scantable[i];
3673         const int level= block[j];
3674         int coeff;
3675
3676         if(level){
3677             if(level<0) coeff= qmul*level - qadd;
3678             else        coeff= qmul*level + qadd;
3679             run_tab[rle_index++]=run;
3680             run=0;
3681
3682             s->dsp.add_8x8basis(rem, basis[j], coeff);
3683         }else{
3684             run++;
3685         }
3686     }
3687 #ifdef REFINE_STATS
3688 if(last_non_zero>0){
3689 STOP_TIMER("init rem[]")
3690 }
3691 }
3692
3693 {START_TIMER
3694 #endif
3695     for(;;){
3696         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3697         int best_coeff=0;
3698         int best_change=0;
3699         int run2, best_unquant_change=0, analyze_gradient;
3700 #ifdef REFINE_STATS
3701 {START_TIMER
3702 #endif
3703         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3704
3705         if(analyze_gradient){
3706 #ifdef REFINE_STATS
3707 {START_TIMER
3708 #endif
3709             for(i=0; i<64; i++){
3710                 int w= weight[i];
3711
3712                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3713             }
3714 #ifdef REFINE_STATS
3715 STOP_TIMER("rem*w*w")}
3716 {START_TIMER
3717 #endif
3718             s->dsp.fdct(d1);
3719 #ifdef REFINE_STATS
3720 STOP_TIMER("dct")}
3721 #endif
3722         }
3723
3724         if(start_i){
3725             const int level= block[0];
3726             int change, old_coeff;
3727
3728             assert(s->mb_intra);
3729
3730             old_coeff= q*level;
3731
3732             for(change=-1; change<=1; change+=2){
3733                 int new_level= level + change;
3734                 int score, new_coeff;
3735
3736                 new_coeff= q*new_level;
3737                 if(new_coeff >= 2048 || new_coeff < 0)
3738                     continue;
3739
3740                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3741                 if(score<best_score){
3742                     best_score= score;
3743                     best_coeff= 0;
3744                     best_change= change;
3745                     best_unquant_change= new_coeff - old_coeff;
3746                 }
3747             }
3748         }
3749
3750         run=0;
3751         rle_index=0;
3752         run2= run_tab[rle_index++];
3753         prev_level=0;
3754         prev_run=0;
3755
3756         for(i=start_i; i<64; i++){
3757             int j= perm_scantable[i];
3758             const int level= block[j];
3759             int change, old_coeff;
3760
3761             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3762                 break;
3763
3764             if(level){
3765                 if(level<0) old_coeff= qmul*level - qadd;
3766                 else        old_coeff= qmul*level + qadd;
3767                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3768             }else{
3769                 old_coeff=0;
3770                 run2--;
3771                 assert(run2>=0 || i >= last_non_zero );
3772             }
3773
3774             for(change=-1; change<=1; change+=2){
3775                 int new_level= level + change;
3776                 int score, new_coeff, unquant_change;
3777
3778                 score=0;
3779                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3780                    continue;
3781
3782                 if(new_level){
3783                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3784                     else            new_coeff= qmul*new_level + qadd;
3785                     if(new_coeff >= 2048 || new_coeff <= -2048)
3786                         continue;
3787                     //FIXME check for overflow
3788
3789                     if(level){
3790                         if(level < 63 && level > -63){
3791                             if(i < last_non_zero)
3792                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3793                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3794                             else
3795                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3796                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3797                         }
3798                     }else{
3799                         assert(FFABS(new_level)==1);
3800
3801                         if(analyze_gradient){
3802                             int g= d1[ scantable[i] ];
3803                             if(g && (g^new_level) >= 0)
3804                                 continue;
3805                         }
3806
3807                         if(i < last_non_zero){
3808                             int next_i= i + run2 + 1;
3809                             int next_level= block[ perm_scantable[next_i] ] + 64;
3810
3811                             if(next_level&(~127))
3812                                 next_level= 0;
3813
3814                             if(next_i < last_non_zero)
3815                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3816                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3817                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3818                             else
3819                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3820                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3821                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3822                         }else{
3823                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3824                             if(prev_level){
3825                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3826                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3827                             }
3828                         }
3829                     }
3830                 }else{
3831                     new_coeff=0;
3832                     assert(FFABS(level)==1);
3833
3834                     if(i < last_non_zero){
3835                         int next_i= i + run2 + 1;
3836                         int next_level= block[ perm_scantable[next_i] ] + 64;
3837
3838                         if(next_level&(~127))
3839                             next_level= 0;
3840
3841                         if(next_i < last_non_zero)
3842                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3843                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
3844                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3845                         else
3846                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3847                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3848                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3849                     }else{
3850                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
3851                         if(prev_level){
3852                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3853                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3854                         }
3855                     }
3856                 }
3857
3858                 score *= lambda;
3859
3860                 unquant_change= new_coeff - old_coeff;
3861                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
3862
3863                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
3864                 if(score<best_score){
3865                     best_score= score;
3866                     best_coeff= i;
3867                     best_change= change;
3868                     best_unquant_change= unquant_change;
3869                 }
3870             }
3871             if(level){
3872                 prev_level= level + 64;
3873                 if(prev_level&(~127))
3874                     prev_level= 0;
3875                 prev_run= run;
3876                 run=0;
3877             }else{
3878                 run++;
3879             }
3880         }
3881 #ifdef REFINE_STATS
3882 STOP_TIMER("iterative step")}
3883 #endif
3884
3885         if(best_change){
3886             int j= perm_scantable[ best_coeff ];
3887
3888             block[j] += best_change;
3889
3890             if(best_coeff > last_non_zero){
3891                 last_non_zero= best_coeff;
3892                 assert(block[j]);
3893 #ifdef REFINE_STATS
3894 after_last++;
3895 #endif
3896             }else{
3897 #ifdef REFINE_STATS
3898 if(block[j]){
3899     if(block[j] - best_change){
3900         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
3901             raise++;
3902         }else{
3903             lower++;
3904         }
3905     }else{
3906         from_zero++;
3907     }
3908 }else{
3909     to_zero++;
3910 }
3911 #endif
3912                 for(; last_non_zero>=start_i; last_non_zero--){
3913                     if(block[perm_scantable[last_non_zero]])
3914                         break;
3915                 }
3916             }
3917 #ifdef REFINE_STATS
3918 count++;
3919 if(256*256*256*64 % count == 0){
3920     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
3921 }
3922 #endif
3923             run=0;
3924             rle_index=0;
3925             for(i=start_i; i<=last_non_zero; i++){
3926                 int j= perm_scantable[i];
3927                 const int level= block[j];
3928
3929                  if(level){
3930                      run_tab[rle_index++]=run;
3931                      run=0;
3932                  }else{
3933                      run++;
3934                  }
3935             }
3936
3937             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
3938         }else{
3939             break;
3940         }
3941     }
3942 #ifdef REFINE_STATS
3943 if(last_non_zero>0){
3944 STOP_TIMER("iterative search")
3945 }
3946 }
3947 #endif
3948
3949     return last_non_zero;
3950 }
3951
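/* Plain (non-trellis) quantizer: forward DCT, optional denoising, then
 * per-coefficient quantization with the configured intra/inter bias;
 * returns the index of the last nonzero coefficient in scan order. */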
3952 int ff_dct_quantize_c(MpegEncContext *s,
3953                         int16_t *block, int n,
3954                         int qscale, int *overflow)
3955 {
3956     int i, j, level, last_non_zero, q, start_i;
3957     const int *qmat;
3958     const uint8_t *scantable= s->intra_scantable.scantable;
3959     int bias;
3960     int max=0;
3961     unsigned int threshold1, threshold2;
3962
3963     s->dsp.fdct (block);
3964
3965     if(s->dct_error_sum)
3966         s->denoise_dct(s, block);
3967
3968     if (s->mb_intra) {
3969         if (!s->h263_aic) {
3970             if (n < 4)
3971                 q = s->y_dc_scale;
3972             else
3973                 q = s->c_dc_scale;
3974             q = q << 3;
3975         } else
3976             /* For AIC we skip quant/dequant of INTRADC */
3977             q = 1 << 3;
3978
3979         /* note: block[0] is assumed to be positive */
3980         block[0] = (block[0] + (q >> 1)) / q;
3981         start_i = 1;
3982         last_non_zero = 0;
3983         qmat = s->q_intra_matrix[qscale];
3984         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
3985     } else {
3986         start_i = 0;
3987         last_non_zero = -1;
3988         qmat = s->q_inter_matrix[qscale];
3989         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
3990     }
3991     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3992     threshold2= (threshold1<<1);
3993     for(i=63;i>=start_i;i--) {
3994         j = scantable[i];
3995         level = block[j] * qmat[j];
3996
3997         if(((unsigned)(level+threshold1))>threshold2){
3998             last_non_zero = i;
3999             break;
4000         }else{
4001             block[j]=0;
4002         }
4003     }
4004     for(i=start_i; i<=last_non_zero; i++) {
4005         j = scantable[i];
4006         level = block[j] * qmat[j];
4007
4008 //        if(   bias+level >= (1<<QMAT_SHIFT)
4009 //           || bias-level >= (1<<QMAT_SHIFT)){
4010         if(((unsigned)(level+threshold1))>threshold2){
4011             if(level>0){
4012                 level= (bias + level)>>QMAT_SHIFT;
4013                 block[j]= level;
4014             }else{
4015                 level= (bias - level)>>QMAT_SHIFT;
4016                 block[j]= -level;
4017             }
4018             max |=level;
4019         }else{
4020             block[j]=0;
4021         }
4022     }
4023     *overflow= s->max_qcoeff < max; //overflow might have happened
4024
4025     /* We need this permutation so that the coefficients are in the order the IDCT expects; only elements up to the last nonzero one are permuted. */
4026     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4027         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4028
4029     return last_non_zero;
4030 }
4031
4032 #define OFFSET(x) offsetof(MpegEncContext, x)
4033 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4034 static const AVOption h263_options[] = {
4035     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4036     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4037     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4038     FF_MPV_COMMON_OPTS
4039     { NULL },
4040 };
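
/* Usage sketch (illustrative only, not part of the encoder): these private
 * options go through the normal AVOptions machinery, e.g. via the options
 * dictionary passed to avcodec_open2():
 *
 *     AVDictionary *opts = NULL;
 *     av_dict_set(&opts, "obmc", "1", 0);               // overlapped block MC
 *     av_dict_set(&opts, "structured_slices", "1", 0);  // GOB headers carry slice positions
 *     ret = avcodec_open2(avctx, codec, &opts);
 *     av_dict_free(&opts);
 */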
4041
4042 static const AVClass h263_class = {
4043     .class_name = "H.263 encoder",
4044     .item_name  = av_default_item_name,
4045     .option     = h263_options,
4046     .version    = LIBAVUTIL_VERSION_INT,
4047 };
4048
4049 AVCodec ff_h263_encoder = {
4050     .name           = "h263",
4051     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4052     .type           = AVMEDIA_TYPE_VIDEO,
4053     .id             = AV_CODEC_ID_H263,
4054     .priv_data_size = sizeof(MpegEncContext),
4055     .init           = ff_MPV_encode_init,
4056     .encode2        = ff_MPV_encode_picture,
4057     .close          = ff_MPV_encode_end,
4058     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4059     .priv_class     = &h263_class,
4060 };
4061
4062 static const AVOption h263p_options[] = {
4063     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4064     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4065     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4066     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4067     FF_MPV_COMMON_OPTS
4068     { NULL },
4069 };
4070 static const AVClass h263p_class = {
4071     .class_name = "H.263p encoder",
4072     .item_name  = av_default_item_name,
4073     .option     = h263p_options,
4074     .version    = LIBAVUTIL_VERSION_INT,
4075 };
4076
4077 AVCodec ff_h263p_encoder = {
4078     .name           = "h263p",
4079     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4080     .type           = AVMEDIA_TYPE_VIDEO,
4081     .id             = AV_CODEC_ID_H263P,
4082     .priv_data_size = sizeof(MpegEncContext),
4083     .init           = ff_MPV_encode_init,
4084     .encode2        = ff_MPV_encode_picture,
4085     .close          = ff_MPV_encode_end,
4086     .capabilities   = CODEC_CAP_SLICE_THREADS,
4087     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4088     .priv_class     = &h263p_class,
4089 };
4090
4091 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4092
4093 AVCodec ff_msmpeg4v2_encoder = {
4094     .name           = "msmpeg4v2",
4095     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4096     .type           = AVMEDIA_TYPE_VIDEO,
4097     .id             = AV_CODEC_ID_MSMPEG4V2,
4098     .priv_data_size = sizeof(MpegEncContext),
4099     .init           = ff_MPV_encode_init,
4100     .encode2        = ff_MPV_encode_picture,
4101     .close          = ff_MPV_encode_end,
4102     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4103     .priv_class     = &msmpeg4v2_class,
4104 };
4105
4106 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4107
4108 AVCodec ff_msmpeg4v3_encoder = {
4109     .name           = "msmpeg4",
4110     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4111     .type           = AVMEDIA_TYPE_VIDEO,
4112     .id             = AV_CODEC_ID_MSMPEG4V3,
4113     .priv_data_size = sizeof(MpegEncContext),
4114     .init           = ff_MPV_encode_init,
4115     .encode2        = ff_MPV_encode_picture,
4116     .close          = ff_MPV_encode_end,
4117     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4118     .priv_class     = &msmpeg4v3_class,
4119 };
4120
4121 FF_MPV_GENERIC_CLASS(wmv1)
4122
4123 AVCodec ff_wmv1_encoder = {
4124     .name           = "wmv1",
4125     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4126     .type           = AVMEDIA_TYPE_VIDEO,
4127     .id             = AV_CODEC_ID_WMV1,
4128     .priv_data_size = sizeof(MpegEncContext),
4129     .init           = ff_MPV_encode_init,
4130     .encode2        = ff_MPV_encode_picture,
4131     .close          = ff_MPV_encode_end,
4132     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4133     .priv_class     = &wmv1_class,
4134 };