]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
mpegvideo: split the encoding-only parts of frame_start() into a separate function
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "avcodec.h"
38 #include "dct.h"
39 #include "dsputil.h"
40 #include "mpeg12.h"
41 #include "mpegvideo.h"
42 #include "h261.h"
43 #include "h263.h"
44 #include "mathops.h"
45 #include "mjpegenc.h"
46 #include "msmpeg4.h"
47 #include "faandct.h"
48 #include "thread.h"
49 #include "aandcttab.h"
50 #include "flv.h"
51 #include "mpeg4video.h"
52 #include "internal.h"
53 #include "bytestream.h"
54 #include <limits.h>
55
/* Prototypes for file-local helpers that are referenced in this file before
 * (or without) their definitions being visible at the point of use. */
static int encode_picture(MpegEncContext *s, int picture_number);
static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
static int sse_mb(MpegEncContext *s);
static void denoise_dct_c(MpegEncContext *s, int16_t *block);
static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);

/* Shared default tables; MPV_encode_defaults() installs them into the context
 * as s->me.mv_penalty and s->fcode_tab respectively. */
static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
static uint8_t default_fcode_tab[MAX_MV * 2 + 1];

/* Generic option table: the entries of FF_MPV_COMMON_OPTS followed by the
 * terminating { NULL } element. */
const AVOption ff_mpv_generic_options[] = {
    FF_MPV_COMMON_OPTS
    { NULL },
};
69
70 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
71                        uint16_t (*qmat16)[2][64],
72                        const uint16_t *quant_matrix,
73                        int bias, int qmin, int qmax, int intra)
74 {
75     int qscale;
76     int shift = 0;
77
78     for (qscale = qmin; qscale <= qmax; qscale++) {
79         int i;
80         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
81             dsp->fdct == ff_jpeg_fdct_islow_10 ||
82             dsp->fdct == ff_faandct) {
83             for (i = 0; i < 64; i++) {
84                 const int j = dsp->idct_permutation[i];
85                 /* 16 <= qscale * quant_matrix[i] <= 7905
86                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
87                  *             19952 <=              x  <= 249205026
88                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
89                  *           3444240 >= (1 << 36) / (x) >= 275 */
90
91                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
92                                         (qscale * quant_matrix[j]));
93             }
94         } else if (dsp->fdct == ff_fdct_ifast) {
95             for (i = 0; i < 64; i++) {
96                 const int j = dsp->idct_permutation[i];
97                 /* 16 <= qscale * quant_matrix[i] <= 7905
98                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
99                  *             19952 <=              x  <= 249205026
100                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
101                  *           3444240 >= (1 << 36) / (x) >= 275 */
102
103                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
104                                         (ff_aanscales[i] * qscale *
105                                          quant_matrix[j]));
106             }
107         } else {
108             for (i = 0; i < 64; i++) {
109                 const int j = dsp->idct_permutation[i];
110                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
111                  * Assume x = qscale * quant_matrix[i]
112                  * So             16 <=              x  <= 7905
113                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
114                  * so          32768 >= (1 << 19) / (x) >= 67 */
115                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
116                                         (qscale * quant_matrix[j]));
117                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
118                 //                    (qscale * quant_matrix[i]);
119                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
120                                        (qscale * quant_matrix[j]);
121
122                 if (qmat16[qscale][0][i] == 0 ||
123                     qmat16[qscale][0][i] == 128 * 256)
124                     qmat16[qscale][0][i] = 128 * 256 - 1;
125                 qmat16[qscale][1][i] =
126                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
127                                 qmat16[qscale][0][i]);
128             }
129         }
130
131         for (i = intra; i < 64; i++) {
132             int64_t max = 8191;
133             if (dsp->fdct == ff_fdct_ifast) {
134                 max = (8191LL * ff_aanscales[i]) >> 14;
135             }
136             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
137                 shift++;
138             }
139         }
140     }
141     if (shift) {
142         av_log(NULL, AV_LOG_INFO,
143                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
144                QMAT_SHIFT - shift);
145     }
146 }
147
148 static inline void update_qscale(MpegEncContext *s)
149 {
150     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
151                 (FF_LAMBDA_SHIFT + 7);
152     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
153
154     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
155                  FF_LAMBDA_SHIFT;
156 }
157
158 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
159 {
160     int i;
161
162     if (matrix) {
163         put_bits(pb, 1, 1);
164         for (i = 0; i < 64; i++) {
165             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
166         }
167     } else
168         put_bits(pb, 1, 0);
169 }
170
171 /**
172  * init s->current_picture.qscale_table from s->lambda_table
173  */
174 void ff_init_qscale_tab(MpegEncContext *s)
175 {
176     int8_t * const qscale_table = s->current_picture.qscale_table;
177     int i;
178
179     for (i = 0; i < s->mb_num; i++) {
180         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
181         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
182         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
183                                                   s->avctx->qmax);
184     }
185 }
186
187 static void update_duplicate_context_after_me(MpegEncContext *dst,
188                                               MpegEncContext *src)
189 {
190 #define COPY(a) dst->a= src->a
191     COPY(pict_type);
192     COPY(current_picture);
193     COPY(f_code);
194     COPY(b_code);
195     COPY(qscale);
196     COPY(lambda);
197     COPY(lambda2);
198     COPY(picture_in_gop_number);
199     COPY(gop_picture_number);
200     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
201     COPY(progressive_frame);    // FIXME don't set in encode_header
202     COPY(partitioned_frame);    // FIXME don't set in encode_header
203 #undef COPY
204 }
205
206 /**
207  * Set the given MpegEncContext to defaults for encoding.
208  * the changed fields will not depend upon the prior state of the MpegEncContext.
209  */
210 static void MPV_encode_defaults(MpegEncContext *s)
211 {
212     int i;
213     ff_MPV_common_defaults(s);
214
215     for (i = -16; i < 16; i++) {
216         default_fcode_tab[i + MAX_MV] = 1;
217     }
218     s->me.mv_penalty = default_mv_penalty;
219     s->fcode_tab     = default_fcode_tab;
220
221     s->input_picture_number  = 0;
222     s->picture_in_gop_number = 0;
223 }
224
225 /* init video encoder */
226 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
227 {
228     MpegEncContext *s = avctx->priv_data;
229     int i, ret;
230
231     MPV_encode_defaults(s);
232
233     switch (avctx->codec_id) {
234     case AV_CODEC_ID_MPEG2VIDEO:
235         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
236             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
237             av_log(avctx, AV_LOG_ERROR,
238                    "only YUV420 and YUV422 are supported\n");
239             return -1;
240         }
241         break;
242     case AV_CODEC_ID_MJPEG:
243         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
244             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
245             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
246               avctx->pix_fmt != AV_PIX_FMT_YUV422P) ||
247              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
248             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
249             return -1;
250         }
251         break;
252     default:
253         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
254             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
255             return -1;
256         }
257     }
258
259     switch (avctx->pix_fmt) {
260     case AV_PIX_FMT_YUVJ422P:
261     case AV_PIX_FMT_YUV422P:
262         s->chroma_format = CHROMA_422;
263         break;
264     case AV_PIX_FMT_YUVJ420P:
265     case AV_PIX_FMT_YUV420P:
266     default:
267         s->chroma_format = CHROMA_420;
268         break;
269     }
270
271     s->bit_rate = avctx->bit_rate;
272     s->width    = avctx->width;
273     s->height   = avctx->height;
274     if (avctx->gop_size > 600 &&
275         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
276         av_log(avctx, AV_LOG_ERROR,
277                "Warning keyframe interval too large! reducing it ...\n");
278         avctx->gop_size = 600;
279     }
280     s->gop_size     = avctx->gop_size;
281     s->avctx        = avctx;
282     s->flags        = avctx->flags;
283     s->flags2       = avctx->flags2;
284     if (avctx->max_b_frames > MAX_B_FRAMES) {
285         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
286                "is %d.\n", MAX_B_FRAMES);
287     }
288     s->max_b_frames = avctx->max_b_frames;
289     s->codec_id     = avctx->codec->id;
290     s->strict_std_compliance = avctx->strict_std_compliance;
291     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
292     s->mpeg_quant         = avctx->mpeg_quant;
293     s->rtp_mode           = !!avctx->rtp_payload_size;
294     s->intra_dc_precision = avctx->intra_dc_precision;
295     s->user_specified_pts = AV_NOPTS_VALUE;
296
297     if (s->gop_size <= 1) {
298         s->intra_only = 1;
299         s->gop_size   = 12;
300     } else {
301         s->intra_only = 0;
302     }
303
304     s->me_method = avctx->me_method;
305
306     /* Fixed QSCALE */
307     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
308
309     s->adaptive_quant = (s->avctx->lumi_masking ||
310                          s->avctx->dark_masking ||
311                          s->avctx->temporal_cplx_masking ||
312                          s->avctx->spatial_cplx_masking  ||
313                          s->avctx->p_masking      ||
314                          s->avctx->border_masking ||
315                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
316                         !s->fixed_qscale;
317
318     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
319
320     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
321         av_log(avctx, AV_LOG_ERROR,
322                "a vbv buffer size is needed, "
323                "for encoding with a maximum bitrate\n");
324         return -1;
325     }
326
327     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
328         av_log(avctx, AV_LOG_INFO,
329                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
330     }
331
332     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
333         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
334         return -1;
335     }
336
337     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
338         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
339         return -1;
340     }
341
342     if (avctx->rc_max_rate &&
343         avctx->rc_max_rate == avctx->bit_rate &&
344         avctx->rc_max_rate != avctx->rc_min_rate) {
345         av_log(avctx, AV_LOG_INFO,
346                "impossible bitrate constraints, this will fail\n");
347     }
348
349     if (avctx->rc_buffer_size &&
350         avctx->bit_rate * (int64_t)avctx->time_base.num >
351             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
352         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
353         return -1;
354     }
355
356     if (!s->fixed_qscale &&
357         avctx->bit_rate * av_q2d(avctx->time_base) >
358             avctx->bit_rate_tolerance) {
359         av_log(avctx, AV_LOG_ERROR,
360                "bitrate tolerance too small for bitrate\n");
361         return -1;
362     }
363
364     if (s->avctx->rc_max_rate &&
365         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
366         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
367          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
368         90000LL * (avctx->rc_buffer_size - 1) >
369             s->avctx->rc_max_rate * 0xFFFFLL) {
370         av_log(avctx, AV_LOG_INFO,
371                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
372                "specified vbv buffer is too large for the given bitrate!\n");
373     }
374
375     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
376         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
377         s->codec_id != AV_CODEC_ID_FLV1) {
378         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
379         return -1;
380     }
381
382     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
383         av_log(avctx, AV_LOG_ERROR,
384                "OBMC is only supported with simple mb decision\n");
385         return -1;
386     }
387
388     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
389         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
390         return -1;
391     }
392
393     if (s->max_b_frames                    &&
394         s->codec_id != AV_CODEC_ID_MPEG4      &&
395         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
396         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
397         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
398         return -1;
399     }
400
401     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
402          s->codec_id == AV_CODEC_ID_H263  ||
403          s->codec_id == AV_CODEC_ID_H263P) &&
404         (avctx->sample_aspect_ratio.num > 255 ||
405          avctx->sample_aspect_ratio.den > 255)) {
406         av_log(avctx, AV_LOG_ERROR,
407                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
408                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
409         return -1;
410     }
411
412     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
413         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
414         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
415         return -1;
416     }
417
418     // FIXME mpeg2 uses that too
419     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
420         av_log(avctx, AV_LOG_ERROR,
421                "mpeg2 style quantization not supported by codec\n");
422         return -1;
423     }
424
425     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
426         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
427         return -1;
428     }
429
430     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
431         s->avctx->mb_decision != FF_MB_DECISION_RD) {
432         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
433         return -1;
434     }
435
436     if (s->avctx->scenechange_threshold < 1000000000 &&
437         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
438         av_log(avctx, AV_LOG_ERROR,
439                "closed gop with scene change detection are not supported yet, "
440                "set threshold to 1000000000\n");
441         return -1;
442     }
443
444     if (s->flags & CODEC_FLAG_LOW_DELAY) {
445         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
446             av_log(avctx, AV_LOG_ERROR,
447                   "low delay forcing is only available for mpeg2\n");
448             return -1;
449         }
450         if (s->max_b_frames != 0) {
451             av_log(avctx, AV_LOG_ERROR,
452                    "b frames cannot be used with low delay\n");
453             return -1;
454         }
455     }
456
457     if (s->q_scale_type == 1) {
458         if (avctx->qmax > 12) {
459             av_log(avctx, AV_LOG_ERROR,
460                    "non linear quant only supports qmax <= 12 currently\n");
461             return -1;
462         }
463     }
464
465     if (s->avctx->thread_count > 1         &&
466         s->codec_id != AV_CODEC_ID_MPEG4      &&
467         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
468         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
469         (s->codec_id != AV_CODEC_ID_H263P)) {
470         av_log(avctx, AV_LOG_ERROR,
471                "multi threaded encoding not supported by codec\n");
472         return -1;
473     }
474
475     if (s->avctx->thread_count < 1) {
476         av_log(avctx, AV_LOG_ERROR,
477                "automatic thread number detection not supported by codec,"
478                "patch welcome\n");
479         return -1;
480     }
481
482     if (s->avctx->thread_count > 1)
483         s->rtp_mode = 1;
484
485     if (!avctx->time_base.den || !avctx->time_base.num) {
486         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
487         return -1;
488     }
489
490     i = (INT_MAX / 2 + 128) >> 8;
491     if (avctx->mb_threshold >= i) {
492         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
493                i - 1);
494         return -1;
495     }
496
497     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
498         av_log(avctx, AV_LOG_INFO,
499                "notice: b_frame_strategy only affects the first pass\n");
500         avctx->b_frame_strategy = 0;
501     }
502
503     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
504     if (i > 1) {
505         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
506         avctx->time_base.den /= i;
507         avctx->time_base.num /= i;
508         //return -1;
509     }
510
511     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
512         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
513         // (a + x * 3 / 8) / x
514         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
515         s->inter_quant_bias = 0;
516     } else {
517         s->intra_quant_bias = 0;
518         // (a - x / 4) / x
519         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
520     }
521
522     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
523         s->intra_quant_bias = avctx->intra_quant_bias;
524     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
525         s->inter_quant_bias = avctx->inter_quant_bias;
526
527     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
528         s->avctx->time_base.den > (1 << 16) - 1) {
529         av_log(avctx, AV_LOG_ERROR,
530                "timebase %d/%d not supported by MPEG 4 standard, "
531                "the maximum admitted value for the timebase denominator "
532                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
533                (1 << 16) - 1);
534         return -1;
535     }
536     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
537
538     switch (avctx->codec->id) {
539     case AV_CODEC_ID_MPEG1VIDEO:
540         s->out_format = FMT_MPEG1;
541         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
542         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
543         break;
544     case AV_CODEC_ID_MPEG2VIDEO:
545         s->out_format = FMT_MPEG1;
546         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
547         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
548         s->rtp_mode   = 1;
549         break;
550     case AV_CODEC_ID_MJPEG:
551         s->out_format = FMT_MJPEG;
552         s->intra_only = 1; /* force intra only for jpeg */
553         if (!CONFIG_MJPEG_ENCODER ||
554             ff_mjpeg_encode_init(s) < 0)
555             return -1;
556         avctx->delay = 0;
557         s->low_delay = 1;
558         break;
559     case AV_CODEC_ID_H261:
560         if (!CONFIG_H261_ENCODER)
561             return -1;
562         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
563             av_log(avctx, AV_LOG_ERROR,
564                    "The specified picture size of %dx%d is not valid for the "
565                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
566                     s->width, s->height);
567             return -1;
568         }
569         s->out_format = FMT_H261;
570         avctx->delay  = 0;
571         s->low_delay  = 1;
572         break;
573     case AV_CODEC_ID_H263:
574         if (!CONFIG_H263_ENCODER)
575         return -1;
576         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
577                              s->width, s->height) == 8) {
578             av_log(avctx, AV_LOG_INFO,
579                    "The specified picture size of %dx%d is not valid for "
580                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
581                    "352x288, 704x576, and 1408x1152."
582                    "Try H.263+.\n", s->width, s->height);
583             return -1;
584         }
585         s->out_format = FMT_H263;
586         avctx->delay  = 0;
587         s->low_delay  = 1;
588         break;
589     case AV_CODEC_ID_H263P:
590         s->out_format = FMT_H263;
591         s->h263_plus  = 1;
592         /* Fx */
593         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
594         s->modified_quant  = s->h263_aic;
595         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
596         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
597
598         /* /Fx */
599         /* These are just to be sure */
600         avctx->delay = 0;
601         s->low_delay = 1;
602         break;
603     case AV_CODEC_ID_FLV1:
604         s->out_format      = FMT_H263;
605         s->h263_flv        = 2; /* format = 1; 11-bit codes */
606         s->unrestricted_mv = 1;
607         s->rtp_mode  = 0; /* don't allow GOB */
608         avctx->delay = 0;
609         s->low_delay = 1;
610         break;
611     case AV_CODEC_ID_RV10:
612         s->out_format = FMT_H263;
613         avctx->delay  = 0;
614         s->low_delay  = 1;
615         break;
616     case AV_CODEC_ID_RV20:
617         s->out_format      = FMT_H263;
618         avctx->delay       = 0;
619         s->low_delay       = 1;
620         s->modified_quant  = 1;
621         s->h263_aic        = 1;
622         s->h263_plus       = 1;
623         s->loop_filter     = 1;
624         s->unrestricted_mv = 0;
625         break;
626     case AV_CODEC_ID_MPEG4:
627         s->out_format      = FMT_H263;
628         s->h263_pred       = 1;
629         s->unrestricted_mv = 1;
630         s->low_delay       = s->max_b_frames ? 0 : 1;
631         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
632         break;
633     case AV_CODEC_ID_MSMPEG4V2:
634         s->out_format      = FMT_H263;
635         s->h263_pred       = 1;
636         s->unrestricted_mv = 1;
637         s->msmpeg4_version = 2;
638         avctx->delay       = 0;
639         s->low_delay       = 1;
640         break;
641     case AV_CODEC_ID_MSMPEG4V3:
642         s->out_format        = FMT_H263;
643         s->h263_pred         = 1;
644         s->unrestricted_mv   = 1;
645         s->msmpeg4_version   = 3;
646         s->flipflop_rounding = 1;
647         avctx->delay         = 0;
648         s->low_delay         = 1;
649         break;
650     case AV_CODEC_ID_WMV1:
651         s->out_format        = FMT_H263;
652         s->h263_pred         = 1;
653         s->unrestricted_mv   = 1;
654         s->msmpeg4_version   = 4;
655         s->flipflop_rounding = 1;
656         avctx->delay         = 0;
657         s->low_delay         = 1;
658         break;
659     case AV_CODEC_ID_WMV2:
660         s->out_format        = FMT_H263;
661         s->h263_pred         = 1;
662         s->unrestricted_mv   = 1;
663         s->msmpeg4_version   = 5;
664         s->flipflop_rounding = 1;
665         avctx->delay         = 0;
666         s->low_delay         = 1;
667         break;
668     default:
669         return -1;
670     }
671
672     avctx->has_b_frames = !s->low_delay;
673
674     s->encoding = 1;
675
676     s->progressive_frame    =
677     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
678                                                 CODEC_FLAG_INTERLACED_ME) ||
679                                 s->alternate_scan);
680
681     /* init */
682     if (ff_MPV_common_init(s) < 0)
683         return -1;
684
685     if (ARCH_X86)
686         ff_MPV_encode_init_x86(s);
687
688     s->avctx->coded_frame = &s->current_picture.f;
689
690     if (s->msmpeg4_version) {
691         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
692                           2 * 2 * (MAX_LEVEL + 1) *
693                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
694     }
695     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
696
697     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
698     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
699     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
700     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
701     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
702                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
703     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
704                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
705
706     if (s->avctx->noise_reduction) {
707         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
708                           2 * 64 * sizeof(uint16_t), fail);
709     }
710
711     ff_h263dsp_init(&s->h263dsp);
712     if (!s->dct_quantize)
713         s->dct_quantize = ff_dct_quantize_c;
714     if (!s->denoise_dct)
715         s->denoise_dct  = denoise_dct_c;
716     s->fast_dct_quantize = s->dct_quantize;
717     if (avctx->trellis)
718         s->dct_quantize  = dct_quantize_trellis_c;
719
720     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
721         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
722
723     s->quant_precision = 5;
724
725     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
726     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
727
728     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
729         ff_h261_encode_init(s);
730     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
731         ff_h263_encode_init(s);
732     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
733         ff_msmpeg4_encode_init(s);
734     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
735         && s->out_format == FMT_MPEG1)
736         ff_mpeg1_encode_init(s);
737
738     /* init q matrix */
739     for (i = 0; i < 64; i++) {
740         int j = s->dsp.idct_permutation[i];
741         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
742             s->mpeg_quant) {
743             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
744             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
745         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
746             s->intra_matrix[j] =
747             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
748         } else {
749             /* mpeg1/2 */
750             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
751             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
752         }
753         if (s->avctx->intra_matrix)
754             s->intra_matrix[j] = s->avctx->intra_matrix[i];
755         if (s->avctx->inter_matrix)
756             s->inter_matrix[j] = s->avctx->inter_matrix[i];
757     }
758
759     /* precompute matrix */
760     /* for mjpeg, we do include qscale in the matrix */
761     if (s->out_format != FMT_MJPEG) {
762         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
763                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
764                           31, 1);
765         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
766                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
767                           31, 0);
768     }
769
770     if (ff_rate_control_init(s) < 0)
771         return -1;
772
773 #if FF_API_ERROR_RATE
774     FF_DISABLE_DEPRECATION_WARNINGS
775     if (avctx->error_rate)
776         s->error_rate = avctx->error_rate;
777     FF_ENABLE_DEPRECATION_WARNINGS;
778 #endif
779
780     if (avctx->b_frame_strategy == 2) {
781         for (i = 0; i < s->max_b_frames + 2; i++) {
782             s->tmp_frames[i] = av_frame_alloc();
783             if (!s->tmp_frames[i])
784                 return AVERROR(ENOMEM);
785
786             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
787             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
788             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
789
790             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
791             if (ret < 0)
792                 return ret;
793         }
794     }
795
796     return 0;
797 fail:
798     ff_MPV_encode_end(avctx);
799     return AVERROR_UNKNOWN;
800 }
801
802 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
803 {
804     MpegEncContext *s = avctx->priv_data;
805     int i;
806
807     ff_rate_control_uninit(s);
808
809     ff_MPV_common_end(s);
810     if (CONFIG_MJPEG_ENCODER &&
811         s->out_format == FMT_MJPEG)
812         ff_mjpeg_encode_close(s);
813
814     av_freep(&avctx->extradata);
815
816     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
817         av_frame_free(&s->tmp_frames[i]);
818
819     ff_free_picture_tables(&s->new_picture);
820     ff_mpeg_unref_picture(s, &s->new_picture);
821
822     av_freep(&s->avctx->stats_out);
823     av_freep(&s->ac_stats);
824
825     av_freep(&s->q_intra_matrix);
826     av_freep(&s->q_inter_matrix);
827     av_freep(&s->q_intra_matrix16);
828     av_freep(&s->q_inter_matrix16);
829     av_freep(&s->input_picture);
830     av_freep(&s->reordered_input_picture);
831     av_freep(&s->dct_offset);
832
833     return 0;
834 }
835
/**
 * Sum of absolute differences between a 16x16 block and a constant value.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared against
 * @param stride distance in bytes between two consecutive rows of src
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int row, col;
    int total = 0;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++) {
            const int diff = src[col + row * stride] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
849
850 static int get_intra_count(MpegEncContext *s, uint8_t *src,
851                            uint8_t *ref, int stride)
852 {
853     int x, y, w, h;
854     int acc = 0;
855
856     w = s->width  & ~15;
857     h = s->height & ~15;
858
859     for (y = 0; y < h; y += 16) {
860         for (x = 0; x < w; x += 16) {
861             int offset = x + y * stride;
862             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
863                                      16);
864             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
865             int sae  = get_sae(src + offset, mean, stride);
866
867             acc += sae + 500 < sad;
868         }
869     }
870     return acc;
871 }
872
873
874 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
875 {
876     Picture *pic = NULL;
877     int64_t pts;
878     int i, display_picture_number = 0, ret;
879     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
880                                                  (s->low_delay ? 0 : 1);
881     int direct = 1;
882
883     if (pic_arg) {
884         pts = pic_arg->pts;
885         display_picture_number = s->input_picture_number++;
886
887         if (pts != AV_NOPTS_VALUE) {
888             if (s->user_specified_pts != AV_NOPTS_VALUE) {
889                 int64_t time = pts;
890                 int64_t last = s->user_specified_pts;
891
892                 if (time <= last) {
893                     av_log(s->avctx, AV_LOG_ERROR,
894                            "Error, Invalid timestamp=%"PRId64", "
895                            "last=%"PRId64"\n", pts, s->user_specified_pts);
896                     return -1;
897                 }
898
899                 if (!s->low_delay && display_picture_number == 1)
900                     s->dts_delta = time - last;
901             }
902             s->user_specified_pts = pts;
903         } else {
904             if (s->user_specified_pts != AV_NOPTS_VALUE) {
905                 s->user_specified_pts =
906                 pts = s->user_specified_pts + 1;
907                 av_log(s->avctx, AV_LOG_INFO,
908                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
909                        pts);
910             } else {
911                 pts = display_picture_number;
912             }
913         }
914     }
915
916     if (pic_arg) {
917         if (!pic_arg->buf[0]);
918             direct = 0;
919         if (pic_arg->linesize[0] != s->linesize)
920             direct = 0;
921         if (pic_arg->linesize[1] != s->uvlinesize)
922             direct = 0;
923         if (pic_arg->linesize[2] != s->uvlinesize)
924             direct = 0;
925
926         av_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
927                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
928
929         if (direct) {
930             i = ff_find_unused_picture(s, 1);
931             if (i < 0)
932                 return i;
933
934             pic = &s->picture[i];
935             pic->reference = 3;
936
937             if ((ret = av_frame_ref(&pic->f, pic_arg)) < 0)
938                 return ret;
939             if (ff_alloc_picture(s, pic, 1) < 0) {
940                 return -1;
941             }
942         } else {
943             i = ff_find_unused_picture(s, 0);
944             if (i < 0)
945                 return i;
946
947             pic = &s->picture[i];
948             pic->reference = 3;
949
950             if (ff_alloc_picture(s, pic, 0) < 0) {
951                 return -1;
952             }
953
954             if (pic->f.data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
955                 pic->f.data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
956                 pic->f.data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
957                 // empty
958             } else {
959                 int h_chroma_shift, v_chroma_shift;
960                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
961                                                  &h_chroma_shift,
962                                                  &v_chroma_shift);
963
964                 for (i = 0; i < 3; i++) {
965                     int src_stride = pic_arg->linesize[i];
966                     int dst_stride = i ? s->uvlinesize : s->linesize;
967                     int h_shift = i ? h_chroma_shift : 0;
968                     int v_shift = i ? v_chroma_shift : 0;
969                     int w = s->width  >> h_shift;
970                     int h = s->height >> v_shift;
971                     uint8_t *src = pic_arg->data[i];
972                     uint8_t *dst = pic->f.data[i];
973
974                     if (!s->avctx->rc_buffer_size)
975                         dst += INPLACE_OFFSET;
976
977                     if (src_stride == dst_stride)
978                         memcpy(dst, src, src_stride * h);
979                     else {
980                         while (h--) {
981                             memcpy(dst, src, w);
982                             dst += dst_stride;
983                             src += src_stride;
984                         }
985                     }
986                 }
987             }
988         }
989         ret = av_frame_copy_props(&pic->f, pic_arg);
990         if (ret < 0)
991             return ret;
992
993         pic->f.display_picture_number = display_picture_number;
994         pic->f.pts = pts; // we set this here to avoid modifiying pic_arg
995     }
996
997     /* shift buffer entries */
998     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
999         s->input_picture[i - 1] = s->input_picture[i];
1000
1001     s->input_picture[encoding_delay] = (Picture*) pic;
1002
1003     return 0;
1004 }
1005
1006 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1007 {
1008     int x, y, plane;
1009     int score = 0;
1010     int64_t score64 = 0;
1011
1012     for (plane = 0; plane < 3; plane++) {
1013         const int stride = p->f.linesize[plane];
1014         const int bw = plane ? 1 : 2;
1015         for (y = 0; y < s->mb_height * bw; y++) {
1016             for (x = 0; x < s->mb_width * bw; x++) {
1017                 int off = p->shared ? 0 : 16;
1018                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1019                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1020                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1021
1022                 switch (s->avctx->frame_skip_exp) {
1023                 case 0: score    =  FFMAX(score, v);          break;
1024                 case 1: score   += FFABS(v);                  break;
1025                 case 2: score   += v * v;                     break;
1026                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1027                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1028                 }
1029             }
1030         }
1031     }
1032
1033     if (score)
1034         score64 = score;
1035
1036     if (score64 < s->avctx->frame_skip_threshold)
1037         return 1;
1038     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1039         return 1;
1040     return 0;
1041 }
1042
1043 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1044 {
1045     AVPacket pkt = { 0 };
1046     int ret, got_output;
1047
1048     av_init_packet(&pkt);
1049     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1050     if (ret < 0)
1051         return ret;
1052
1053     ret = pkt.size;
1054     av_free_packet(&pkt);
1055     return ret;
1056 }
1057
1058 static int estimate_best_b_count(MpegEncContext *s)
1059 {
1060     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1061     AVCodecContext *c = avcodec_alloc_context3(NULL);
1062     const int scale = s->avctx->brd_scale;
1063     int i, j, out_size, p_lambda, b_lambda, lambda2;
1064     int64_t best_rd  = INT64_MAX;
1065     int best_b_count = -1;
1066
1067     assert(scale >= 0 && scale <= 3);
1068
1069     //emms_c();
1070     //s->next_picture_ptr->quality;
1071     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1072     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1073     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1074     if (!b_lambda) // FIXME we should do this somewhere else
1075         b_lambda = p_lambda;
1076     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1077                FF_LAMBDA_SHIFT;
1078
1079     c->width        = s->width  >> scale;
1080     c->height       = s->height >> scale;
1081     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1082                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1083     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1084     c->mb_decision  = s->avctx->mb_decision;
1085     c->me_cmp       = s->avctx->me_cmp;
1086     c->mb_cmp       = s->avctx->mb_cmp;
1087     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1088     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1089     c->time_base    = s->avctx->time_base;
1090     c->max_b_frames = s->max_b_frames;
1091
1092     if (avcodec_open2(c, codec, NULL) < 0)
1093         return -1;
1094
1095     for (i = 0; i < s->max_b_frames + 2; i++) {
1096         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1097                                                 s->next_picture_ptr;
1098
1099         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1100             pre_input = *pre_input_ptr;
1101
1102             if (!pre_input.shared && i) {
1103                 pre_input.f.data[0] += INPLACE_OFFSET;
1104                 pre_input.f.data[1] += INPLACE_OFFSET;
1105                 pre_input.f.data[2] += INPLACE_OFFSET;
1106             }
1107
1108             s->dsp.shrink[scale](s->tmp_frames[i]->data[0], s->tmp_frames[i]->linesize[0],
1109                                  pre_input.f.data[0], pre_input.f.linesize[0],
1110                                  c->width,      c->height);
1111             s->dsp.shrink[scale](s->tmp_frames[i]->data[1], s->tmp_frames[i]->linesize[1],
1112                                  pre_input.f.data[1], pre_input.f.linesize[1],
1113                                  c->width >> 1, c->height >> 1);
1114             s->dsp.shrink[scale](s->tmp_frames[i]->data[2], s->tmp_frames[i]->linesize[2],
1115                                  pre_input.f.data[2], pre_input.f.linesize[2],
1116                                  c->width >> 1, c->height >> 1);
1117         }
1118     }
1119
1120     for (j = 0; j < s->max_b_frames + 1; j++) {
1121         int64_t rd = 0;
1122
1123         if (!s->input_picture[j])
1124             break;
1125
1126         c->error[0] = c->error[1] = c->error[2] = 0;
1127
1128         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1129         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1130
1131         out_size = encode_frame(c, s->tmp_frames[0]);
1132
1133         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1134
1135         for (i = 0; i < s->max_b_frames + 1; i++) {
1136             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1137
1138             s->tmp_frames[i + 1]->pict_type = is_p ?
1139                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1140             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1141
1142             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1143
1144             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1145         }
1146
1147         /* get the delayed frames */
1148         while (out_size) {
1149             out_size = encode_frame(c, NULL);
1150             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1151         }
1152
1153         rd += c->error[0] + c->error[1] + c->error[2];
1154
1155         if (rd < best_rd) {
1156             best_rd = rd;
1157             best_b_count = j;
1158         }
1159     }
1160
1161     avcodec_close(c);
1162     av_freep(&c);
1163
1164     return best_b_count;
1165 }
1166
1167 static int select_input_picture(MpegEncContext *s)
1168 {
1169     int i, ret;
1170
1171     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1172         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1173     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1174
1175     /* set next picture type & ordering */
1176     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1177         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1178             s->next_picture_ptr == NULL || s->intra_only) {
1179             s->reordered_input_picture[0] = s->input_picture[0];
1180             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1181             s->reordered_input_picture[0]->f.coded_picture_number =
1182                 s->coded_picture_number++;
1183         } else {
1184             int b_frames;
1185
1186             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1187                 if (s->picture_in_gop_number < s->gop_size &&
1188                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1189                     // FIXME check that te gop check above is +-1 correct
1190                     av_frame_unref(&s->input_picture[0]->f);
1191
1192                     emms_c();
1193                     ff_vbv_update(s, 0);
1194
1195                     goto no_output_pic;
1196                 }
1197             }
1198
1199             if (s->flags & CODEC_FLAG_PASS2) {
1200                 for (i = 0; i < s->max_b_frames + 1; i++) {
1201                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1202
1203                     if (pict_num >= s->rc_context.num_entries)
1204                         break;
1205                     if (!s->input_picture[i]) {
1206                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1207                         break;
1208                     }
1209
1210                     s->input_picture[i]->f.pict_type =
1211                         s->rc_context.entry[pict_num].new_pict_type;
1212                 }
1213             }
1214
1215             if (s->avctx->b_frame_strategy == 0) {
1216                 b_frames = s->max_b_frames;
1217                 while (b_frames && !s->input_picture[b_frames])
1218                     b_frames--;
1219             } else if (s->avctx->b_frame_strategy == 1) {
1220                 for (i = 1; i < s->max_b_frames + 1; i++) {
1221                     if (s->input_picture[i] &&
1222                         s->input_picture[i]->b_frame_score == 0) {
1223                         s->input_picture[i]->b_frame_score =
1224                             get_intra_count(s,
1225                                             s->input_picture[i    ]->f.data[0],
1226                                             s->input_picture[i - 1]->f.data[0],
1227                                             s->linesize) + 1;
1228                     }
1229                 }
1230                 for (i = 0; i < s->max_b_frames + 1; i++) {
1231                     if (s->input_picture[i] == NULL ||
1232                         s->input_picture[i]->b_frame_score - 1 >
1233                             s->mb_num / s->avctx->b_sensitivity)
1234                         break;
1235                 }
1236
1237                 b_frames = FFMAX(0, i - 1);
1238
1239                 /* reset scores */
1240                 for (i = 0; i < b_frames + 1; i++) {
1241                     s->input_picture[i]->b_frame_score = 0;
1242                 }
1243             } else if (s->avctx->b_frame_strategy == 2) {
1244                 b_frames = estimate_best_b_count(s);
1245             } else {
1246                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1247                 b_frames = 0;
1248             }
1249
1250             emms_c();
1251
1252             for (i = b_frames - 1; i >= 0; i--) {
1253                 int type = s->input_picture[i]->f.pict_type;
1254                 if (type && type != AV_PICTURE_TYPE_B)
1255                     b_frames = i;
1256             }
1257             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1258                 b_frames == s->max_b_frames) {
1259                 av_log(s->avctx, AV_LOG_ERROR,
1260                        "warning, too many b frames in a row\n");
1261             }
1262
1263             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1264                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1265                     s->gop_size > s->picture_in_gop_number) {
1266                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1267                 } else {
1268                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1269                         b_frames = 0;
1270                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1271                 }
1272             }
1273
1274             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1275                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1276                 b_frames--;
1277
1278             s->reordered_input_picture[0] = s->input_picture[b_frames];
1279             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1280                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1281             s->reordered_input_picture[0]->f.coded_picture_number =
1282                 s->coded_picture_number++;
1283             for (i = 0; i < b_frames; i++) {
1284                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1285                 s->reordered_input_picture[i + 1]->f.pict_type =
1286                     AV_PICTURE_TYPE_B;
1287                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1288                     s->coded_picture_number++;
1289             }
1290         }
1291     }
1292 no_output_pic:
1293     if (s->reordered_input_picture[0]) {
1294         s->reordered_input_picture[0]->reference =
1295            s->reordered_input_picture[0]->f.pict_type !=
1296                AV_PICTURE_TYPE_B ? 3 : 0;
1297
1298         ff_mpeg_unref_picture(s, &s->new_picture);
1299         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1300             return ret;
1301
1302         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1303             // input is a shared pix, so we can't modifiy it -> alloc a new
1304             // one & ensure that the shared one is reuseable
1305
1306             Picture *pic;
1307             int i = ff_find_unused_picture(s, 0);
1308             if (i < 0)
1309                 return i;
1310             pic = &s->picture[i];
1311
1312             pic->reference = s->reordered_input_picture[0]->reference;
1313             if (ff_alloc_picture(s, pic, 0) < 0) {
1314                 return -1;
1315             }
1316
1317             ret = av_frame_copy_props(&pic->f, &s->reordered_input_picture[0]->f);
1318             if (ret < 0)
1319                 return ret;
1320
1321             /* mark us unused / free shared pic */
1322             av_frame_unref(&s->reordered_input_picture[0]->f);
1323             s->reordered_input_picture[0]->shared = 0;
1324
1325             s->current_picture_ptr = pic;
1326         } else {
1327             // input is not a shared pix -> reuse buffer for current_pix
1328             s->current_picture_ptr = s->reordered_input_picture[0];
1329             for (i = 0; i < 4; i++) {
1330                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1331             }
1332         }
1333         ff_mpeg_unref_picture(s, &s->current_picture);
1334         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1335                                        s->current_picture_ptr)) < 0)
1336             return ret;
1337
1338         s->picture_number = s->new_picture.f.display_picture_number;
1339     } else {
1340         ff_mpeg_unref_picture(s, &s->new_picture);
1341     }
1342     return 0;
1343 }
1344
1345 static void frame_end(MpegEncContext *s)
1346 {
1347     int i;
1348
1349     if (s->unrestricted_mv &&
1350         s->current_picture.reference &&
1351         !s->intra_only) {
1352         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1353         int hshift = desc->log2_chroma_w;
1354         int vshift = desc->log2_chroma_h;
1355         s->dsp.draw_edges(s->current_picture.f.data[0], s->linesize,
1356                           s->h_edge_pos, s->v_edge_pos,
1357                           EDGE_WIDTH, EDGE_WIDTH,
1358                           EDGE_TOP | EDGE_BOTTOM);
1359         s->dsp.draw_edges(s->current_picture.f.data[1], s->uvlinesize,
1360                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1361                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1362                           EDGE_TOP | EDGE_BOTTOM);
1363         s->dsp.draw_edges(s->current_picture.f.data[2], s->uvlinesize,
1364                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1365                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1366                           EDGE_TOP | EDGE_BOTTOM);
1367     }
1368
1369     emms_c();
1370
1371     s->last_pict_type                 = s->pict_type;
1372     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f.quality;
1373     if (s->pict_type!= AV_PICTURE_TYPE_B)
1374         s->last_non_b_pict_type = s->pict_type;
1375
1376     if (s->encoding) {
1377         /* release non-reference frames */
1378         for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1379             if (!s->picture[i].reference)
1380                 ff_mpeg_unref_picture(s, &s->picture[i]);
1381         }
1382     }
1383
1384     s->avctx->coded_frame = &s->current_picture_ptr->f;
1385
1386 }
1387
1388 static void update_noise_reduction(MpegEncContext *s)
1389 {
1390     int intra, i;
1391
1392     for (intra = 0; intra < 2; intra++) {
1393         if (s->dct_count[intra] > (1 << 16)) {
1394             for (i = 0; i < 64; i++) {
1395                 s->dct_error_sum[intra][i] >>= 1;
1396             }
1397             s->dct_count[intra] >>= 1;
1398         }
1399
1400         for (i = 0; i < 64; i++) {
1401             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1402                                        s->dct_count[intra] +
1403                                        s->dct_error_sum[intra][i] / 2) /
1404                                       (s->dct_error_sum[intra][i] + 1);
1405         }
1406     }
1407 }
1408
1409 static int frame_start(MpegEncContext *s)
1410 {
1411     int ret;
1412
1413     /* mark & release old frames */
1414     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1415         s->last_picture_ptr != s->next_picture_ptr &&
1416         s->last_picture_ptr->f.buf[0]) {
1417         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1418     }
1419
1420     s->current_picture_ptr->f.pict_type = s->pict_type;
1421     s->current_picture_ptr->f.key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1422
1423     ff_mpeg_unref_picture(s, &s->current_picture);
1424     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1425                                    s->current_picture_ptr)) < 0)
1426         return ret;
1427
1428     if (s->pict_type != AV_PICTURE_TYPE_B) {
1429         s->last_picture_ptr = s->next_picture_ptr;
1430         if (!s->droppable)
1431             s->next_picture_ptr = s->current_picture_ptr;
1432     }
1433
1434     if (s->last_picture_ptr) {
1435         ff_mpeg_unref_picture(s, &s->last_picture);
1436         if (s->last_picture_ptr->f.buf[0] &&
1437             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1438                                        s->last_picture_ptr)) < 0)
1439             return ret;
1440     }
1441     if (s->next_picture_ptr) {
1442         ff_mpeg_unref_picture(s, &s->next_picture);
1443         if (s->next_picture_ptr->f.buf[0] &&
1444             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1445                                        s->next_picture_ptr)) < 0)
1446             return ret;
1447     }
1448
1449     if (s->picture_structure!= PICT_FRAME) {
1450         int i;
1451         for (i = 0; i < 4; i++) {
1452             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1453                 s->current_picture.f.data[i] +=
1454                     s->current_picture.f.linesize[i];
1455             }
1456             s->current_picture.f.linesize[i] *= 2;
1457             s->last_picture.f.linesize[i]    *= 2;
1458             s->next_picture.f.linesize[i]    *= 2;
1459         }
1460     }
1461
1462     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1463         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1464         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1465     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1466         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1467         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1468     } else {
1469         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1470         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1471     }
1472
1473     if (s->dct_error_sum) {
1474         assert(s->avctx->noise_reduction && s->encoding);
1475         update_noise_reduction(s);
1476     }
1477
1478     return 0;
1479 }
1480
1481 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1482                           const AVFrame *pic_arg, int *got_packet)
1483 {
1484     MpegEncContext *s = avctx->priv_data;
1485     int i, stuffing_count, ret;
1486     int context_count = s->slice_context_count;
1487
1488     s->picture_in_gop_number++;
1489
1490     if (load_input_picture(s, pic_arg) < 0)
1491         return -1;
1492
1493     if (select_input_picture(s) < 0) {
1494         return -1;
1495     }
1496
1497     /* output? */
1498     if (s->new_picture.f.data[0]) {
1499         if (!pkt->data &&
1500             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1501             return ret;
1502         if (s->mb_info) {
1503             s->mb_info_ptr = av_packet_new_side_data(pkt,
1504                                  AV_PKT_DATA_H263_MB_INFO,
1505                                  s->mb_width*s->mb_height*12);
1506             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1507         }
1508
1509         for (i = 0; i < context_count; i++) {
1510             int start_y = s->thread_context[i]->start_mb_y;
1511             int   end_y = s->thread_context[i]->  end_mb_y;
1512             int h       = s->mb_height;
1513             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1514             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1515
1516             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1517         }
1518
1519         s->pict_type = s->new_picture.f.pict_type;
1520         //emms_c();
1521         ret = frame_start(s);
1522         if (ret < 0)
1523             return ret;
1524 vbv_retry:
1525         if (encode_picture(s, s->picture_number) < 0)
1526             return -1;
1527
1528         avctx->header_bits = s->header_bits;
1529         avctx->mv_bits     = s->mv_bits;
1530         avctx->misc_bits   = s->misc_bits;
1531         avctx->i_tex_bits  = s->i_tex_bits;
1532         avctx->p_tex_bits  = s->p_tex_bits;
1533         avctx->i_count     = s->i_count;
1534         // FIXME f/b_count in avctx
1535         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1536         avctx->skip_count  = s->skip_count;
1537
1538         frame_end(s);
1539
1540         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1541             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1542
1543         if (avctx->rc_buffer_size) {
1544             RateControlContext *rcc = &s->rc_context;
1545             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1546
1547             if (put_bits_count(&s->pb) > max_size &&
1548                 s->lambda < s->avctx->lmax) {
1549                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1550                                        (s->qscale + 1) / s->qscale);
1551                 if (s->adaptive_quant) {
1552                     int i;
1553                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1554                         s->lambda_table[i] =
1555                             FFMAX(s->lambda_table[i] + 1,
1556                                   s->lambda_table[i] * (s->qscale + 1) /
1557                                   s->qscale);
1558                 }
1559                 s->mb_skipped = 0;        // done in frame_start()
1560                 // done in encode_picture() so we must undo it
1561                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1562                     if (s->flipflop_rounding          ||
1563                         s->codec_id == AV_CODEC_ID_H263P ||
1564                         s->codec_id == AV_CODEC_ID_MPEG4)
1565                         s->no_rounding ^= 1;
1566                 }
1567                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1568                     s->time_base       = s->last_time_base;
1569                     s->last_non_b_time = s->time - s->pp_time;
1570                 }
1571                 for (i = 0; i < context_count; i++) {
1572                     PutBitContext *pb = &s->thread_context[i]->pb;
1573                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1574                 }
1575                 goto vbv_retry;
1576             }
1577
1578             assert(s->avctx->rc_max_rate);
1579         }
1580
1581         if (s->flags & CODEC_FLAG_PASS1)
1582             ff_write_pass1_stats(s);
1583
1584         for (i = 0; i < 4; i++) {
1585             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1586             avctx->error[i] += s->current_picture_ptr->f.error[i];
1587         }
1588
1589         if (s->flags & CODEC_FLAG_PASS1)
1590             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1591                    avctx->i_tex_bits + avctx->p_tex_bits ==
1592                        put_bits_count(&s->pb));
1593         flush_put_bits(&s->pb);
1594         s->frame_bits  = put_bits_count(&s->pb);
1595
1596         stuffing_count = ff_vbv_update(s, s->frame_bits);
1597         if (stuffing_count) {
1598             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1599                     stuffing_count + 50) {
1600                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1601                 return -1;
1602             }
1603
1604             switch (s->codec_id) {
1605             case AV_CODEC_ID_MPEG1VIDEO:
1606             case AV_CODEC_ID_MPEG2VIDEO:
1607                 while (stuffing_count--) {
1608                     put_bits(&s->pb, 8, 0);
1609                 }
1610             break;
1611             case AV_CODEC_ID_MPEG4:
1612                 put_bits(&s->pb, 16, 0);
1613                 put_bits(&s->pb, 16, 0x1C3);
1614                 stuffing_count -= 4;
1615                 while (stuffing_count--) {
1616                     put_bits(&s->pb, 8, 0xFF);
1617                 }
1618             break;
1619             default:
1620                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1621             }
1622             flush_put_bits(&s->pb);
1623             s->frame_bits  = put_bits_count(&s->pb);
1624         }
1625
1626         /* update mpeg1/2 vbv_delay for CBR */
1627         if (s->avctx->rc_max_rate                          &&
1628             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1629             s->out_format == FMT_MPEG1                     &&
1630             90000LL * (avctx->rc_buffer_size - 1) <=
1631                 s->avctx->rc_max_rate * 0xFFFFLL) {
1632             int vbv_delay, min_delay;
1633             double inbits  = s->avctx->rc_max_rate *
1634                              av_q2d(s->avctx->time_base);
1635             int    minbits = s->frame_bits - 8 *
1636                              (s->vbv_delay_ptr - s->pb.buf - 1);
1637             double bits    = s->rc_context.buffer_index + minbits - inbits;
1638
1639             if (bits < 0)
1640                 av_log(s->avctx, AV_LOG_ERROR,
1641                        "Internal error, negative bits\n");
1642
1643             assert(s->repeat_first_field == 0);
1644
1645             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1646             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1647                         s->avctx->rc_max_rate;
1648
1649             vbv_delay = FFMAX(vbv_delay, min_delay);
1650
1651             assert(vbv_delay < 0xFFFF);
1652
1653             s->vbv_delay_ptr[0] &= 0xF8;
1654             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1655             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1656             s->vbv_delay_ptr[2] &= 0x07;
1657             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1658             avctx->vbv_delay     = vbv_delay * 300;
1659         }
1660         s->total_bits     += s->frame_bits;
1661         avctx->frame_bits  = s->frame_bits;
1662
1663         pkt->pts = s->current_picture.f.pts;
1664         if (!s->low_delay) {
1665             if (!s->current_picture.f.coded_picture_number)
1666                 pkt->dts = pkt->pts - s->dts_delta;
1667             else
1668                 pkt->dts = s->reordered_pts;
1669             s->reordered_pts = s->input_picture[0]->f.pts;
1670         } else
1671             pkt->dts = pkt->pts;
1672         if (s->current_picture.f.key_frame)
1673             pkt->flags |= AV_PKT_FLAG_KEY;
1674         if (s->mb_info)
1675             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1676     } else {
1677         s->frame_bits = 0;
1678     }
1679     assert((s->frame_bits & 7) == 0);
1680
1681     pkt->size = s->frame_bits / 8;
1682     *got_packet = !!pkt->size;
1683     return 0;
1684 }
1685
1686 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1687                                                 int n, int threshold)
1688 {
1689     static const char tab[64] = {
1690         3, 2, 2, 1, 1, 1, 1, 1,
1691         1, 1, 1, 1, 1, 1, 1, 1,
1692         1, 1, 1, 1, 1, 1, 1, 1,
1693         0, 0, 0, 0, 0, 0, 0, 0,
1694         0, 0, 0, 0, 0, 0, 0, 0,
1695         0, 0, 0, 0, 0, 0, 0, 0,
1696         0, 0, 0, 0, 0, 0, 0, 0,
1697         0, 0, 0, 0, 0, 0, 0, 0
1698     };
1699     int score = 0;
1700     int run = 0;
1701     int i;
1702     int16_t *block = s->block[n];
1703     const int last_index = s->block_last_index[n];
1704     int skip_dc;
1705
1706     if (threshold < 0) {
1707         skip_dc = 0;
1708         threshold = -threshold;
1709     } else
1710         skip_dc = 1;
1711
1712     /* Are all we could set to zero already zero? */
1713     if (last_index <= skip_dc - 1)
1714         return;
1715
1716     for (i = 0; i <= last_index; i++) {
1717         const int j = s->intra_scantable.permutated[i];
1718         const int level = FFABS(block[j]);
1719         if (level == 1) {
1720             if (skip_dc && i == 0)
1721                 continue;
1722             score += tab[run];
1723             run = 0;
1724         } else if (level > 1) {
1725             return;
1726         } else {
1727             run++;
1728         }
1729     }
1730     if (score >= threshold)
1731         return;
1732     for (i = skip_dc; i <= last_index; i++) {
1733         const int j = s->intra_scantable.permutated[i];
1734         block[j] = 0;
1735     }
1736     if (block[0])
1737         s->block_last_index[n] = 0;
1738     else
1739         s->block_last_index[n] = -1;
1740 }
1741
1742 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1743                                int last_index)
1744 {
1745     int i;
1746     const int maxlevel = s->max_qcoeff;
1747     const int minlevel = s->min_qcoeff;
1748     int overflow = 0;
1749
1750     if (s->mb_intra) {
1751         i = 1; // skip clipping of intra dc
1752     } else
1753         i = 0;
1754
1755     for (; i <= last_index; i++) {
1756         const int j = s->intra_scantable.permutated[i];
1757         int level = block[j];
1758
1759         if (level > maxlevel) {
1760             level = maxlevel;
1761             overflow++;
1762         } else if (level < minlevel) {
1763             level = minlevel;
1764             overflow++;
1765         }
1766
1767         block[j] = level;
1768     }
1769
1770     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1771         av_log(s->avctx, AV_LOG_INFO,
1772                "warning, clipping %d dct coefficients to %d..%d\n",
1773                overflow, minlevel, maxlevel);
1774 }
1775
/**
 * Compute a per-pixel weight for an 8x8 block from the local pixel variance.
 *
 * For every pixel the sum and the sum of squares are gathered over its
 * neighbourhood (up to 3x3, clipped to the 8x8 block), and the weight is
 * 36 * sqrt(count * sqr - sum * sum) / count.
 *
 * @param weight output, 64 entries in row-major order (x + 8 * y)
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance between two lines of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int cx, cy;

    // FIXME optimize
    for (cy = 0; cy < 8; cy++) {
        const int top    = FFMAX(cy - 1, 0);
        const int bottom = FFMIN(8, cy + 2);

        for (cx = 0; cx < 8; cx++) {
            const int left  = FFMAX(cx - 1, 0);
            const int right = FFMIN(8, cx + 2);
            int total    = 0;
            int total_sq = 0;
            int samples  = 0;
            int nx, ny;

            for (ny = top; ny < bottom; ny++) {
                const uint8_t *line = ptr + ny * stride;

                for (nx = left; nx < right; nx++) {
                    const int v = line[nx];

                    total    += v;
                    total_sq += v * v;
                    samples++;
                }
            }
            weight[cx + 8 * cy] =
                (36 * ff_sqrt(samples * total_sq - total * total)) / samples;
        }
    }
}
1799
/**
 * Encode the macroblock at (s->mb_x, s->mb_y).
 *
 * s->block[] is filled either with the source pixels (intra) or with the
 * difference between the source and the prediction written to s->dest[]
 * (inter). Each block then goes through s->dct_quantize(), the optional
 * noise-shaping refinement and coefficient elimination, and the result is
 * handed to the bitstream writer selected by s->codec_id.
 *
 * @param s               encoder context
 * @param motion_x        forwarded unchanged to the codec-specific
 *                        *_encode_mb() call (ff_mjpeg_encode_mb() does not
 *                        take it)
 * @param motion_y        see motion_x
 * @param mb_block_height chroma lines per macroblock; used for the chroma
 *                        source offset and the chroma edge-emulation height
 * @param mb_block_count  number of 8x8 blocks handled for this macroblock
 */
1800 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1801                                                 int motion_x, int motion_y,
1802                                                 int mb_block_height,
1803                                                 int mb_block_count)
1804 {
1805     int16_t weight[8][64];
1806     int16_t orig[8][64];
1807     const int mb_x = s->mb_x;
1808     const int mb_y = s->mb_y;
1809     int i;
1810     int skip_dct[8];
1811     int dct_offset = s->linesize * 8; // default for progressive frames
1812     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1813     ptrdiff_t wrap_y, wrap_c;
1814
         /* every block starts out with the global skipdct setting */
1815     for (i = 0; i < mb_block_count; i++)
1816         skip_dct[i] = s->skipdct;
1817
         /* per-macroblock quantizer selection */
1818     if (s->adaptive_quant) {
1819         const int last_qp = s->qscale;
1820         const int mb_xy = mb_x + mb_y * s->mb_stride;
1821
1822         s->lambda = s->lambda_table[mb_xy];
1823         update_qscale(s);
1824
1825         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1826             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1827             s->dquant = s->qscale - last_qp;
1828
1829             if (s->out_format == FMT_H263) {
                     /* the quantizer step between macroblocks is limited to +-2 */
1830                 s->dquant = av_clip(s->dquant, -2, 2);
1831
1832                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1833                     if (!s->mb_intra) {
                             /* inter MBs: odd steps and direct mode in B-frames,
                              * and 8x8 motion vectors, get no quantizer change */
1834                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1835                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1836                                 s->dquant = 0;
1837                         }
1838                         if (s->mv_type == MV_TYPE_8X8)
1839                             s->dquant = 0;
1840                     }
1841                 }
1842             }
1843         }
1844         ff_set_qscale(s, last_qp + s->dquant);
1845     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1846         ff_set_qscale(s, s->qscale + s->dquant);
1847
         /* locate the source pixels of this macroblock in the input picture */
1848     wrap_y = s->linesize;
1849     wrap_c = s->uvlinesize;
1850     ptr_y  = s->new_picture.f.data[0] +
1851              (mb_y * 16 * wrap_y)              + mb_x * 16;
1852     ptr_cb = s->new_picture.f.data[1] +
1853              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1854     ptr_cr = s->new_picture.f.data[2] +
1855              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1856
         /* The macroblock reaches past the right or bottom picture border:
          * read from a copy built in s->edge_emu_buffer instead.
          * NOTE(review): the chroma calls pass mb_y * 8 and s->height >> 1
          * even when mb_block_height is 16 -- confirm this is intended for
          * 4:2:2 input. */
1857     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1858         uint8_t *ebuf = s->edge_emu_buffer + 32;
1859         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
1860                                  wrap_y, wrap_y,
1861                                  16, 16, mb_x * 16, mb_y * 16,
1862                                  s->width, s->height);
1863         ptr_y = ebuf;
1864         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
1865                                  wrap_c, wrap_c,
1866                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1867                                  s->width >> 1, s->height >> 1);
1868         ptr_cb = ebuf + 18 * wrap_y;
1869         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
1870                                  wrap_c, wrap_c,
1871                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1872                                  s->width >> 1, s->height >> 1);
1873         ptr_cr = ebuf + 18 * wrap_y + 8;
1874     }
1875
1876     if (s->mb_intra) {
             /* intra: optionally pick field (interlaced) DCT, then load the
              * source pixels straight into the blocks */
1877         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1878             int progressive_score, interlaced_score;
1879
1880             s->interlaced_dct = 0;
                 /* frame score (two 8-line halves), biased by -400 */
1881             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1882                                                     NULL, wrap_y, 8) +
1883                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1884                                                     NULL, wrap_y, 8) - 400;
1885
1886             if (progressive_score > 0) {
                     /* field score: every second line, starting at line 0 and 1 */
1887                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1888                                                        NULL, wrap_y * 2, 8) +
1889                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1890                                                        NULL, wrap_y * 2, 8);
1891                 if (progressive_score > interlaced_score) {
1892                     s->interlaced_dct = 1;
1893
                         /* lower blocks start one line down, lines are read
                          * with doubled stride */
1894                     dct_offset = wrap_y;
1895                     wrap_y <<= 1;
1896                     if (s->chroma_format == CHROMA_422)
1897                         wrap_c <<= 1;
1898                 }
1899             }
1900         }
1901
1902         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1903         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1904         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1905         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1906
1907         if (s->flags & CODEC_FLAG_GRAY) {
1908             skip_dct[4] = 1;
1909             skip_dct[5] = 1;
1910         } else {
1911             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1912             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1913             if (!s->chroma_y_shift) { /* 422 */
1914                 s->dsp.get_pixels(s->block[6],
1915                                   ptr_cb + (dct_offset >> 1), wrap_c);
1916                 s->dsp.get_pixels(s->block[7],
1917                                   ptr_cr + (dct_offset >> 1), wrap_c);
1918             }
1919         }
1920     } else {
             /* inter: build the prediction in s->dest[] and code the
              * difference to the source */
1921         op_pixels_func (*op_pix)[4];
1922         qpel_mc_func (*op_qpix)[16];
1923         uint8_t *dest_y, *dest_cb, *dest_cr;
1924
1925         dest_y  = s->dest[0];
1926         dest_cb = s->dest[1];
1927         dest_cr = s->dest[2];
1928
1929         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1930             op_pix  = s->hdsp.put_pixels_tab;
1931             op_qpix = s->dsp.put_qpel_pixels_tab;
1932         } else {
1933             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
1934             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1935         }
1936
1937         if (s->mv_dir & MV_DIR_FORWARD) {
1938             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
1939                           s->last_picture.f.data,
1940                           op_pix, op_qpix);
                 /* a backward prediction that follows is averaged into dest */
1941             op_pix  = s->hdsp.avg_pixels_tab;
1942             op_qpix = s->dsp.avg_qpel_pixels_tab;
1943         }
1944         if (s->mv_dir & MV_DIR_BACKWARD) {
1945             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
1946                           s->next_picture.f.data,
1947                           op_pix, op_qpix);
1948         }
1949
             /* frame/field DCT decision, this time on source vs. prediction */
1950         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1951             int progressive_score, interlaced_score;
1952
1953             s->interlaced_dct = 0;
1954             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1955                                                     ptr_y,              wrap_y,
1956                                                     8) +
1957                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1958                                                     ptr_y + wrap_y * 8, wrap_y,
1959                                                     8) - 400;
1960
1961             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1962                 progressive_score -= 400;
1963
1964             if (progressive_score > 0) {
1965                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1966                                                        ptr_y,
1967                                                        wrap_y * 2, 8) +
1968                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1969                                                        ptr_y + wrap_y,
1970                                                        wrap_y * 2, 8);
1971
1972                 if (progressive_score > interlaced_score) {
1973                     s->interlaced_dct = 1;
1974
1975                     dct_offset = wrap_y;
1976                     wrap_y <<= 1;
1977                     if (s->chroma_format == CHROMA_422)
1978                         wrap_c <<= 1;
1979                 }
1980             }
1981         }
1982
1983         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1984         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1985         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1986                            dest_y + dct_offset, wrap_y);
1987         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1988                            dest_y + dct_offset + 8, wrap_y);
1989
1990         if (s->flags & CODEC_FLAG_GRAY) {
1991             skip_dct[4] = 1;
1992             skip_dct[5] = 1;
1993         } else {
1994             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1995             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1996             if (!s->chroma_y_shift) { /* 422 */
1997                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1998                                    dest_cb + (dct_offset >> 1), wrap_c);
1999                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
2000                                    dest_cr + (dct_offset >> 1), wrap_c);
2001             }
2002         }
2003         /* pre quantization */
             /* When the stored mc_mb_var of this macroblock is below
              * 2 * qscale^2, each block whose SAD against the prediction is
              * below 20 * qscale is not transformed at all. */
2004         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2005                 2 * s->qscale * s->qscale) {
2006             // FIXME optimize
2007             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
2008                               wrap_y, 8) < 20 * s->qscale)
2009                 skip_dct[0] = 1;
2010             if (s->dsp.sad[1](NULL, ptr_y + 8,
2011                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2012                 skip_dct[1] = 1;
2013             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
2014                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
2015                 skip_dct[2] = 1;
2016             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
2017                               dest_y + dct_offset + 8,
2018                               wrap_y, 8) < 20 * s->qscale)
2019                 skip_dct[3] = 1;
2020             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
2021                               wrap_c, 8) < 20 * s->qscale)
2022                 skip_dct[4] = 1;
2023             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
2024                               wrap_c, 8) < 20 * s->qscale)
2025                 skip_dct[5] = 1;
2026             if (!s->chroma_y_shift) { /* 422 */
2027                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2028                                   dest_cb + (dct_offset >> 1),
2029                                   wrap_c, 8) < 20 * s->qscale)
2030                     skip_dct[6] = 1;
2031                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2032                                   dest_cr + (dct_offset >> 1),
2033                                   wrap_c, 8) < 20 * s->qscale)
2034                     skip_dct[7] = 1;
2035             }
2036         }
2037     }
2038
         /* noise shaping: derive per-pixel weights from the source for every
          * block that will be transformed, and keep a copy of the blocks as
          * they are before quantization for dct_quantize_refine() */
2039     if (s->quantizer_noise_shaping) {
2040         if (!skip_dct[0])
2041             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2042         if (!skip_dct[1])
2043             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2044         if (!skip_dct[2])
2045             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2046         if (!skip_dct[3])
2047             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2048         if (!skip_dct[4])
2049             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2050         if (!skip_dct[5])
2051             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2052         if (!s->chroma_y_shift) { /* 422 */
2053             if (!skip_dct[6])
2054                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2055                                   wrap_c);
2056             if (!skip_dct[7])
2057                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2058                                   wrap_c);
2059         }
             /* one copy of mb_block_count * 64 coefficients starting at block 0 */
2060         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2061     }
2062
2063     /* DCT & quantize */
2064     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
2065     {
2066         for (i = 0; i < mb_block_count; i++) {
2067             if (!skip_dct[i]) {
2068                 int overflow;
2069                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2070                 // FIXME we could decide to change to quantizer instead of
2071                 // clipping
2072                 // JS: I don't think that would be a good idea it could lower
2073                 //     quality instead of improve it. Just INTRADC clipping
2074                 //     deserves changes in quantizer
2075                 if (overflow)
2076                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2077             } else
                     /* -1 marks a block without coded coefficients */
2078                 s->block_last_index[i] = -1;
2079         }
2080         if (s->quantizer_noise_shaping) {
2081             for (i = 0; i < mb_block_count; i++) {
2082                 if (!skip_dct[i]) {
2083                     s->block_last_index[i] =
2084                         dct_quantize_refine(s, s->block[i], weight[i],
2085                                             orig[i], i, s->qscale);
2086                 }
2087             }
2088         }
2089
             /* single coefficient elimination is applied to inter macroblocks only */
2090         if (s->luma_elim_threshold && !s->mb_intra)
2091             for (i = 0; i < 4; i++)
2092                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2093         if (s->chroma_elim_threshold && !s->mb_intra)
2094             for (i = 4; i < mb_block_count; i++)
2095                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2096
2097         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2098             for (i = 0; i < mb_block_count; i++) {
2099                 if (s->block_last_index[i] == -1)
2100                     s->coded_score[i] = INT_MAX / 256;
2101             }
2102         }
2103     }
2104
         /* gray intra macroblock: both chroma blocks are reduced to a single
          * first coefficient of (1024 + c_dc_scale / 2) / c_dc_scale */
2105     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2106         s->block_last_index[4] =
2107         s->block_last_index[5] = 0;
2108         s->block[4][0] =
2109         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2110     }
2111
2112     // non c quantize code returns incorrect block_last_index FIXME
         /* so rescan from the end for the last nonzero coefficient in scan order */
2113     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2114         for (i = 0; i < mb_block_count; i++) {
2115             int j;
2116             if (s->block_last_index[i] > 0) {
2117                 for (j = 63; j > 0; j--) {
2118                     if (s->block[i][s->intra_scantable.permutated[j]])
2119                         break;
2120                 }
2121                 s->block_last_index[i] = j;
2122             }
2123         }
2124     }
2125
2126     /* huffman encode */
         /* each call is guarded by its CONFIG_*_ENCODER constant */
2127     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2128     case AV_CODEC_ID_MPEG1VIDEO:
2129     case AV_CODEC_ID_MPEG2VIDEO:
2130         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2131             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2132         break;
2133     case AV_CODEC_ID_MPEG4:
2134         if (CONFIG_MPEG4_ENCODER)
2135             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2136         break;
2137     case AV_CODEC_ID_MSMPEG4V2:
2138     case AV_CODEC_ID_MSMPEG4V3:
2139     case AV_CODEC_ID_WMV1:
2140         if (CONFIG_MSMPEG4_ENCODER)
2141             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2142         break;
2143     case AV_CODEC_ID_WMV2:
2144         if (CONFIG_WMV2_ENCODER)
2145             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2146         break;
2147     case AV_CODEC_ID_H261:
2148         if (CONFIG_H261_ENCODER)
2149             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2150         break;
2151     case AV_CODEC_ID_H263:
2152     case AV_CODEC_ID_H263P:
2153     case AV_CODEC_ID_FLV1:
2154     case AV_CODEC_ID_RV10:
2155     case AV_CODEC_ID_RV20:
2156         if (CONFIG_H263_ENCODER)
2157             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2158         break;
2159     case AV_CODEC_ID_MJPEG:
2160         if (CONFIG_MJPEG_ENCODER)
2161             ff_mjpeg_encode_mb(s, s->block);
2162         break;
2163     default:
2164         assert(0);
2165     }
2166 }
2167
2168 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2169 {
2170     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2171     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2172 }
2173
2174 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2175     int i;
2176
2177     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2178
2179     /* mpeg1 */
2180     d->mb_skip_run= s->mb_skip_run;
2181     for(i=0; i<3; i++)
2182         d->last_dc[i] = s->last_dc[i];
2183
2184     /* statistics */
2185     d->mv_bits= s->mv_bits;
2186     d->i_tex_bits= s->i_tex_bits;
2187     d->p_tex_bits= s->p_tex_bits;
2188     d->i_count= s->i_count;
2189     d->f_count= s->f_count;
2190     d->b_count= s->b_count;
2191     d->skip_count= s->skip_count;
2192     d->misc_bits= s->misc_bits;
2193     d->last_bits= 0;
2194
2195     d->mb_skipped= 0;
2196     d->qscale= s->qscale;
2197     d->dquant= s->dquant;
2198
2199     d->esc3_level_length= s->esc3_level_length;
2200 }
2201
2202 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2203     int i;
2204
2205     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2206     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2207
2208     /* mpeg1 */
2209     d->mb_skip_run= s->mb_skip_run;
2210     for(i=0; i<3; i++)
2211         d->last_dc[i] = s->last_dc[i];
2212
2213     /* statistics */
2214     d->mv_bits= s->mv_bits;
2215     d->i_tex_bits= s->i_tex_bits;
2216     d->p_tex_bits= s->p_tex_bits;
2217     d->i_count= s->i_count;
2218     d->f_count= s->f_count;
2219     d->b_count= s->b_count;
2220     d->skip_count= s->skip_count;
2221     d->misc_bits= s->misc_bits;
2222
2223     d->mb_intra= s->mb_intra;
2224     d->mb_skipped= s->mb_skipped;
2225     d->mv_type= s->mv_type;
2226     d->mv_dir= s->mv_dir;
2227     d->pb= s->pb;
2228     if(s->data_partitioning){
2229         d->pb2= s->pb2;
2230         d->tex_pb= s->tex_pb;
2231     }
2232     d->block= s->block;
2233     for(i=0; i<8; i++)
2234         d->block_last_index[i]= s->block_last_index[i];
2235     d->interlaced_dct= s->interlaced_dct;
2236     d->qscale= s->qscale;
2237
2238     d->esc3_level_length= s->esc3_level_length;
2239 }
2240
2241 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2242                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2243                            int *dmin, int *next_block, int motion_x, int motion_y)
2244 {
2245     int score;
2246     uint8_t *dest_backup[3];
2247
2248     copy_context_before_encode(s, backup, type);
2249
2250     s->block= s->blocks[*next_block];
2251     s->pb= pb[*next_block];
2252     if(s->data_partitioning){
2253         s->pb2   = pb2   [*next_block];
2254         s->tex_pb= tex_pb[*next_block];
2255     }
2256
2257     if(*next_block){
2258         memcpy(dest_backup, s->dest, sizeof(s->dest));
2259         s->dest[0] = s->rd_scratchpad;
2260         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2261         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2262         assert(s->linesize >= 32); //FIXME
2263     }
2264
2265     encode_mb(s, motion_x, motion_y);
2266
2267     score= put_bits_count(&s->pb);
2268     if(s->data_partitioning){
2269         score+= put_bits_count(&s->pb2);
2270         score+= put_bits_count(&s->tex_pb);
2271     }
2272
2273     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2274         ff_MPV_decode_mb(s, s->block);
2275
2276         score *= s->lambda2;
2277         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2278     }
2279
2280     if(*next_block){
2281         memcpy(s->dest, dest_backup, sizeof(s->dest));
2282     }
2283
2284     if(score<*dmin){
2285         *dmin= score;
2286         *next_block^=1;
2287
2288         copy_context_after_encode(best, s, type);
2289     }
2290 }
2291
2292 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2293     uint32_t *sq = ff_squareTbl + 256;
2294     int acc=0;
2295     int x,y;
2296
2297     if(w==16 && h==16)
2298         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2299     else if(w==8 && h==8)
2300         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2301
2302     for(y=0; y<h; y++){
2303         for(x=0; x<w; x++){
2304             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2305         }
2306     }
2307
2308     assert(acc>=0);
2309
2310     return acc;
2311 }
2312
2313 static int sse_mb(MpegEncContext *s){
2314     int w= 16;
2315     int h= 16;
2316
2317     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2318     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2319
2320     if(w==16 && h==16)
2321       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2322         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2323                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2324                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2325       }else{
2326         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2327                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2328                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2329       }
2330     else
2331         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2332                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2333                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2334 }
2335
2336 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2337     MpegEncContext *s= *(void**)arg;
2338
2339
2340     s->me.pre_pass=1;
2341     s->me.dia_size= s->avctx->pre_dia_size;
2342     s->first_slice_line=1;
2343     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2344         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2345             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2346         }
2347         s->first_slice_line=0;
2348     }
2349
2350     s->me.pre_pass=0;
2351
2352     return 0;
2353 }
2354
2355 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2356     MpegEncContext *s= *(void**)arg;
2357
2358     ff_check_alignment();
2359
2360     s->me.dia_size= s->avctx->dia_size;
2361     s->first_slice_line=1;
2362     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2363         s->mb_x=0; //for block init below
2364         ff_init_block_index(s);
2365         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2366             s->block_index[0]+=2;
2367             s->block_index[1]+=2;
2368             s->block_index[2]+=2;
2369             s->block_index[3]+=2;
2370
2371             /* compute motion vector & mb_type and store in context */
2372             if(s->pict_type==AV_PICTURE_TYPE_B)
2373                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2374             else
2375                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2376         }
2377         s->first_slice_line=0;
2378     }
2379     return 0;
2380 }
2381
2382 static int mb_var_thread(AVCodecContext *c, void *arg){
2383     MpegEncContext *s= *(void**)arg;
2384     int mb_x, mb_y;
2385
2386     ff_check_alignment();
2387
2388     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2389         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2390             int xx = mb_x * 16;
2391             int yy = mb_y * 16;
2392             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2393             int varc;
2394             int sum = s->dsp.pix_sum(pix, s->linesize);
2395
2396             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2397
2398             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2399             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2400             s->me.mb_var_sum_temp    += varc;
2401         }
2402     }
2403     return 0;
2404 }
2405
2406 static void write_slice_end(MpegEncContext *s){
2407     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2408         if(s->partitioned_frame){
2409             ff_mpeg4_merge_partitions(s);
2410         }
2411
2412         ff_mpeg4_stuffing(&s->pb);
2413     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2414         ff_mjpeg_encode_stuffing(&s->pb);
2415     }
2416
2417     avpriv_align_put_bits(&s->pb);
2418     flush_put_bits(&s->pb);
2419
2420     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2421         s->misc_bits+= get_bits_diff(s);
2422 }
2423
2424 static void write_mb_info(MpegEncContext *s)
2425 {
2426     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2427     int offset = put_bits_count(&s->pb);
2428     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2429     int gobn = s->mb_y / s->gob_index;
2430     int pred_x, pred_y;
2431     if (CONFIG_H263_ENCODER)
2432         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2433     bytestream_put_le32(&ptr, offset);
2434     bytestream_put_byte(&ptr, s->qscale);
2435     bytestream_put_byte(&ptr, gobn);
2436     bytestream_put_le16(&ptr, mba);
2437     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2438     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2439     /* 4MV not implemented */
2440     bytestream_put_byte(&ptr, 0); /* hmv2 */
2441     bytestream_put_byte(&ptr, 0); /* vmv2 */
2442 }
2443
2444 static void update_mb_info(MpegEncContext *s, int startcode)
2445 {
2446     if (!s->mb_info)
2447         return;
2448     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2449         s->mb_info_size += 12;
2450         s->prev_mb_info = s->last_mb_info;
2451     }
2452     if (startcode) {
2453         s->prev_mb_info = put_bits_count(&s->pb)/8;
2454         /* This might have incremented mb_info_size above, and we return without
2455          * actually writing any info into that slot yet. But in that case,
2456          * this will be called again at the start of the after writing the
2457          * start code, actually writing the mb info. */
2458         return;
2459     }
2460
2461     s->last_mb_info = put_bits_count(&s->pb)/8;
2462     if (!s->mb_info_size)
2463         s->mb_info_size += 12;
2464     write_mb_info(s);
2465 }
2466
2467 static int encode_thread(AVCodecContext *c, void *arg){
2468     MpegEncContext *s= *(void**)arg;
2469     int mb_x, mb_y, pdif = 0;
2470     int chr_h= 16>>s->chroma_y_shift;
2471     int i, j;
2472     MpegEncContext best_s, backup_s;
2473     uint8_t bit_buf[2][MAX_MB_BYTES];
2474     uint8_t bit_buf2[2][MAX_MB_BYTES];
2475     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2476     PutBitContext pb[2], pb2[2], tex_pb[2];
2477
2478     ff_check_alignment();
2479
2480     for(i=0; i<2; i++){
2481         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2482         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2483         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2484     }
2485
2486     s->last_bits= put_bits_count(&s->pb);
2487     s->mv_bits=0;
2488     s->misc_bits=0;
2489     s->i_tex_bits=0;
2490     s->p_tex_bits=0;
2491     s->i_count=0;
2492     s->f_count=0;
2493     s->b_count=0;
2494     s->skip_count=0;
2495
2496     for(i=0; i<3; i++){
2497         /* init last dc values */
2498         /* note: quant matrix value (8) is implied here */
2499         s->last_dc[i] = 128 << s->intra_dc_precision;
2500
2501         s->current_picture.f.error[i] = 0;
2502     }
2503     s->mb_skip_run = 0;
2504     memset(s->last_mv, 0, sizeof(s->last_mv));
2505
2506     s->last_mv_dir = 0;
2507
2508     switch(s->codec_id){
2509     case AV_CODEC_ID_H263:
2510     case AV_CODEC_ID_H263P:
2511     case AV_CODEC_ID_FLV1:
2512         if (CONFIG_H263_ENCODER)
2513             s->gob_index = ff_h263_get_gob_height(s);
2514         break;
2515     case AV_CODEC_ID_MPEG4:
2516         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2517             ff_mpeg4_init_partitions(s);
2518         break;
2519     }
2520
2521     s->resync_mb_x=0;
2522     s->resync_mb_y=0;
2523     s->first_slice_line = 1;
2524     s->ptr_lastgob = s->pb.buf;
2525     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2526         s->mb_x=0;
2527         s->mb_y= mb_y;
2528
2529         ff_set_qscale(s, s->qscale);
2530         ff_init_block_index(s);
2531
2532         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2533             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2534             int mb_type= s->mb_type[xy];
2535 //            int d;
2536             int dmin= INT_MAX;
2537             int dir;
2538
2539             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2540                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2541                 return -1;
2542             }
2543             if(s->data_partitioning){
2544                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2545                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2546                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2547                     return -1;
2548                 }
2549             }
2550
2551             s->mb_x = mb_x;
2552             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2553             ff_update_block_index(s);
2554
2555             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2556                 ff_h261_reorder_mb_index(s);
2557                 xy= s->mb_y*s->mb_stride + s->mb_x;
2558                 mb_type= s->mb_type[xy];
2559             }
2560
2561             /* write gob / video packet header  */
2562             if(s->rtp_mode){
2563                 int current_packet_size, is_gob_start;
2564
2565                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2566
2567                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2568
2569                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2570
2571                 switch(s->codec_id){
2572                 case AV_CODEC_ID_H263:
2573                 case AV_CODEC_ID_H263P:
2574                     if(!s->h263_slice_structured)
2575                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2576                     break;
2577                 case AV_CODEC_ID_MPEG2VIDEO:
2578                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2579                 case AV_CODEC_ID_MPEG1VIDEO:
2580                     if(s->mb_skip_run) is_gob_start=0;
2581                     break;
2582                 }
2583
2584                 if(is_gob_start){
2585                     if(s->start_mb_y != mb_y || mb_x!=0){
2586                         write_slice_end(s);
2587
2588                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2589                             ff_mpeg4_init_partitions(s);
2590                         }
2591                     }
2592
2593                     assert((put_bits_count(&s->pb)&7) == 0);
2594                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2595
2596                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2597                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2598                         int d = 100 / s->error_rate;
2599                         if(r % d == 0){
2600                             current_packet_size=0;
2601                             s->pb.buf_ptr= s->ptr_lastgob;
2602                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2603                         }
2604                     }
2605
2606                     if (s->avctx->rtp_callback){
2607                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2608                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2609                     }
2610                     update_mb_info(s, 1);
2611
2612                     switch(s->codec_id){
2613                     case AV_CODEC_ID_MPEG4:
2614                         if (CONFIG_MPEG4_ENCODER) {
2615                             ff_mpeg4_encode_video_packet_header(s);
2616                             ff_mpeg4_clean_buffers(s);
2617                         }
2618                     break;
2619                     case AV_CODEC_ID_MPEG1VIDEO:
2620                     case AV_CODEC_ID_MPEG2VIDEO:
2621                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2622                             ff_mpeg1_encode_slice_header(s);
2623                             ff_mpeg1_clean_buffers(s);
2624                         }
2625                     break;
2626                     case AV_CODEC_ID_H263:
2627                     case AV_CODEC_ID_H263P:
2628                         if (CONFIG_H263_ENCODER)
2629                             ff_h263_encode_gob_header(s, mb_y);
2630                     break;
2631                     }
2632
2633                     if(s->flags&CODEC_FLAG_PASS1){
2634                         int bits= put_bits_count(&s->pb);
2635                         s->misc_bits+= bits - s->last_bits;
2636                         s->last_bits= bits;
2637                     }
2638
2639                     s->ptr_lastgob += current_packet_size;
2640                     s->first_slice_line=1;
2641                     s->resync_mb_x=mb_x;
2642                     s->resync_mb_y=mb_y;
2643                 }
2644             }
2645
2646             if(  (s->resync_mb_x   == s->mb_x)
2647                && s->resync_mb_y+1 == s->mb_y){
2648                 s->first_slice_line=0;
2649             }
2650
2651             s->mb_skipped=0;
2652             s->dquant=0; //only for QP_RD
2653
2654             update_mb_info(s, 0);
2655
2656             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2657                 int next_block=0;
2658                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2659
2660                 copy_context_before_encode(&backup_s, s, -1);
2661                 backup_s.pb= s->pb;
2662                 best_s.data_partitioning= s->data_partitioning;
2663                 best_s.partitioned_frame= s->partitioned_frame;
2664                 if(s->data_partitioning){
2665                     backup_s.pb2= s->pb2;
2666                     backup_s.tex_pb= s->tex_pb;
2667                 }
2668
2669                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2670                     s->mv_dir = MV_DIR_FORWARD;
2671                     s->mv_type = MV_TYPE_16X16;
2672                     s->mb_intra= 0;
2673                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2674                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2675                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2676                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2677                 }
2678                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2679                     s->mv_dir = MV_DIR_FORWARD;
2680                     s->mv_type = MV_TYPE_FIELD;
2681                     s->mb_intra= 0;
2682                     for(i=0; i<2; i++){
2683                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2684                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2685                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2686                     }
2687                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2688                                  &dmin, &next_block, 0, 0);
2689                 }
2690                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2691                     s->mv_dir = MV_DIR_FORWARD;
2692                     s->mv_type = MV_TYPE_16X16;
2693                     s->mb_intra= 0;
2694                     s->mv[0][0][0] = 0;
2695                     s->mv[0][0][1] = 0;
2696                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2697                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2698                 }
2699                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2700                     s->mv_dir = MV_DIR_FORWARD;
2701                     s->mv_type = MV_TYPE_8X8;
2702                     s->mb_intra= 0;
2703                     for(i=0; i<4; i++){
2704                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2705                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2706                     }
2707                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2708                                  &dmin, &next_block, 0, 0);
2709                 }
2710                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2711                     s->mv_dir = MV_DIR_FORWARD;
2712                     s->mv_type = MV_TYPE_16X16;
2713                     s->mb_intra= 0;
2714                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2715                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2716                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2717                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2718                 }
2719                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2720                     s->mv_dir = MV_DIR_BACKWARD;
2721                     s->mv_type = MV_TYPE_16X16;
2722                     s->mb_intra= 0;
2723                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2724                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2725                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2726                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2727                 }
2728                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2729                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2730                     s->mv_type = MV_TYPE_16X16;
2731                     s->mb_intra= 0;
2732                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2733                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2734                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2735                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2736                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2737                                  &dmin, &next_block, 0, 0);
2738                 }
2739                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2740                     s->mv_dir = MV_DIR_FORWARD;
2741                     s->mv_type = MV_TYPE_FIELD;
2742                     s->mb_intra= 0;
2743                     for(i=0; i<2; i++){
2744                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2745                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2746                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2747                     }
2748                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2749                                  &dmin, &next_block, 0, 0);
2750                 }
2751                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2752                     s->mv_dir = MV_DIR_BACKWARD;
2753                     s->mv_type = MV_TYPE_FIELD;
2754                     s->mb_intra= 0;
2755                     for(i=0; i<2; i++){
2756                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2757                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2758                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2759                     }
2760                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2761                                  &dmin, &next_block, 0, 0);
2762                 }
2763                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2764                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2765                     s->mv_type = MV_TYPE_FIELD;
2766                     s->mb_intra= 0;
2767                     for(dir=0; dir<2; dir++){
2768                         for(i=0; i<2; i++){
2769                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2770                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2771                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2772                         }
2773                     }
2774                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2775                                  &dmin, &next_block, 0, 0);
2776                 }
2777                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2778                     s->mv_dir = 0;
2779                     s->mv_type = MV_TYPE_16X16;
2780                     s->mb_intra= 1;
2781                     s->mv[0][0][0] = 0;
2782                     s->mv[0][0][1] = 0;
2783                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2784                                  &dmin, &next_block, 0, 0);
2785                     if(s->h263_pred || s->h263_aic){
2786                         if(best_s.mb_intra)
2787                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2788                         else
2789                             ff_clean_intra_table_entries(s); //old mode?
2790                     }
2791                 }
2792
2793                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2794                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2795                         const int last_qp= backup_s.qscale;
2796                         int qpi, qp, dc[6];
2797                         int16_t ac[6][16];
2798                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2799                         static const int dquant_tab[4]={-1,1,-2,2};
2800
2801                         assert(backup_s.dquant == 0);
2802
2803                         //FIXME intra
2804                         s->mv_dir= best_s.mv_dir;
2805                         s->mv_type = MV_TYPE_16X16;
2806                         s->mb_intra= best_s.mb_intra;
2807                         s->mv[0][0][0] = best_s.mv[0][0][0];
2808                         s->mv[0][0][1] = best_s.mv[0][0][1];
2809                         s->mv[1][0][0] = best_s.mv[1][0][0];
2810                         s->mv[1][0][1] = best_s.mv[1][0][1];
2811
2812                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2813                         for(; qpi<4; qpi++){
2814                             int dquant= dquant_tab[qpi];
2815                             qp= last_qp + dquant;
2816                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2817                                 continue;
2818                             backup_s.dquant= dquant;
2819                             if(s->mb_intra && s->dc_val[0]){
2820                                 for(i=0; i<6; i++){
2821                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2822                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2823                                 }
2824                             }
2825
2826                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2827                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2828                             if(best_s.qscale != qp){
2829                                 if(s->mb_intra && s->dc_val[0]){
2830                                     for(i=0; i<6; i++){
2831                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2832                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2833                                     }
2834                                 }
2835                             }
2836                         }
2837                     }
2838                 }
2839                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2840                     int mx= s->b_direct_mv_table[xy][0];
2841                     int my= s->b_direct_mv_table[xy][1];
2842
2843                     backup_s.dquant = 0;
2844                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2845                     s->mb_intra= 0;
2846                     ff_mpeg4_set_direct_mv(s, mx, my);
2847                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2848                                  &dmin, &next_block, mx, my);
2849                 }
2850                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2851                     backup_s.dquant = 0;
2852                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2853                     s->mb_intra= 0;
2854                     ff_mpeg4_set_direct_mv(s, 0, 0);
2855                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2856                                  &dmin, &next_block, 0, 0);
2857                 }
2858                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2859                     int coded=0;
2860                     for(i=0; i<6; i++)
2861                         coded |= s->block_last_index[i];
2862                     if(coded){
2863                         int mx,my;
2864                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2865                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2866                             mx=my=0; //FIXME find the one we actually used
2867                             ff_mpeg4_set_direct_mv(s, mx, my);
2868                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2869                             mx= s->mv[1][0][0];
2870                             my= s->mv[1][0][1];
2871                         }else{
2872                             mx= s->mv[0][0][0];
2873                             my= s->mv[0][0][1];
2874                         }
2875
2876                         s->mv_dir= best_s.mv_dir;
2877                         s->mv_type = best_s.mv_type;
2878                         s->mb_intra= 0;
2879 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2880                         s->mv[0][0][1] = best_s.mv[0][0][1];
2881                         s->mv[1][0][0] = best_s.mv[1][0][0];
2882                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2883                         backup_s.dquant= 0;
2884                         s->skipdct=1;
2885                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2886                                         &dmin, &next_block, mx, my);
2887                         s->skipdct=0;
2888                     }
2889                 }
2890
2891                 s->current_picture.qscale_table[xy] = best_s.qscale;
2892
2893                 copy_context_after_encode(s, &best_s, -1);
2894
2895                 pb_bits_count= put_bits_count(&s->pb);
2896                 flush_put_bits(&s->pb);
2897                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2898                 s->pb= backup_s.pb;
2899
2900                 if(s->data_partitioning){
2901                     pb2_bits_count= put_bits_count(&s->pb2);
2902                     flush_put_bits(&s->pb2);
2903                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2904                     s->pb2= backup_s.pb2;
2905
2906                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2907                     flush_put_bits(&s->tex_pb);
2908                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2909                     s->tex_pb= backup_s.tex_pb;
2910                 }
2911                 s->last_bits= put_bits_count(&s->pb);
2912
2913                 if (CONFIG_H263_ENCODER &&
2914                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2915                     ff_h263_update_motion_val(s);
2916
2917                 if(next_block==0){ //FIXME 16 vs linesize16
2918                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2919                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2920                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2921                 }
2922
2923                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2924                     ff_MPV_decode_mb(s, s->block);
2925             } else {
2926                 int motion_x = 0, motion_y = 0;
2927                 s->mv_type=MV_TYPE_16X16;
2928                 // only one MB-Type possible
2929
2930                 switch(mb_type){
2931                 case CANDIDATE_MB_TYPE_INTRA:
2932                     s->mv_dir = 0;
2933                     s->mb_intra= 1;
2934                     motion_x= s->mv[0][0][0] = 0;
2935                     motion_y= s->mv[0][0][1] = 0;
2936                     break;
2937                 case CANDIDATE_MB_TYPE_INTER:
2938                     s->mv_dir = MV_DIR_FORWARD;
2939                     s->mb_intra= 0;
2940                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2941                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2942                     break;
2943                 case CANDIDATE_MB_TYPE_INTER_I:
2944                     s->mv_dir = MV_DIR_FORWARD;
2945                     s->mv_type = MV_TYPE_FIELD;
2946                     s->mb_intra= 0;
2947                     for(i=0; i<2; i++){
2948                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2949                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2950                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2951                     }
2952                     break;
2953                 case CANDIDATE_MB_TYPE_INTER4V:
2954                     s->mv_dir = MV_DIR_FORWARD;
2955                     s->mv_type = MV_TYPE_8X8;
2956                     s->mb_intra= 0;
2957                     for(i=0; i<4; i++){
2958                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2959                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2960                     }
2961                     break;
2962                 case CANDIDATE_MB_TYPE_DIRECT:
2963                     if (CONFIG_MPEG4_ENCODER) {
2964                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2965                         s->mb_intra= 0;
2966                         motion_x=s->b_direct_mv_table[xy][0];
2967                         motion_y=s->b_direct_mv_table[xy][1];
2968                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2969                     }
2970                     break;
2971                 case CANDIDATE_MB_TYPE_DIRECT0:
2972                     if (CONFIG_MPEG4_ENCODER) {
2973                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2974                         s->mb_intra= 0;
2975                         ff_mpeg4_set_direct_mv(s, 0, 0);
2976                     }
2977                     break;
2978                 case CANDIDATE_MB_TYPE_BIDIR:
2979                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2980                     s->mb_intra= 0;
2981                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2982                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2983                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2984                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2985                     break;
2986                 case CANDIDATE_MB_TYPE_BACKWARD:
2987                     s->mv_dir = MV_DIR_BACKWARD;
2988                     s->mb_intra= 0;
2989                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2990                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2991                     break;
2992                 case CANDIDATE_MB_TYPE_FORWARD:
2993                     s->mv_dir = MV_DIR_FORWARD;
2994                     s->mb_intra= 0;
2995                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2996                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2997                     break;
2998                 case CANDIDATE_MB_TYPE_FORWARD_I:
2999                     s->mv_dir = MV_DIR_FORWARD;
3000                     s->mv_type = MV_TYPE_FIELD;
3001                     s->mb_intra= 0;
3002                     for(i=0; i<2; i++){
3003                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3004                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3005                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3006                     }
3007                     break;
3008                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3009                     s->mv_dir = MV_DIR_BACKWARD;
3010                     s->mv_type = MV_TYPE_FIELD;
3011                     s->mb_intra= 0;
3012                     for(i=0; i<2; i++){
3013                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3014                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3015                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3016                     }
3017                     break;
3018                 case CANDIDATE_MB_TYPE_BIDIR_I:
3019                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3020                     s->mv_type = MV_TYPE_FIELD;
3021                     s->mb_intra= 0;
3022                     for(dir=0; dir<2; dir++){
3023                         for(i=0; i<2; i++){
3024                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3025                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3026                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3027                         }
3028                     }
3029                     break;
3030                 default:
3031                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3032                 }
3033
3034                 encode_mb(s, motion_x, motion_y);
3035
3036                 // RAL: Update last macroblock type
3037                 s->last_mv_dir = s->mv_dir;
3038
3039                 if (CONFIG_H263_ENCODER &&
3040                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3041                     ff_h263_update_motion_val(s);
3042
3043                 ff_MPV_decode_mb(s, s->block);
3044             }
3045
3046             /* clean the MV table in IPS frames for direct mode in B frames */
3047             if(s->mb_intra /* && I,P,S_TYPE */){
3048                 s->p_mv_table[xy][0]=0;
3049                 s->p_mv_table[xy][1]=0;
3050             }
3051
3052             if(s->flags&CODEC_FLAG_PSNR){
3053                 int w= 16;
3054                 int h= 16;
3055
3056                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3057                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3058
3059                 s->current_picture.f.error[0] += sse(
3060                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3061                     s->dest[0], w, h, s->linesize);
3062                 s->current_picture.f.error[1] += sse(
3063                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3064                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3065                 s->current_picture.f.error[2] += sse(
3066                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3067                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3068             }
3069             if(s->loop_filter){
3070                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3071                     ff_h263_loop_filter(s);
3072             }
3073             av_dlog(s->avctx, "MB %d %d bits\n",
3074                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3075         }
3076     }
3077
3078     //not beautiful here but we must write it before flushing so it has to be here
3079     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3080         ff_msmpeg4_encode_ext_header(s);
3081
3082     write_slice_end(s);
3083
3084     /* Send the last GOB if RTP */
3085     if (s->avctx->rtp_callback) {
3086         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3087         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3088         /* Call the RTP callback to send the last GOB */
3089         emms_c();
3090         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3091     }
3092
3093     return 0;
3094 }
3095
3096 #define MERGE(field) dst->field += src->field; src->field=0
3097 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3098     MERGE(me.scene_change_score);
3099     MERGE(me.mc_mb_var_sum_temp);
3100     MERGE(me.mb_var_sum_temp);
3101 }
3102
3103 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3104     int i;
3105
3106     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3107     MERGE(dct_count[1]);
3108     MERGE(mv_bits);
3109     MERGE(i_tex_bits);
3110     MERGE(p_tex_bits);
3111     MERGE(i_count);
3112     MERGE(f_count);
3113     MERGE(b_count);
3114     MERGE(skip_count);
3115     MERGE(misc_bits);
3116     MERGE(er.error_count);
3117     MERGE(padding_bug_score);
3118     MERGE(current_picture.f.error[0]);
3119     MERGE(current_picture.f.error[1]);
3120     MERGE(current_picture.f.error[2]);
3121
3122     if(dst->avctx->noise_reduction){
3123         for(i=0; i<64; i++){
3124             MERGE(dct_error_sum[0][i]);
3125             MERGE(dct_error_sum[1][i]);
3126         }
3127     }
3128
3129     assert(put_bits_count(&src->pb) % 8 ==0);
3130     assert(put_bits_count(&dst->pb) % 8 ==0);
3131     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3132     flush_put_bits(&dst->pb);
3133 }
3134
3135 static int estimate_qp(MpegEncContext *s, int dry_run){
3136     if (s->next_lambda){
3137         s->current_picture_ptr->f.quality =
3138         s->current_picture.f.quality = s->next_lambda;
3139         if(!dry_run) s->next_lambda= 0;
3140     } else if (!s->fixed_qscale) {
3141         s->current_picture_ptr->f.quality =
3142         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3143         if (s->current_picture.f.quality < 0)
3144             return -1;
3145     }
3146
3147     if(s->adaptive_quant){
3148         switch(s->codec_id){
3149         case AV_CODEC_ID_MPEG4:
3150             if (CONFIG_MPEG4_ENCODER)
3151                 ff_clean_mpeg4_qscales(s);
3152             break;
3153         case AV_CODEC_ID_H263:
3154         case AV_CODEC_ID_H263P:
3155         case AV_CODEC_ID_FLV1:
3156             if (CONFIG_H263_ENCODER)
3157                 ff_clean_h263_qscales(s);
3158             break;
3159         default:
3160             ff_init_qscale_tab(s);
3161         }
3162
3163         s->lambda= s->lambda_table[0];
3164         //FIXME broken
3165     }else
3166         s->lambda = s->current_picture.f.quality;
3167     update_qscale(s);
3168     return 0;
3169 }
3170
3171 /* must be called before writing the header */
3172 static void set_frame_distances(MpegEncContext * s){
3173     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3174     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3175
3176     if(s->pict_type==AV_PICTURE_TYPE_B){
3177         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3178         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3179     }else{
3180         s->pp_time= s->time - s->last_non_b_time;
3181         s->last_non_b_time= s->time;
3182         assert(s->picture_number==0 || s->pp_time > 0);
3183     }
3184 }
3185
3186 static int encode_picture(MpegEncContext *s, int picture_number)
3187 {
3188     int i, ret;
3189     int bits;
3190     int context_count = s->slice_context_count;
3191
3192     s->picture_number = picture_number;
3193
3194     /* Reset the average MB variance */
3195     s->me.mb_var_sum_temp    =
3196     s->me.mc_mb_var_sum_temp = 0;
3197
3198     /* we need to initialize some time vars before we can encode b-frames */
3199     // RAL: Condition added for MPEG1VIDEO
3200     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3201         set_frame_distances(s);
3202     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3203         ff_set_mpeg4_time(s);
3204
3205     s->me.scene_change_score=0;
3206
3207 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3208
3209     if(s->pict_type==AV_PICTURE_TYPE_I){
3210         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3211         else                        s->no_rounding=0;
3212     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3213         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3214             s->no_rounding ^= 1;
3215     }
3216
3217     if(s->flags & CODEC_FLAG_PASS2){
3218         if (estimate_qp(s,1) < 0)
3219             return -1;
3220         ff_get_2pass_fcode(s);
3221     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3222         if(s->pict_type==AV_PICTURE_TYPE_B)
3223             s->lambda= s->last_lambda_for[s->pict_type];
3224         else
3225             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3226         update_qscale(s);
3227     }
3228
3229     s->mb_intra=0; //for the rate distortion & bit compare functions
3230     for(i=1; i<context_count; i++){
3231         ret = ff_update_duplicate_context(s->thread_context[i], s);
3232         if (ret < 0)
3233             return ret;
3234     }
3235
3236     if(ff_init_me(s)<0)
3237         return -1;
3238
3239     /* Estimate motion for every MB */
3240     if(s->pict_type != AV_PICTURE_TYPE_I){
3241         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3242         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3243         if (s->pict_type != AV_PICTURE_TYPE_B) {
3244             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3245                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3246             }
3247         }
3248
3249         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3250     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3251         /* I-Frame */
3252         for(i=0; i<s->mb_stride*s->mb_height; i++)
3253             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3254
3255         if(!s->fixed_qscale){
3256             /* finding spatial complexity for I-frame rate control */
3257             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3258         }
3259     }
3260     for(i=1; i<context_count; i++){
3261         merge_context_after_me(s, s->thread_context[i]);
3262     }
3263     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3264     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3265     emms_c();
3266
3267     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3268         s->pict_type= AV_PICTURE_TYPE_I;
3269         for(i=0; i<s->mb_stride*s->mb_height; i++)
3270             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3271         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3272                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3273     }
3274
3275     if(!s->umvplus){
3276         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3277             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3278
3279             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3280                 int a,b;
3281                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3282                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3283                 s->f_code= FFMAX3(s->f_code, a, b);
3284             }
3285
3286             ff_fix_long_p_mvs(s);
3287             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3288             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3289                 int j;
3290                 for(i=0; i<2; i++){
3291                     for(j=0; j<2; j++)
3292                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3293                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3294                 }
3295             }
3296         }
3297
3298         if(s->pict_type==AV_PICTURE_TYPE_B){
3299             int a, b;
3300
3301             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3302             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3303             s->f_code = FFMAX(a, b);
3304
3305             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3306             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3307             s->b_code = FFMAX(a, b);
3308
3309             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3310             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3311             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3312             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3313             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3314                 int dir, j;
3315                 for(dir=0; dir<2; dir++){
3316                     for(i=0; i<2; i++){
3317                         for(j=0; j<2; j++){
3318                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3319                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3320                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3321                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3322                         }
3323                     }
3324                 }
3325             }
3326         }
3327     }
3328
3329     if (estimate_qp(s, 0) < 0)
3330         return -1;
3331
3332     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3333         s->qscale= 3; //reduce clipping problems
3334
3335     if (s->out_format == FMT_MJPEG) {
3336         /* for mjpeg, we do include qscale in the matrix */
3337         for(i=1;i<64;i++){
3338             int j= s->dsp.idct_permutation[i];
3339
3340             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3341         }
3342         s->y_dc_scale_table=
3343         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3344         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3345         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3346                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3347         s->qscale= 8;
3348     }
3349
3350     //FIXME var duplication
3351     s->current_picture_ptr->f.key_frame =
3352     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3353     s->current_picture_ptr->f.pict_type =
3354     s->current_picture.f.pict_type = s->pict_type;
3355
3356     if (s->current_picture.f.key_frame)
3357         s->picture_in_gop_number=0;
3358
3359     s->last_bits= put_bits_count(&s->pb);
3360     switch(s->out_format) {
3361     case FMT_MJPEG:
3362         if (CONFIG_MJPEG_ENCODER)
3363             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3364                                            s->intra_matrix);
3365         break;
3366     case FMT_H261:
3367         if (CONFIG_H261_ENCODER)
3368             ff_h261_encode_picture_header(s, picture_number);
3369         break;
3370     case FMT_H263:
3371         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3372             ff_wmv2_encode_picture_header(s, picture_number);
3373         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3374             ff_msmpeg4_encode_picture_header(s, picture_number);
3375         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3376             ff_mpeg4_encode_picture_header(s, picture_number);
3377         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3378             ff_rv10_encode_picture_header(s, picture_number);
3379         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3380             ff_rv20_encode_picture_header(s, picture_number);
3381         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3382             ff_flv_encode_picture_header(s, picture_number);
3383         else if (CONFIG_H263_ENCODER)
3384             ff_h263_encode_picture_header(s, picture_number);
3385         break;
3386     case FMT_MPEG1:
3387         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3388             ff_mpeg1_encode_picture_header(s, picture_number);
3389         break;
3390     default:
3391         assert(0);
3392     }
3393     bits= put_bits_count(&s->pb);
3394     s->header_bits= bits - s->last_bits;
3395
3396     for(i=1; i<context_count; i++){
3397         update_duplicate_context_after_me(s->thread_context[i], s);
3398     }
3399     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3400     for(i=1; i<context_count; i++){
3401         merge_context_after_encode(s, s->thread_context[i]);
3402     }
3403     emms_c();
3404     return 0;
3405 }
3406
3407 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3408     const int intra= s->mb_intra;
3409     int i;
3410
3411     s->dct_count[intra]++;
3412
3413     for(i=0; i<64; i++){
3414         int level= block[i];
3415
3416         if(level){
3417             if(level>0){
3418                 s->dct_error_sum[intra][i] += level;
3419                 level -= s->dct_offset[intra][i];
3420                 if(level<0) level=0;
3421             }else{
3422                 s->dct_error_sum[intra][i] -= level;
3423                 level += s->dct_offset[intra][i];
3424                 if(level>0) level=0;
3425             }
3426             block[i]= level;
3427         }
3428     }
3429 }
3430
3431 static int dct_quantize_trellis_c(MpegEncContext *s,
3432                                   int16_t *block, int n,
3433                                   int qscale, int *overflow){
3434     const int *qmat;
3435     const uint8_t *scantable= s->intra_scantable.scantable;
3436     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3437     int max=0;
3438     unsigned int threshold1, threshold2;
3439     int bias=0;
3440     int run_tab[65];
3441     int level_tab[65];
3442     int score_tab[65];
3443     int survivor[65];
3444     int survivor_count;
3445     int last_run=0;
3446     int last_level=0;
3447     int last_score= 0;
3448     int last_i;
3449     int coeff[2][64];
3450     int coeff_count[64];
3451     int qmul, qadd, start_i, last_non_zero, i, dc;
3452     const int esc_length= s->ac_esc_length;
3453     uint8_t * length;
3454     uint8_t * last_length;
3455     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3456
3457     s->dsp.fdct (block);
3458
3459     if(s->dct_error_sum)
3460         s->denoise_dct(s, block);
3461     qmul= qscale*16;
3462     qadd= ((qscale-1)|1)*8;
3463
3464     if (s->mb_intra) {
3465         int q;
3466         if (!s->h263_aic) {
3467             if (n < 4)
3468                 q = s->y_dc_scale;
3469             else
3470                 q = s->c_dc_scale;
3471             q = q << 3;
3472         } else{
3473             /* For AIC we skip quant/dequant of INTRADC */
3474             q = 1 << 3;
3475             qadd=0;
3476         }
3477
3478         /* note: block[0] is assumed to be positive */
3479         block[0] = (block[0] + (q >> 1)) / q;
3480         start_i = 1;
3481         last_non_zero = 0;
3482         qmat = s->q_intra_matrix[qscale];
3483         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3484             bias= 1<<(QMAT_SHIFT-1);
3485         length     = s->intra_ac_vlc_length;
3486         last_length= s->intra_ac_vlc_last_length;
3487     } else {
3488         start_i = 0;
3489         last_non_zero = -1;
3490         qmat = s->q_inter_matrix[qscale];
3491         length     = s->inter_ac_vlc_length;
3492         last_length= s->inter_ac_vlc_last_length;
3493     }
3494     last_i= start_i;
3495
3496     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3497     threshold2= (threshold1<<1);
3498
3499     for(i=63; i>=start_i; i--) {
3500         const int j = scantable[i];
3501         int level = block[j] * qmat[j];
3502
3503         if(((unsigned)(level+threshold1))>threshold2){
3504             last_non_zero = i;
3505             break;
3506         }
3507     }
3508
3509     for(i=start_i; i<=last_non_zero; i++) {
3510         const int j = scantable[i];
3511         int level = block[j] * qmat[j];
3512
3513 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3514 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3515         if(((unsigned)(level+threshold1))>threshold2){
3516             if(level>0){
3517                 level= (bias + level)>>QMAT_SHIFT;
3518                 coeff[0][i]= level;
3519                 coeff[1][i]= level-1;
3520 //                coeff[2][k]= level-2;
3521             }else{
3522                 level= (bias - level)>>QMAT_SHIFT;
3523                 coeff[0][i]= -level;
3524                 coeff[1][i]= -level+1;
3525 //                coeff[2][k]= -level+2;
3526             }
3527             coeff_count[i]= FFMIN(level, 2);
3528             assert(coeff_count[i]);
3529             max |=level;
3530         }else{
3531             coeff[0][i]= (level>>31)|1;
3532             coeff_count[i]= 1;
3533         }
3534     }
3535
3536     *overflow= s->max_qcoeff < max; //overflow might have happened
3537
3538     if(last_non_zero < start_i){
3539         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3540         return last_non_zero;
3541     }
3542
3543     score_tab[start_i]= 0;
3544     survivor[0]= start_i;
3545     survivor_count= 1;
3546
3547     for(i=start_i; i<=last_non_zero; i++){
3548         int level_index, j, zero_distortion;
3549         int dct_coeff= FFABS(block[ scantable[i] ]);
3550         int best_score=256*256*256*120;
3551
3552         if (s->dsp.fdct == ff_fdct_ifast)
3553             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3554         zero_distortion= dct_coeff*dct_coeff;
3555
3556         for(level_index=0; level_index < coeff_count[i]; level_index++){
3557             int distortion;
3558             int level= coeff[level_index][i];
3559             const int alevel= FFABS(level);
3560             int unquant_coeff;
3561
3562             assert(level);
3563
3564             if(s->out_format == FMT_H263){
3565                 unquant_coeff= alevel*qmul + qadd;
3566             }else{ //MPEG1
3567                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3568                 if(s->mb_intra){
3569                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3570                         unquant_coeff =   (unquant_coeff - 1) | 1;
3571                 }else{
3572                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3573                         unquant_coeff =   (unquant_coeff - 1) | 1;
3574                 }
3575                 unquant_coeff<<= 3;
3576             }
3577
3578             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3579             level+=64;
3580             if((level&(~127)) == 0){
3581                 for(j=survivor_count-1; j>=0; j--){
3582                     int run= i - survivor[j];
3583                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3584                     score += score_tab[i-run];
3585
3586                     if(score < best_score){
3587                         best_score= score;
3588                         run_tab[i+1]= run;
3589                         level_tab[i+1]= level-64;
3590                     }
3591                 }
3592
3593                 if(s->out_format == FMT_H263){
3594                     for(j=survivor_count-1; j>=0; j--){
3595                         int run= i - survivor[j];
3596                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3597                         score += score_tab[i-run];
3598                         if(score < last_score){
3599                             last_score= score;
3600                             last_run= run;
3601                             last_level= level-64;
3602                             last_i= i+1;
3603                         }
3604                     }
3605                 }
3606             }else{
3607                 distortion += esc_length*lambda;
3608                 for(j=survivor_count-1; j>=0; j--){
3609                     int run= i - survivor[j];
3610                     int score= distortion + score_tab[i-run];
3611
3612                     if(score < best_score){
3613                         best_score= score;
3614                         run_tab[i+1]= run;
3615                         level_tab[i+1]= level-64;
3616                     }
3617                 }
3618
3619                 if(s->out_format == FMT_H263){
3620                   for(j=survivor_count-1; j>=0; j--){
3621                         int run= i - survivor[j];
3622                         int score= distortion + score_tab[i-run];
3623                         if(score < last_score){
3624                             last_score= score;
3625                             last_run= run;
3626                             last_level= level-64;
3627                             last_i= i+1;
3628                         }
3629                     }
3630                 }
3631             }
3632         }
3633
3634         score_tab[i+1]= best_score;
3635
3636         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
3637         if(last_non_zero <= 27){
3638             for(; survivor_count; survivor_count--){
3639                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3640                     break;
3641             }
3642         }else{
3643             for(; survivor_count; survivor_count--){
3644                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3645                     break;
3646             }
3647         }
3648
3649         survivor[ survivor_count++ ]= i+1;
3650     }
3651
3652     if(s->out_format != FMT_H263){
3653         last_score= 256*256*256*120;
3654         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3655             int score= score_tab[i];
3656             if(i) score += lambda*2; //FIXME exacter?
3657
3658             if(score < last_score){
3659                 last_score= score;
3660                 last_i= i;
3661                 last_level= level_tab[i];
3662                 last_run= run_tab[i];
3663             }
3664         }
3665     }
3666
3667     s->coded_score[n] = last_score;
3668
3669     dc= FFABS(block[0]);
3670     last_non_zero= last_i - 1;
3671     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3672
3673     if(last_non_zero < start_i)
3674         return last_non_zero;
3675
3676     if(last_non_zero == 0 && start_i == 0){
3677         int best_level= 0;
3678         int best_score= dc * dc;
3679
3680         for(i=0; i<coeff_count[0]; i++){
3681             int level= coeff[i][0];
3682             int alevel= FFABS(level);
3683             int unquant_coeff, score, distortion;
3684
3685             if(s->out_format == FMT_H263){
3686                     unquant_coeff= (alevel*qmul + qadd)>>3;
3687             }else{ //MPEG1
3688                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3689                     unquant_coeff =   (unquant_coeff - 1) | 1;
3690             }
3691             unquant_coeff = (unquant_coeff + 4) >> 3;
3692             unquant_coeff<<= 3 + 3;
3693
3694             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3695             level+=64;
3696             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3697             else                    score= distortion + esc_length*lambda;
3698
3699             if(score < best_score){
3700                 best_score= score;
3701                 best_level= level - 64;
3702             }
3703         }
3704         block[0]= best_level;
3705         s->coded_score[n] = best_score - dc*dc;
3706         if(best_level == 0) return -1;
3707         else                return last_non_zero;
3708     }
3709
3710     i= last_i;
3711     assert(last_level);
3712
3713     block[ perm_scantable[last_non_zero] ]= last_level;
3714     i -= last_run + 1;
3715
3716     for(; i>start_i; i -= run_tab[i] + 1){
3717         block[ perm_scantable[i-1] ]= level_tab[i];
3718     }
3719
3720     return last_non_zero;
3721 }
3722
3723 //#define REFINE_STATS 1
3724 static int16_t basis[64][64];
3725
3726 static void build_basis(uint8_t *perm){
3727     int i, j, x, y;
3728     emms_c();
3729     for(i=0; i<8; i++){
3730         for(j=0; j<8; j++){
3731             for(y=0; y<8; y++){
3732                 for(x=0; x<8; x++){
3733                     double s= 0.25*(1<<BASIS_SHIFT);
3734                     int index= 8*i + j;
3735                     int perm_index= perm[index];
3736                     if(i==0) s*= sqrt(0.5);
3737                     if(j==0) s*= sqrt(0.5);
3738                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3739                 }
3740             }
3741         }
3742     }
3743 }
3744
3745 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3746                         int16_t *block, int16_t *weight, int16_t *orig,
3747                         int n, int qscale){
3748     int16_t rem[64];
3749     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3750     const uint8_t *scantable= s->intra_scantable.scantable;
3751     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3752 //    unsigned int threshold1, threshold2;
3753 //    int bias=0;
3754     int run_tab[65];
3755     int prev_run=0;
3756     int prev_level=0;
3757     int qmul, qadd, start_i, last_non_zero, i, dc;
3758     uint8_t * length;
3759     uint8_t * last_length;
3760     int lambda;
3761     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3762 #ifdef REFINE_STATS
3763 static int count=0;
3764 static int after_last=0;
3765 static int to_zero=0;
3766 static int from_zero=0;
3767 static int raise=0;
3768 static int lower=0;
3769 static int messed_sign=0;
3770 #endif
3771
3772     if(basis[0][0] == 0)
3773         build_basis(s->dsp.idct_permutation);
3774
3775     qmul= qscale*2;
3776     qadd= (qscale-1)|1;
3777     if (s->mb_intra) {
3778         if (!s->h263_aic) {
3779             if (n < 4)
3780                 q = s->y_dc_scale;
3781             else
3782                 q = s->c_dc_scale;
3783         } else{
3784             /* For AIC we skip quant/dequant of INTRADC */
3785             q = 1;
3786             qadd=0;
3787         }
3788         q <<= RECON_SHIFT-3;
3789         /* note: block[0] is assumed to be positive */
3790         dc= block[0]*q;
3791 //        block[0] = (block[0] + (q >> 1)) / q;
3792         start_i = 1;
3793 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3794 //            bias= 1<<(QMAT_SHIFT-1);
3795         length     = s->intra_ac_vlc_length;
3796         last_length= s->intra_ac_vlc_last_length;
3797     } else {
3798         dc= 0;
3799         start_i = 0;
3800         length     = s->inter_ac_vlc_length;
3801         last_length= s->inter_ac_vlc_last_length;
3802     }
3803     last_non_zero = s->block_last_index[n];
3804
3805 #ifdef REFINE_STATS
3806 {START_TIMER
3807 #endif
3808     dc += (1<<(RECON_SHIFT-1));
3809     for(i=0; i<64; i++){
3810         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3811     }
3812 #ifdef REFINE_STATS
3813 STOP_TIMER("memset rem[]")}
3814 #endif
3815     sum=0;
3816     for(i=0; i<64; i++){
3817         int one= 36;
3818         int qns=4;
3819         int w;
3820
3821         w= FFABS(weight[i]) + qns*one;
3822         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3823
3824         weight[i] = w;
3825 //        w=weight[i] = (63*qns + (w/2)) / w;
3826
3827         assert(w>0);
3828         assert(w<(1<<6));
3829         sum += w*w;
3830     }
3831     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3832 #ifdef REFINE_STATS
3833 {START_TIMER
3834 #endif
3835     run=0;
3836     rle_index=0;
3837     for(i=start_i; i<=last_non_zero; i++){
3838         int j= perm_scantable[i];
3839         const int level= block[j];
3840         int coeff;
3841
3842         if(level){
3843             if(level<0) coeff= qmul*level - qadd;
3844             else        coeff= qmul*level + qadd;
3845             run_tab[rle_index++]=run;
3846             run=0;
3847
3848             s->dsp.add_8x8basis(rem, basis[j], coeff);
3849         }else{
3850             run++;
3851         }
3852     }
3853 #ifdef REFINE_STATS
3854 if(last_non_zero>0){
3855 STOP_TIMER("init rem[]")
3856 }
3857 }
3858
3859 {START_TIMER
3860 #endif
3861     for(;;){
3862         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3863         int best_coeff=0;
3864         int best_change=0;
3865         int run2, best_unquant_change=0, analyze_gradient;
3866 #ifdef REFINE_STATS
3867 {START_TIMER
3868 #endif
3869         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3870
3871         if(analyze_gradient){
3872 #ifdef REFINE_STATS
3873 {START_TIMER
3874 #endif
3875             for(i=0; i<64; i++){
3876                 int w= weight[i];
3877
3878                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3879             }
3880 #ifdef REFINE_STATS
3881 STOP_TIMER("rem*w*w")}
3882 {START_TIMER
3883 #endif
3884             s->dsp.fdct(d1);
3885 #ifdef REFINE_STATS
3886 STOP_TIMER("dct")}
3887 #endif
3888         }
3889
3890         if(start_i){
3891             const int level= block[0];
3892             int change, old_coeff;
3893
3894             assert(s->mb_intra);
3895
3896             old_coeff= q*level;
3897
3898             for(change=-1; change<=1; change+=2){
3899                 int new_level= level + change;
3900                 int score, new_coeff;
3901
3902                 new_coeff= q*new_level;
3903                 if(new_coeff >= 2048 || new_coeff < 0)
3904                     continue;
3905
3906                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3907                 if(score<best_score){
3908                     best_score= score;
3909                     best_coeff= 0;
3910                     best_change= change;
3911                     best_unquant_change= new_coeff - old_coeff;
3912                 }
3913             }
3914         }
3915
3916         run=0;
3917         rle_index=0;
3918         run2= run_tab[rle_index++];
3919         prev_level=0;
3920         prev_run=0;
3921
3922         for(i=start_i; i<64; i++){
3923             int j= perm_scantable[i];
3924             const int level= block[j];
3925             int change, old_coeff;
3926
3927             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3928                 break;
3929
3930             if(level){
3931                 if(level<0) old_coeff= qmul*level - qadd;
3932                 else        old_coeff= qmul*level + qadd;
3933                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3934             }else{
3935                 old_coeff=0;
3936                 run2--;
3937                 assert(run2>=0 || i >= last_non_zero );
3938             }
3939
3940             for(change=-1; change<=1; change+=2){
3941                 int new_level= level + change;
3942                 int score, new_coeff, unquant_change;
3943
3944                 score=0;
3945                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3946                    continue;
3947
3948                 if(new_level){
3949                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3950                     else            new_coeff= qmul*new_level + qadd;
3951                     if(new_coeff >= 2048 || new_coeff <= -2048)
3952                         continue;
3953                     //FIXME check for overflow
3954
3955                     if(level){
3956                         if(level < 63 && level > -63){
3957                             if(i < last_non_zero)
3958                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3959                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3960                             else
3961                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3962                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3963                         }
3964                     }else{
3965                         assert(FFABS(new_level)==1);
3966
3967                         if(analyze_gradient){
3968                             int g= d1[ scantable[i] ];
3969                             if(g && (g^new_level) >= 0)
3970                                 continue;
3971                         }
3972
3973                         if(i < last_non_zero){
3974                             int next_i= i + run2 + 1;
3975                             int next_level= block[ perm_scantable[next_i] ] + 64;
3976
3977                             if(next_level&(~127))
3978                                 next_level= 0;
3979
3980                             if(next_i < last_non_zero)
3981                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3982                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3983                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3984                             else
3985                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3986                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3987                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3988                         }else{
3989                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3990                             if(prev_level){
3991                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3992                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3993                             }
3994                         }
3995                     }
3996                 }else{
3997                     new_coeff=0;
3998                     assert(FFABS(level)==1);
3999
4000                     if(i < last_non_zero){
4001                         int next_i= i + run2 + 1;
4002                         int next_level= block[ perm_scantable[next_i] ] + 64;
4003
4004                         if(next_level&(~127))
4005                             next_level= 0;
4006
4007                         if(next_i < last_non_zero)
4008                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4009                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4010                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4011                         else
4012                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4013                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4014                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4015                     }else{
4016                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4017                         if(prev_level){
4018                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4019                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4020                         }
4021                     }
4022                 }
4023
4024                 score *= lambda;
4025
4026                 unquant_change= new_coeff - old_coeff;
4027                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
4028
4029                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
4030                 if(score<best_score){
4031                     best_score= score;
4032                     best_coeff= i;
4033                     best_change= change;
4034                     best_unquant_change= unquant_change;
4035                 }
4036             }
4037             if(level){
4038                 prev_level= level + 64;
4039                 if(prev_level&(~127))
4040                     prev_level= 0;
4041                 prev_run= run;
4042                 run=0;
4043             }else{
4044                 run++;
4045             }
4046         }
4047 #ifdef REFINE_STATS
4048 STOP_TIMER("iterative step")}
4049 #endif
4050
4051         if(best_change){
4052             int j= perm_scantable[ best_coeff ];
4053
4054             block[j] += best_change;
4055
4056             if(best_coeff > last_non_zero){
4057                 last_non_zero= best_coeff;
4058                 assert(block[j]);
4059 #ifdef REFINE_STATS
4060 after_last++;
4061 #endif
4062             }else{
4063 #ifdef REFINE_STATS
4064 if(block[j]){
4065     if(block[j] - best_change){
4066         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4067             raise++;
4068         }else{
4069             lower++;
4070         }
4071     }else{
4072         from_zero++;
4073     }
4074 }else{
4075     to_zero++;
4076 }
4077 #endif
4078                 for(; last_non_zero>=start_i; last_non_zero--){
4079                     if(block[perm_scantable[last_non_zero]])
4080                         break;
4081                 }
4082             }
4083 #ifdef REFINE_STATS
4084 count++;
4085 if(256*256*256*64 % count == 0){
4086     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4087 }
4088 #endif
4089             run=0;
4090             rle_index=0;
4091             for(i=start_i; i<=last_non_zero; i++){
4092                 int j= perm_scantable[i];
4093                 const int level= block[j];
4094
4095                  if(level){
4096                      run_tab[rle_index++]=run;
4097                      run=0;
4098                  }else{
4099                      run++;
4100                  }
4101             }
4102
4103             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4104         }else{
4105             break;
4106         }
4107     }
4108 #ifdef REFINE_STATS
4109 if(last_non_zero>0){
4110 STOP_TIMER("iterative search")
4111 }
4112 }
4113 #endif
4114
4115     return last_non_zero;
4116 }
4117
4118 int ff_dct_quantize_c(MpegEncContext *s,
4119                         int16_t *block, int n,
4120                         int qscale, int *overflow)
4121 {
4122     int i, j, level, last_non_zero, q, start_i;
4123     const int *qmat;
4124     const uint8_t *scantable= s->intra_scantable.scantable;
4125     int bias;
4126     int max=0;
4127     unsigned int threshold1, threshold2;
4128
4129     s->dsp.fdct (block);
4130
4131     if(s->dct_error_sum)
4132         s->denoise_dct(s, block);
4133
4134     if (s->mb_intra) {
4135         if (!s->h263_aic) {
4136             if (n < 4)
4137                 q = s->y_dc_scale;
4138             else
4139                 q = s->c_dc_scale;
4140             q = q << 3;
4141         } else
4142             /* For AIC we skip quant/dequant of INTRADC */
4143             q = 1 << 3;
4144
4145         /* note: block[0] is assumed to be positive */
4146         block[0] = (block[0] + (q >> 1)) / q;
4147         start_i = 1;
4148         last_non_zero = 0;
4149         qmat = s->q_intra_matrix[qscale];
4150         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4151     } else {
4152         start_i = 0;
4153         last_non_zero = -1;
4154         qmat = s->q_inter_matrix[qscale];
4155         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4156     }
4157     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4158     threshold2= (threshold1<<1);
4159     for(i=63;i>=start_i;i--) {
4160         j = scantable[i];
4161         level = block[j] * qmat[j];
4162
4163         if(((unsigned)(level+threshold1))>threshold2){
4164             last_non_zero = i;
4165             break;
4166         }else{
4167             block[j]=0;
4168         }
4169     }
4170     for(i=start_i; i<=last_non_zero; i++) {
4171         j = scantable[i];
4172         level = block[j] * qmat[j];
4173
4174 //        if(   bias+level >= (1<<QMAT_SHIFT)
4175 //           || bias-level >= (1<<QMAT_SHIFT)){
4176         if(((unsigned)(level+threshold1))>threshold2){
4177             if(level>0){
4178                 level= (bias + level)>>QMAT_SHIFT;
4179                 block[j]= level;
4180             }else{
4181                 level= (bias - level)>>QMAT_SHIFT;
4182                 block[j]= -level;
4183             }
4184             max |=level;
4185         }else{
4186             block[j]=0;
4187         }
4188     }
4189     *overflow= s->max_qcoeff < max; //overflow might have happened
4190
4191     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4192     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4193         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4194
4195     return last_non_zero;
4196 }
4197
/* Byte offset of field x inside MpegEncContext; used by the option tables
 * below to tie each option to a context field. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Flags shared by the option entries below (video + encoding parameter).
 * Parenthesized so the expansion stays a single operand wherever it is used. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4200 static const AVOption h263_options[] = {
4201     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4202     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4203     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4204     FF_MPV_COMMON_OPTS
4205     { NULL },
4206 };
4207
4208 static const AVClass h263_class = {
4209     .class_name = "H.263 encoder",
4210     .item_name  = av_default_item_name,
4211     .option     = h263_options,
4212     .version    = LIBAVUTIL_VERSION_INT,
4213 };
4214
4215 AVCodec ff_h263_encoder = {
4216     .name           = "h263",
4217     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4218     .type           = AVMEDIA_TYPE_VIDEO,
4219     .id             = AV_CODEC_ID_H263,
4220     .priv_data_size = sizeof(MpegEncContext),
4221     .init           = ff_MPV_encode_init,
4222     .encode2        = ff_MPV_encode_picture,
4223     .close          = ff_MPV_encode_end,
4224     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4225     .priv_class     = &h263_class,
4226 };
4227
4228 static const AVOption h263p_options[] = {
4229     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4230     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4231     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4232     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4233     FF_MPV_COMMON_OPTS
4234     { NULL },
4235 };
4236 static const AVClass h263p_class = {
4237     .class_name = "H.263p encoder",
4238     .item_name  = av_default_item_name,
4239     .option     = h263p_options,
4240     .version    = LIBAVUTIL_VERSION_INT,
4241 };
4242
4243 AVCodec ff_h263p_encoder = {
4244     .name           = "h263p",
4245     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4246     .type           = AVMEDIA_TYPE_VIDEO,
4247     .id             = AV_CODEC_ID_H263P,
4248     .priv_data_size = sizeof(MpegEncContext),
4249     .init           = ff_MPV_encode_init,
4250     .encode2        = ff_MPV_encode_picture,
4251     .close          = ff_MPV_encode_end,
4252     .capabilities   = CODEC_CAP_SLICE_THREADS,
4253     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4254     .priv_class     = &h263p_class,
4255 };
4256
4257 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4258
4259 AVCodec ff_msmpeg4v2_encoder = {
4260     .name           = "msmpeg4v2",
4261     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4262     .type           = AVMEDIA_TYPE_VIDEO,
4263     .id             = AV_CODEC_ID_MSMPEG4V2,
4264     .priv_data_size = sizeof(MpegEncContext),
4265     .init           = ff_MPV_encode_init,
4266     .encode2        = ff_MPV_encode_picture,
4267     .close          = ff_MPV_encode_end,
4268     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4269     .priv_class     = &msmpeg4v2_class,
4270 };
4271
4272 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4273
4274 AVCodec ff_msmpeg4v3_encoder = {
4275     .name           = "msmpeg4",
4276     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4277     .type           = AVMEDIA_TYPE_VIDEO,
4278     .id             = AV_CODEC_ID_MSMPEG4V3,
4279     .priv_data_size = sizeof(MpegEncContext),
4280     .init           = ff_MPV_encode_init,
4281     .encode2        = ff_MPV_encode_picture,
4282     .close          = ff_MPV_encode_end,
4283     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4284     .priv_class     = &msmpeg4v3_class,
4285 };
4286
4287 FF_MPV_GENERIC_CLASS(wmv1)
4288
4289 AVCodec ff_wmv1_encoder = {
4290     .name           = "wmv1",
4291     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4292     .type           = AVMEDIA_TYPE_VIDEO,
4293     .id             = AV_CODEC_ID_WMV1,
4294     .priv_data_size = sizeof(MpegEncContext),
4295     .init           = ff_MPV_encode_init,
4296     .encode2        = ff_MPV_encode_picture,
4297     .close          = ff_MPV_encode_end,
4298     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4299     .priv_class     = &wmv1_class,
4300 };