]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
mpegvideo_enc: Don't call ff_h263dsp_init unconditionally
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "avcodec.h"
38 #include "dct.h"
39 #include "dsputil.h"
40 #include "mpeg12.h"
41 #include "mpegvideo.h"
42 #include "h261.h"
43 #include "h263.h"
44 #include "mathops.h"
45 #include "mjpegenc.h"
46 #include "msmpeg4.h"
47 #include "faandct.h"
48 #include "thread.h"
49 #include "aandcttab.h"
50 #include "flv.h"
51 #include "mpeg4video.h"
52 #include "internal.h"
53 #include "bytestream.h"
54 #include <limits.h>
55
/* Prototypes for static helpers of this translation unit, declared up front
 * so that code appearing before their definitions can reference them. */
static int encode_picture(MpegEncContext *s, int picture_number);
static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
static int sse_mb(MpegEncContext *s);
static void denoise_dct_c(MpegEncContext *s, int16_t *block);
static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);

/* File-scope default tables; MPV_encode_defaults() installs them as
 * s->me.mv_penalty and s->fcode_tab respectively. */
static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
static uint8_t default_fcode_tab[MAX_MV * 2 + 1];

/* Option table made of the shared FF_MPV_COMMON_OPTS entries followed by the
 * NULL terminator entry. */
const AVOption ff_mpv_generic_options[] = {
    FF_MPV_COMMON_OPTS
    { NULL },
};
69
70 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
71                        uint16_t (*qmat16)[2][64],
72                        const uint16_t *quant_matrix,
73                        int bias, int qmin, int qmax, int intra)
74 {
75     int qscale;
76     int shift = 0;
77
78     for (qscale = qmin; qscale <= qmax; qscale++) {
79         int i;
80         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
81             dsp->fdct == ff_jpeg_fdct_islow_10 ||
82             dsp->fdct == ff_faandct) {
83             for (i = 0; i < 64; i++) {
84                 const int j = dsp->idct_permutation[i];
85                 /* 16 <= qscale * quant_matrix[i] <= 7905
86                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
87                  *             19952 <=              x  <= 249205026
88                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
89                  *           3444240 >= (1 << 36) / (x) >= 275 */
90
91                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
92                                         (qscale * quant_matrix[j]));
93             }
94         } else if (dsp->fdct == ff_fdct_ifast) {
95             for (i = 0; i < 64; i++) {
96                 const int j = dsp->idct_permutation[i];
97                 /* 16 <= qscale * quant_matrix[i] <= 7905
98                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
99                  *             19952 <=              x  <= 249205026
100                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
101                  *           3444240 >= (1 << 36) / (x) >= 275 */
102
103                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
104                                         (ff_aanscales[i] * qscale *
105                                          quant_matrix[j]));
106             }
107         } else {
108             for (i = 0; i < 64; i++) {
109                 const int j = dsp->idct_permutation[i];
110                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
111                  * Assume x = qscale * quant_matrix[i]
112                  * So             16 <=              x  <= 7905
113                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
114                  * so          32768 >= (1 << 19) / (x) >= 67 */
115                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
116                                         (qscale * quant_matrix[j]));
117                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
118                 //                    (qscale * quant_matrix[i]);
119                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
120                                        (qscale * quant_matrix[j]);
121
122                 if (qmat16[qscale][0][i] == 0 ||
123                     qmat16[qscale][0][i] == 128 * 256)
124                     qmat16[qscale][0][i] = 128 * 256 - 1;
125                 qmat16[qscale][1][i] =
126                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
127                                 qmat16[qscale][0][i]);
128             }
129         }
130
131         for (i = intra; i < 64; i++) {
132             int64_t max = 8191;
133             if (dsp->fdct == ff_fdct_ifast) {
134                 max = (8191LL * ff_aanscales[i]) >> 14;
135             }
136             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
137                 shift++;
138             }
139         }
140     }
141     if (shift) {
142         av_log(NULL, AV_LOG_INFO,
143                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
144                QMAT_SHIFT - shift);
145     }
146 }
147
148 static inline void update_qscale(MpegEncContext *s)
149 {
150     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
151                 (FF_LAMBDA_SHIFT + 7);
152     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
153
154     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
155                  FF_LAMBDA_SHIFT;
156 }
157
158 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
159 {
160     int i;
161
162     if (matrix) {
163         put_bits(pb, 1, 1);
164         for (i = 0; i < 64; i++) {
165             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
166         }
167     } else
168         put_bits(pb, 1, 0);
169 }
170
171 /**
172  * init s->current_picture.qscale_table from s->lambda_table
173  */
174 void ff_init_qscale_tab(MpegEncContext *s)
175 {
176     int8_t * const qscale_table = s->current_picture.qscale_table;
177     int i;
178
179     for (i = 0; i < s->mb_num; i++) {
180         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
181         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
182         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
183                                                   s->avctx->qmax);
184     }
185 }
186
187 static void update_duplicate_context_after_me(MpegEncContext *dst,
188                                               MpegEncContext *src)
189 {
190 #define COPY(a) dst->a= src->a
191     COPY(pict_type);
192     COPY(current_picture);
193     COPY(f_code);
194     COPY(b_code);
195     COPY(qscale);
196     COPY(lambda);
197     COPY(lambda2);
198     COPY(picture_in_gop_number);
199     COPY(gop_picture_number);
200     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
201     COPY(progressive_frame);    // FIXME don't set in encode_header
202     COPY(partitioned_frame);    // FIXME don't set in encode_header
203 #undef COPY
204 }
205
206 /**
207  * Set the given MpegEncContext to defaults for encoding.
208  * the changed fields will not depend upon the prior state of the MpegEncContext.
209  */
210 static void MPV_encode_defaults(MpegEncContext *s)
211 {
212     int i;
213     ff_MPV_common_defaults(s);
214
215     for (i = -16; i < 16; i++) {
216         default_fcode_tab[i + MAX_MV] = 1;
217     }
218     s->me.mv_penalty = default_mv_penalty;
219     s->fcode_tab     = default_fcode_tab;
220
221     s->input_picture_number  = 0;
222     s->picture_in_gop_number = 0;
223 }
224
225 /* init video encoder */
226 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
227 {
228     MpegEncContext *s = avctx->priv_data;
229     int i, ret;
230
231     MPV_encode_defaults(s);
232
233     switch (avctx->codec_id) {
234     case AV_CODEC_ID_MPEG2VIDEO:
235         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
236             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
237             av_log(avctx, AV_LOG_ERROR,
238                    "only YUV420 and YUV422 are supported\n");
239             return -1;
240         }
241         break;
242     case AV_CODEC_ID_MJPEG:
243         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
244             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
245             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
246               avctx->pix_fmt != AV_PIX_FMT_YUV422P) ||
247              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
248             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
249             return -1;
250         }
251         break;
252     default:
253         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
254             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
255             return -1;
256         }
257     }
258
259     switch (avctx->pix_fmt) {
260     case AV_PIX_FMT_YUVJ422P:
261     case AV_PIX_FMT_YUV422P:
262         s->chroma_format = CHROMA_422;
263         break;
264     case AV_PIX_FMT_YUVJ420P:
265     case AV_PIX_FMT_YUV420P:
266     default:
267         s->chroma_format = CHROMA_420;
268         break;
269     }
270
271     s->bit_rate = avctx->bit_rate;
272     s->width    = avctx->width;
273     s->height   = avctx->height;
274     if (avctx->gop_size > 600 &&
275         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
276         av_log(avctx, AV_LOG_ERROR,
277                "Warning keyframe interval too large! reducing it ...\n");
278         avctx->gop_size = 600;
279     }
280     s->gop_size     = avctx->gop_size;
281     s->avctx        = avctx;
282     s->flags        = avctx->flags;
283     s->flags2       = avctx->flags2;
284     if (avctx->max_b_frames > MAX_B_FRAMES) {
285         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
286                "is %d.\n", MAX_B_FRAMES);
287     }
288     s->max_b_frames = avctx->max_b_frames;
289     s->codec_id     = avctx->codec->id;
290     s->strict_std_compliance = avctx->strict_std_compliance;
291     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
292     s->mpeg_quant         = avctx->mpeg_quant;
293     s->rtp_mode           = !!avctx->rtp_payload_size;
294     s->intra_dc_precision = avctx->intra_dc_precision;
295     s->user_specified_pts = AV_NOPTS_VALUE;
296
297     if (s->gop_size <= 1) {
298         s->intra_only = 1;
299         s->gop_size   = 12;
300     } else {
301         s->intra_only = 0;
302     }
303
304     s->me_method = avctx->me_method;
305
306     /* Fixed QSCALE */
307     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
308
309     s->adaptive_quant = (s->avctx->lumi_masking ||
310                          s->avctx->dark_masking ||
311                          s->avctx->temporal_cplx_masking ||
312                          s->avctx->spatial_cplx_masking  ||
313                          s->avctx->p_masking      ||
314                          s->avctx->border_masking ||
315                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
316                         !s->fixed_qscale;
317
318     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
319
320     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
321         av_log(avctx, AV_LOG_ERROR,
322                "a vbv buffer size is needed, "
323                "for encoding with a maximum bitrate\n");
324         return -1;
325     }
326
327     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
328         av_log(avctx, AV_LOG_INFO,
329                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
330     }
331
332     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
333         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
334         return -1;
335     }
336
337     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
338         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
339         return -1;
340     }
341
342     if (avctx->rc_max_rate &&
343         avctx->rc_max_rate == avctx->bit_rate &&
344         avctx->rc_max_rate != avctx->rc_min_rate) {
345         av_log(avctx, AV_LOG_INFO,
346                "impossible bitrate constraints, this will fail\n");
347     }
348
349     if (avctx->rc_buffer_size &&
350         avctx->bit_rate * (int64_t)avctx->time_base.num >
351             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
352         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
353         return -1;
354     }
355
356     if (!s->fixed_qscale &&
357         avctx->bit_rate * av_q2d(avctx->time_base) >
358             avctx->bit_rate_tolerance) {
359         av_log(avctx, AV_LOG_ERROR,
360                "bitrate tolerance too small for bitrate\n");
361         return -1;
362     }
363
364     if (s->avctx->rc_max_rate &&
365         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
366         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
367          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
368         90000LL * (avctx->rc_buffer_size - 1) >
369             s->avctx->rc_max_rate * 0xFFFFLL) {
370         av_log(avctx, AV_LOG_INFO,
371                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
372                "specified vbv buffer is too large for the given bitrate!\n");
373     }
374
375     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
376         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
377         s->codec_id != AV_CODEC_ID_FLV1) {
378         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
379         return -1;
380     }
381
382     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
383         av_log(avctx, AV_LOG_ERROR,
384                "OBMC is only supported with simple mb decision\n");
385         return -1;
386     }
387
388     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
389         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
390         return -1;
391     }
392
393     if (s->max_b_frames                    &&
394         s->codec_id != AV_CODEC_ID_MPEG4      &&
395         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
396         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
397         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
398         return -1;
399     }
400
401     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
402          s->codec_id == AV_CODEC_ID_H263  ||
403          s->codec_id == AV_CODEC_ID_H263P) &&
404         (avctx->sample_aspect_ratio.num > 255 ||
405          avctx->sample_aspect_ratio.den > 255)) {
406         av_log(avctx, AV_LOG_ERROR,
407                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
408                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
409         return -1;
410     }
411
412     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
413         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
414         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
415         return -1;
416     }
417
418     // FIXME mpeg2 uses that too
419     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
420         av_log(avctx, AV_LOG_ERROR,
421                "mpeg2 style quantization not supported by codec\n");
422         return -1;
423     }
424
425     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
426         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
427         return -1;
428     }
429
430     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
431         s->avctx->mb_decision != FF_MB_DECISION_RD) {
432         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
433         return -1;
434     }
435
436     if (s->avctx->scenechange_threshold < 1000000000 &&
437         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
438         av_log(avctx, AV_LOG_ERROR,
439                "closed gop with scene change detection are not supported yet, "
440                "set threshold to 1000000000\n");
441         return -1;
442     }
443
444     if (s->flags & CODEC_FLAG_LOW_DELAY) {
445         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
446             av_log(avctx, AV_LOG_ERROR,
447                   "low delay forcing is only available for mpeg2\n");
448             return -1;
449         }
450         if (s->max_b_frames != 0) {
451             av_log(avctx, AV_LOG_ERROR,
452                    "b frames cannot be used with low delay\n");
453             return -1;
454         }
455     }
456
457     if (s->q_scale_type == 1) {
458         if (avctx->qmax > 12) {
459             av_log(avctx, AV_LOG_ERROR,
460                    "non linear quant only supports qmax <= 12 currently\n");
461             return -1;
462         }
463     }
464
465     if (s->avctx->thread_count > 1         &&
466         s->codec_id != AV_CODEC_ID_MPEG4      &&
467         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
468         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
469         (s->codec_id != AV_CODEC_ID_H263P)) {
470         av_log(avctx, AV_LOG_ERROR,
471                "multi threaded encoding not supported by codec\n");
472         return -1;
473     }
474
475     if (s->avctx->thread_count < 1) {
476         av_log(avctx, AV_LOG_ERROR,
477                "automatic thread number detection not supported by codec,"
478                "patch welcome\n");
479         return -1;
480     }
481
482     if (s->avctx->thread_count > 1)
483         s->rtp_mode = 1;
484
485     if (!avctx->time_base.den || !avctx->time_base.num) {
486         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
487         return -1;
488     }
489
490     i = (INT_MAX / 2 + 128) >> 8;
491     if (avctx->mb_threshold >= i) {
492         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
493                i - 1);
494         return -1;
495     }
496
497     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
498         av_log(avctx, AV_LOG_INFO,
499                "notice: b_frame_strategy only affects the first pass\n");
500         avctx->b_frame_strategy = 0;
501     }
502
503     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
504     if (i > 1) {
505         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
506         avctx->time_base.den /= i;
507         avctx->time_base.num /= i;
508         //return -1;
509     }
510
511     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
512         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
513         // (a + x * 3 / 8) / x
514         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
515         s->inter_quant_bias = 0;
516     } else {
517         s->intra_quant_bias = 0;
518         // (a - x / 4) / x
519         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
520     }
521
522     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
523         s->intra_quant_bias = avctx->intra_quant_bias;
524     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
525         s->inter_quant_bias = avctx->inter_quant_bias;
526
527     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
528         s->avctx->time_base.den > (1 << 16) - 1) {
529         av_log(avctx, AV_LOG_ERROR,
530                "timebase %d/%d not supported by MPEG 4 standard, "
531                "the maximum admitted value for the timebase denominator "
532                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
533                (1 << 16) - 1);
534         return -1;
535     }
536     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
537
538     switch (avctx->codec->id) {
539     case AV_CODEC_ID_MPEG1VIDEO:
540         s->out_format = FMT_MPEG1;
541         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
542         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
543         break;
544     case AV_CODEC_ID_MPEG2VIDEO:
545         s->out_format = FMT_MPEG1;
546         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
547         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
548         s->rtp_mode   = 1;
549         break;
550     case AV_CODEC_ID_MJPEG:
551         s->out_format = FMT_MJPEG;
552         s->intra_only = 1; /* force intra only for jpeg */
553         if (!CONFIG_MJPEG_ENCODER ||
554             ff_mjpeg_encode_init(s) < 0)
555             return -1;
556         avctx->delay = 0;
557         s->low_delay = 1;
558         break;
559     case AV_CODEC_ID_H261:
560         if (!CONFIG_H261_ENCODER)
561             return -1;
562         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
563             av_log(avctx, AV_LOG_ERROR,
564                    "The specified picture size of %dx%d is not valid for the "
565                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
566                     s->width, s->height);
567             return -1;
568         }
569         s->out_format = FMT_H261;
570         avctx->delay  = 0;
571         s->low_delay  = 1;
572         break;
573     case AV_CODEC_ID_H263:
574         if (!CONFIG_H263_ENCODER)
575         return -1;
576         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
577                              s->width, s->height) == 8) {
578             av_log(avctx, AV_LOG_INFO,
579                    "The specified picture size of %dx%d is not valid for "
580                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
581                    "352x288, 704x576, and 1408x1152."
582                    "Try H.263+.\n", s->width, s->height);
583             return -1;
584         }
585         s->out_format = FMT_H263;
586         avctx->delay  = 0;
587         s->low_delay  = 1;
588         break;
589     case AV_CODEC_ID_H263P:
590         s->out_format = FMT_H263;
591         s->h263_plus  = 1;
592         /* Fx */
593         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
594         s->modified_quant  = s->h263_aic;
595         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
596         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
597
598         /* /Fx */
599         /* These are just to be sure */
600         avctx->delay = 0;
601         s->low_delay = 1;
602         break;
603     case AV_CODEC_ID_FLV1:
604         s->out_format      = FMT_H263;
605         s->h263_flv        = 2; /* format = 1; 11-bit codes */
606         s->unrestricted_mv = 1;
607         s->rtp_mode  = 0; /* don't allow GOB */
608         avctx->delay = 0;
609         s->low_delay = 1;
610         break;
611     case AV_CODEC_ID_RV10:
612         s->out_format = FMT_H263;
613         avctx->delay  = 0;
614         s->low_delay  = 1;
615         break;
616     case AV_CODEC_ID_RV20:
617         s->out_format      = FMT_H263;
618         avctx->delay       = 0;
619         s->low_delay       = 1;
620         s->modified_quant  = 1;
621         s->h263_aic        = 1;
622         s->h263_plus       = 1;
623         s->loop_filter     = 1;
624         s->unrestricted_mv = 0;
625         break;
626     case AV_CODEC_ID_MPEG4:
627         s->out_format      = FMT_H263;
628         s->h263_pred       = 1;
629         s->unrestricted_mv = 1;
630         s->low_delay       = s->max_b_frames ? 0 : 1;
631         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
632         break;
633     case AV_CODEC_ID_MSMPEG4V2:
634         s->out_format      = FMT_H263;
635         s->h263_pred       = 1;
636         s->unrestricted_mv = 1;
637         s->msmpeg4_version = 2;
638         avctx->delay       = 0;
639         s->low_delay       = 1;
640         break;
641     case AV_CODEC_ID_MSMPEG4V3:
642         s->out_format        = FMT_H263;
643         s->h263_pred         = 1;
644         s->unrestricted_mv   = 1;
645         s->msmpeg4_version   = 3;
646         s->flipflop_rounding = 1;
647         avctx->delay         = 0;
648         s->low_delay         = 1;
649         break;
650     case AV_CODEC_ID_WMV1:
651         s->out_format        = FMT_H263;
652         s->h263_pred         = 1;
653         s->unrestricted_mv   = 1;
654         s->msmpeg4_version   = 4;
655         s->flipflop_rounding = 1;
656         avctx->delay         = 0;
657         s->low_delay         = 1;
658         break;
659     case AV_CODEC_ID_WMV2:
660         s->out_format        = FMT_H263;
661         s->h263_pred         = 1;
662         s->unrestricted_mv   = 1;
663         s->msmpeg4_version   = 5;
664         s->flipflop_rounding = 1;
665         avctx->delay         = 0;
666         s->low_delay         = 1;
667         break;
668     default:
669         return -1;
670     }
671
672     avctx->has_b_frames = !s->low_delay;
673
674     s->encoding = 1;
675
676     s->progressive_frame    =
677     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
678                                                 CODEC_FLAG_INTERLACED_ME) ||
679                                 s->alternate_scan);
680
681     /* init */
682     if (ff_MPV_common_init(s) < 0)
683         return -1;
684
685     if (ARCH_X86)
686         ff_MPV_encode_init_x86(s);
687
688     s->avctx->coded_frame = &s->current_picture.f;
689
690     if (s->msmpeg4_version) {
691         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
692                           2 * 2 * (MAX_LEVEL + 1) *
693                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
694     }
695     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
696
697     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
698     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
699     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
700     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
701     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
702                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
703     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
704                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
705
706     if (s->avctx->noise_reduction) {
707         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
708                           2 * 64 * sizeof(uint16_t), fail);
709     }
710
711     if (CONFIG_H263_ENCODER)
712         ff_h263dsp_init(&s->h263dsp);
713     if (!s->dct_quantize)
714         s->dct_quantize = ff_dct_quantize_c;
715     if (!s->denoise_dct)
716         s->denoise_dct  = denoise_dct_c;
717     s->fast_dct_quantize = s->dct_quantize;
718     if (avctx->trellis)
719         s->dct_quantize  = dct_quantize_trellis_c;
720
721     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
722         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
723
724     s->quant_precision = 5;
725
726     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
727     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
728
729     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
730         ff_h261_encode_init(s);
731     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
732         ff_h263_encode_init(s);
733     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
734         ff_msmpeg4_encode_init(s);
735     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
736         && s->out_format == FMT_MPEG1)
737         ff_mpeg1_encode_init(s);
738
739     /* init q matrix */
740     for (i = 0; i < 64; i++) {
741         int j = s->dsp.idct_permutation[i];
742         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
743             s->mpeg_quant) {
744             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
745             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
746         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
747             s->intra_matrix[j] =
748             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
749         } else {
750             /* mpeg1/2 */
751             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
752             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
753         }
754         if (s->avctx->intra_matrix)
755             s->intra_matrix[j] = s->avctx->intra_matrix[i];
756         if (s->avctx->inter_matrix)
757             s->inter_matrix[j] = s->avctx->inter_matrix[i];
758     }
759
760     /* precompute matrix */
761     /* for mjpeg, we do include qscale in the matrix */
762     if (s->out_format != FMT_MJPEG) {
763         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
764                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
765                           31, 1);
766         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
767                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
768                           31, 0);
769     }
770
771     if (ff_rate_control_init(s) < 0)
772         return -1;
773
774 #if FF_API_ERROR_RATE
775     FF_DISABLE_DEPRECATION_WARNINGS
776     if (avctx->error_rate)
777         s->error_rate = avctx->error_rate;
778     FF_ENABLE_DEPRECATION_WARNINGS;
779 #endif
780
781     if (avctx->b_frame_strategy == 2) {
782         for (i = 0; i < s->max_b_frames + 2; i++) {
783             s->tmp_frames[i] = av_frame_alloc();
784             if (!s->tmp_frames[i])
785                 return AVERROR(ENOMEM);
786
787             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
788             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
789             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
790
791             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
792             if (ret < 0)
793                 return ret;
794         }
795     }
796
797     return 0;
798 fail:
799     ff_MPV_encode_end(avctx);
800     return AVERROR_UNKNOWN;
801 }
802
803 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
804 {
805     MpegEncContext *s = avctx->priv_data;
806     int i;
807
808     ff_rate_control_uninit(s);
809
810     ff_MPV_common_end(s);
811     if (CONFIG_MJPEG_ENCODER &&
812         s->out_format == FMT_MJPEG)
813         ff_mjpeg_encode_close(s);
814
815     av_freep(&avctx->extradata);
816
817     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
818         av_frame_free(&s->tmp_frames[i]);
819
820     ff_free_picture_tables(&s->new_picture);
821     ff_mpeg_unref_picture(s, &s->new_picture);
822
823     av_freep(&s->avctx->stats_out);
824     av_freep(&s->ac_stats);
825
826     av_freep(&s->q_intra_matrix);
827     av_freep(&s->q_inter_matrix);
828     av_freep(&s->q_intra_matrix16);
829     av_freep(&s->q_inter_matrix16);
830     av_freep(&s->input_picture);
831     av_freep(&s->reordered_input_picture);
832     av_freep(&s->dct_offset);
833
834     return 0;
835 }
836
/**
 * Sum of absolute differences between a 16x16 block and a constant value.
 *
 * @param src    top-left sample of the block
 * @param ref    value each sample is compared against
 * @param stride offset between the starts of consecutive rows of src
 * @return sum of |sample - ref| over the 256 samples of the block
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
850
851 static int get_intra_count(MpegEncContext *s, uint8_t *src,
852                            uint8_t *ref, int stride)
853 {
854     int x, y, w, h;
855     int acc = 0;
856
857     w = s->width  & ~15;
858     h = s->height & ~15;
859
860     for (y = 0; y < h; y += 16) {
861         for (x = 0; x < w; x += 16) {
862             int offset = x + y * stride;
863             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
864                                      16);
865             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
866             int sae  = get_sae(src + offset, mean, stride);
867
868             acc += sae + 500 < sad;
869         }
870     }
871     return acc;
872 }
873
874
875 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
876 {
877     Picture *pic = NULL;
878     int64_t pts;
879     int i, display_picture_number = 0, ret;
880     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
881                                                  (s->low_delay ? 0 : 1);
882     int direct = 1;
883
884     if (pic_arg) {
885         pts = pic_arg->pts;
886         display_picture_number = s->input_picture_number++;
887
888         if (pts != AV_NOPTS_VALUE) {
889             if (s->user_specified_pts != AV_NOPTS_VALUE) {
890                 int64_t time = pts;
891                 int64_t last = s->user_specified_pts;
892
893                 if (time <= last) {
894                     av_log(s->avctx, AV_LOG_ERROR,
895                            "Error, Invalid timestamp=%"PRId64", "
896                            "last=%"PRId64"\n", pts, s->user_specified_pts);
897                     return -1;
898                 }
899
900                 if (!s->low_delay && display_picture_number == 1)
901                     s->dts_delta = time - last;
902             }
903             s->user_specified_pts = pts;
904         } else {
905             if (s->user_specified_pts != AV_NOPTS_VALUE) {
906                 s->user_specified_pts =
907                 pts = s->user_specified_pts + 1;
908                 av_log(s->avctx, AV_LOG_INFO,
909                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
910                        pts);
911             } else {
912                 pts = display_picture_number;
913             }
914         }
915     }
916
917     if (pic_arg) {
918         if (!pic_arg->buf[0]);
919             direct = 0;
920         if (pic_arg->linesize[0] != s->linesize)
921             direct = 0;
922         if (pic_arg->linesize[1] != s->uvlinesize)
923             direct = 0;
924         if (pic_arg->linesize[2] != s->uvlinesize)
925             direct = 0;
926
927         av_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
928                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
929
930         if (direct) {
931             i = ff_find_unused_picture(s, 1);
932             if (i < 0)
933                 return i;
934
935             pic = &s->picture[i];
936             pic->reference = 3;
937
938             if ((ret = av_frame_ref(&pic->f, pic_arg)) < 0)
939                 return ret;
940             if (ff_alloc_picture(s, pic, 1) < 0) {
941                 return -1;
942             }
943         } else {
944             i = ff_find_unused_picture(s, 0);
945             if (i < 0)
946                 return i;
947
948             pic = &s->picture[i];
949             pic->reference = 3;
950
951             if (ff_alloc_picture(s, pic, 0) < 0) {
952                 return -1;
953             }
954
955             if (pic->f.data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
956                 pic->f.data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
957                 pic->f.data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
958                 // empty
959             } else {
960                 int h_chroma_shift, v_chroma_shift;
961                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
962                                                  &h_chroma_shift,
963                                                  &v_chroma_shift);
964
965                 for (i = 0; i < 3; i++) {
966                     int src_stride = pic_arg->linesize[i];
967                     int dst_stride = i ? s->uvlinesize : s->linesize;
968                     int h_shift = i ? h_chroma_shift : 0;
969                     int v_shift = i ? v_chroma_shift : 0;
970                     int w = s->width  >> h_shift;
971                     int h = s->height >> v_shift;
972                     uint8_t *src = pic_arg->data[i];
973                     uint8_t *dst = pic->f.data[i];
974
975                     if (!s->avctx->rc_buffer_size)
976                         dst += INPLACE_OFFSET;
977
978                     if (src_stride == dst_stride)
979                         memcpy(dst, src, src_stride * h);
980                     else {
981                         while (h--) {
982                             memcpy(dst, src, w);
983                             dst += dst_stride;
984                             src += src_stride;
985                         }
986                     }
987                 }
988             }
989         }
990         ret = av_frame_copy_props(&pic->f, pic_arg);
991         if (ret < 0)
992             return ret;
993
994         pic->f.display_picture_number = display_picture_number;
995         pic->f.pts = pts; // we set this here to avoid modifiying pic_arg
996     }
997
998     /* shift buffer entries */
999     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1000         s->input_picture[i - 1] = s->input_picture[i];
1001
1002     s->input_picture[encoding_delay] = (Picture*) pic;
1003
1004     return 0;
1005 }
1006
1007 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1008 {
1009     int x, y, plane;
1010     int score = 0;
1011     int64_t score64 = 0;
1012
1013     for (plane = 0; plane < 3; plane++) {
1014         const int stride = p->f.linesize[plane];
1015         const int bw = plane ? 1 : 2;
1016         for (y = 0; y < s->mb_height * bw; y++) {
1017             for (x = 0; x < s->mb_width * bw; x++) {
1018                 int off = p->shared ? 0 : 16;
1019                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1020                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1021                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1022
1023                 switch (s->avctx->frame_skip_exp) {
1024                 case 0: score    =  FFMAX(score, v);          break;
1025                 case 1: score   += FFABS(v);                  break;
1026                 case 2: score   += v * v;                     break;
1027                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1028                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1029                 }
1030             }
1031         }
1032     }
1033
1034     if (score)
1035         score64 = score;
1036
1037     if (score64 < s->avctx->frame_skip_threshold)
1038         return 1;
1039     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1040         return 1;
1041     return 0;
1042 }
1043
1044 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1045 {
1046     AVPacket pkt = { 0 };
1047     int ret, got_output;
1048
1049     av_init_packet(&pkt);
1050     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1051     if (ret < 0)
1052         return ret;
1053
1054     ret = pkt.size;
1055     av_free_packet(&pkt);
1056     return ret;
1057 }
1058
1059 static int estimate_best_b_count(MpegEncContext *s)
1060 {
1061     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1062     AVCodecContext *c = avcodec_alloc_context3(NULL);
1063     const int scale = s->avctx->brd_scale;
1064     int i, j, out_size, p_lambda, b_lambda, lambda2;
1065     int64_t best_rd  = INT64_MAX;
1066     int best_b_count = -1;
1067
1068     assert(scale >= 0 && scale <= 3);
1069
1070     //emms_c();
1071     //s->next_picture_ptr->quality;
1072     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1073     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1074     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1075     if (!b_lambda) // FIXME we should do this somewhere else
1076         b_lambda = p_lambda;
1077     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1078                FF_LAMBDA_SHIFT;
1079
1080     c->width        = s->width  >> scale;
1081     c->height       = s->height >> scale;
1082     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1083                       CODEC_FLAG_INPUT_PRESERVED;
1084     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1085     c->mb_decision  = s->avctx->mb_decision;
1086     c->me_cmp       = s->avctx->me_cmp;
1087     c->mb_cmp       = s->avctx->mb_cmp;
1088     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1089     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1090     c->time_base    = s->avctx->time_base;
1091     c->max_b_frames = s->max_b_frames;
1092
1093     if (avcodec_open2(c, codec, NULL) < 0)
1094         return -1;
1095
1096     for (i = 0; i < s->max_b_frames + 2; i++) {
1097         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1098                                                 s->next_picture_ptr;
1099
1100         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1101             pre_input = *pre_input_ptr;
1102
1103             if (!pre_input.shared && i) {
1104                 pre_input.f.data[0] += INPLACE_OFFSET;
1105                 pre_input.f.data[1] += INPLACE_OFFSET;
1106                 pre_input.f.data[2] += INPLACE_OFFSET;
1107             }
1108
1109             s->dsp.shrink[scale](s->tmp_frames[i]->data[0], s->tmp_frames[i]->linesize[0],
1110                                  pre_input.f.data[0], pre_input.f.linesize[0],
1111                                  c->width,      c->height);
1112             s->dsp.shrink[scale](s->tmp_frames[i]->data[1], s->tmp_frames[i]->linesize[1],
1113                                  pre_input.f.data[1], pre_input.f.linesize[1],
1114                                  c->width >> 1, c->height >> 1);
1115             s->dsp.shrink[scale](s->tmp_frames[i]->data[2], s->tmp_frames[i]->linesize[2],
1116                                  pre_input.f.data[2], pre_input.f.linesize[2],
1117                                  c->width >> 1, c->height >> 1);
1118         }
1119     }
1120
1121     for (j = 0; j < s->max_b_frames + 1; j++) {
1122         int64_t rd = 0;
1123
1124         if (!s->input_picture[j])
1125             break;
1126
1127         c->error[0] = c->error[1] = c->error[2] = 0;
1128
1129         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1130         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1131
1132         out_size = encode_frame(c, s->tmp_frames[0]);
1133
1134         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1135
1136         for (i = 0; i < s->max_b_frames + 1; i++) {
1137             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1138
1139             s->tmp_frames[i + 1]->pict_type = is_p ?
1140                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1141             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1142
1143             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1144
1145             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1146         }
1147
1148         /* get the delayed frames */
1149         while (out_size) {
1150             out_size = encode_frame(c, NULL);
1151             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1152         }
1153
1154         rd += c->error[0] + c->error[1] + c->error[2];
1155
1156         if (rd < best_rd) {
1157             best_rd = rd;
1158             best_b_count = j;
1159         }
1160     }
1161
1162     avcodec_close(c);
1163     av_freep(&c);
1164
1165     return best_b_count;
1166 }
1167
1168 static int select_input_picture(MpegEncContext *s)
1169 {
1170     int i, ret;
1171
1172     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1173         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1174     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1175
1176     /* set next picture type & ordering */
1177     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1178         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1179             s->next_picture_ptr == NULL || s->intra_only) {
1180             s->reordered_input_picture[0] = s->input_picture[0];
1181             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1182             s->reordered_input_picture[0]->f.coded_picture_number =
1183                 s->coded_picture_number++;
1184         } else {
1185             int b_frames;
1186
1187             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1188                 if (s->picture_in_gop_number < s->gop_size &&
1189                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1190                     // FIXME check that te gop check above is +-1 correct
1191                     av_frame_unref(&s->input_picture[0]->f);
1192
1193                     emms_c();
1194                     ff_vbv_update(s, 0);
1195
1196                     goto no_output_pic;
1197                 }
1198             }
1199
1200             if (s->flags & CODEC_FLAG_PASS2) {
1201                 for (i = 0; i < s->max_b_frames + 1; i++) {
1202                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1203
1204                     if (pict_num >= s->rc_context.num_entries)
1205                         break;
1206                     if (!s->input_picture[i]) {
1207                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1208                         break;
1209                     }
1210
1211                     s->input_picture[i]->f.pict_type =
1212                         s->rc_context.entry[pict_num].new_pict_type;
1213                 }
1214             }
1215
1216             if (s->avctx->b_frame_strategy == 0) {
1217                 b_frames = s->max_b_frames;
1218                 while (b_frames && !s->input_picture[b_frames])
1219                     b_frames--;
1220             } else if (s->avctx->b_frame_strategy == 1) {
1221                 for (i = 1; i < s->max_b_frames + 1; i++) {
1222                     if (s->input_picture[i] &&
1223                         s->input_picture[i]->b_frame_score == 0) {
1224                         s->input_picture[i]->b_frame_score =
1225                             get_intra_count(s,
1226                                             s->input_picture[i    ]->f.data[0],
1227                                             s->input_picture[i - 1]->f.data[0],
1228                                             s->linesize) + 1;
1229                     }
1230                 }
1231                 for (i = 0; i < s->max_b_frames + 1; i++) {
1232                     if (s->input_picture[i] == NULL ||
1233                         s->input_picture[i]->b_frame_score - 1 >
1234                             s->mb_num / s->avctx->b_sensitivity)
1235                         break;
1236                 }
1237
1238                 b_frames = FFMAX(0, i - 1);
1239
1240                 /* reset scores */
1241                 for (i = 0; i < b_frames + 1; i++) {
1242                     s->input_picture[i]->b_frame_score = 0;
1243                 }
1244             } else if (s->avctx->b_frame_strategy == 2) {
1245                 b_frames = estimate_best_b_count(s);
1246             } else {
1247                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1248                 b_frames = 0;
1249             }
1250
1251             emms_c();
1252
1253             for (i = b_frames - 1; i >= 0; i--) {
1254                 int type = s->input_picture[i]->f.pict_type;
1255                 if (type && type != AV_PICTURE_TYPE_B)
1256                     b_frames = i;
1257             }
1258             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1259                 b_frames == s->max_b_frames) {
1260                 av_log(s->avctx, AV_LOG_ERROR,
1261                        "warning, too many b frames in a row\n");
1262             }
1263
1264             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1265                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1266                     s->gop_size > s->picture_in_gop_number) {
1267                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1268                 } else {
1269                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1270                         b_frames = 0;
1271                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1272                 }
1273             }
1274
1275             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1276                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1277                 b_frames--;
1278
1279             s->reordered_input_picture[0] = s->input_picture[b_frames];
1280             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1281                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1282             s->reordered_input_picture[0]->f.coded_picture_number =
1283                 s->coded_picture_number++;
1284             for (i = 0; i < b_frames; i++) {
1285                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1286                 s->reordered_input_picture[i + 1]->f.pict_type =
1287                     AV_PICTURE_TYPE_B;
1288                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1289                     s->coded_picture_number++;
1290             }
1291         }
1292     }
1293 no_output_pic:
1294     if (s->reordered_input_picture[0]) {
1295         s->reordered_input_picture[0]->reference =
1296            s->reordered_input_picture[0]->f.pict_type !=
1297                AV_PICTURE_TYPE_B ? 3 : 0;
1298
1299         ff_mpeg_unref_picture(s, &s->new_picture);
1300         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1301             return ret;
1302
1303         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1304             // input is a shared pix, so we can't modifiy it -> alloc a new
1305             // one & ensure that the shared one is reuseable
1306
1307             Picture *pic;
1308             int i = ff_find_unused_picture(s, 0);
1309             if (i < 0)
1310                 return i;
1311             pic = &s->picture[i];
1312
1313             pic->reference = s->reordered_input_picture[0]->reference;
1314             if (ff_alloc_picture(s, pic, 0) < 0) {
1315                 return -1;
1316             }
1317
1318             ret = av_frame_copy_props(&pic->f, &s->reordered_input_picture[0]->f);
1319             if (ret < 0)
1320                 return ret;
1321
1322             /* mark us unused / free shared pic */
1323             av_frame_unref(&s->reordered_input_picture[0]->f);
1324             s->reordered_input_picture[0]->shared = 0;
1325
1326             s->current_picture_ptr = pic;
1327         } else {
1328             // input is not a shared pix -> reuse buffer for current_pix
1329             s->current_picture_ptr = s->reordered_input_picture[0];
1330             for (i = 0; i < 4; i++) {
1331                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1332             }
1333         }
1334         ff_mpeg_unref_picture(s, &s->current_picture);
1335         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1336                                        s->current_picture_ptr)) < 0)
1337             return ret;
1338
1339         s->picture_number = s->new_picture.f.display_picture_number;
1340     } else {
1341         ff_mpeg_unref_picture(s, &s->new_picture);
1342     }
1343     return 0;
1344 }
1345
1346 static void frame_end(MpegEncContext *s)
1347 {
1348     int i;
1349
1350     if (s->unrestricted_mv &&
1351         s->current_picture.reference &&
1352         !s->intra_only) {
1353         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1354         int hshift = desc->log2_chroma_w;
1355         int vshift = desc->log2_chroma_h;
1356         s->dsp.draw_edges(s->current_picture.f.data[0], s->linesize,
1357                           s->h_edge_pos, s->v_edge_pos,
1358                           EDGE_WIDTH, EDGE_WIDTH,
1359                           EDGE_TOP | EDGE_BOTTOM);
1360         s->dsp.draw_edges(s->current_picture.f.data[1], s->uvlinesize,
1361                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1362                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1363                           EDGE_TOP | EDGE_BOTTOM);
1364         s->dsp.draw_edges(s->current_picture.f.data[2], s->uvlinesize,
1365                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1366                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1367                           EDGE_TOP | EDGE_BOTTOM);
1368     }
1369
1370     emms_c();
1371
1372     s->last_pict_type                 = s->pict_type;
1373     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f.quality;
1374     if (s->pict_type!= AV_PICTURE_TYPE_B)
1375         s->last_non_b_pict_type = s->pict_type;
1376
1377     if (s->encoding) {
1378         /* release non-reference frames */
1379         for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1380             if (!s->picture[i].reference)
1381                 ff_mpeg_unref_picture(s, &s->picture[i]);
1382         }
1383     }
1384
1385     s->avctx->coded_frame = &s->current_picture_ptr->f;
1386
1387 }
1388
1389 static void update_noise_reduction(MpegEncContext *s)
1390 {
1391     int intra, i;
1392
1393     for (intra = 0; intra < 2; intra++) {
1394         if (s->dct_count[intra] > (1 << 16)) {
1395             for (i = 0; i < 64; i++) {
1396                 s->dct_error_sum[intra][i] >>= 1;
1397             }
1398             s->dct_count[intra] >>= 1;
1399         }
1400
1401         for (i = 0; i < 64; i++) {
1402             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1403                                        s->dct_count[intra] +
1404                                        s->dct_error_sum[intra][i] / 2) /
1405                                       (s->dct_error_sum[intra][i] + 1);
1406         }
1407     }
1408 }
1409
1410 static int frame_start(MpegEncContext *s)
1411 {
1412     int ret;
1413
1414     /* mark & release old frames */
1415     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1416         s->last_picture_ptr != s->next_picture_ptr &&
1417         s->last_picture_ptr->f.buf[0]) {
1418         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1419     }
1420
1421     s->current_picture_ptr->f.pict_type = s->pict_type;
1422     s->current_picture_ptr->f.key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1423
1424     ff_mpeg_unref_picture(s, &s->current_picture);
1425     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1426                                    s->current_picture_ptr)) < 0)
1427         return ret;
1428
1429     if (s->pict_type != AV_PICTURE_TYPE_B) {
1430         s->last_picture_ptr = s->next_picture_ptr;
1431         if (!s->droppable)
1432             s->next_picture_ptr = s->current_picture_ptr;
1433     }
1434
1435     if (s->last_picture_ptr) {
1436         ff_mpeg_unref_picture(s, &s->last_picture);
1437         if (s->last_picture_ptr->f.buf[0] &&
1438             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1439                                        s->last_picture_ptr)) < 0)
1440             return ret;
1441     }
1442     if (s->next_picture_ptr) {
1443         ff_mpeg_unref_picture(s, &s->next_picture);
1444         if (s->next_picture_ptr->f.buf[0] &&
1445             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1446                                        s->next_picture_ptr)) < 0)
1447             return ret;
1448     }
1449
1450     if (s->picture_structure!= PICT_FRAME) {
1451         int i;
1452         for (i = 0; i < 4; i++) {
1453             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1454                 s->current_picture.f.data[i] +=
1455                     s->current_picture.f.linesize[i];
1456             }
1457             s->current_picture.f.linesize[i] *= 2;
1458             s->last_picture.f.linesize[i]    *= 2;
1459             s->next_picture.f.linesize[i]    *= 2;
1460         }
1461     }
1462
1463     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1464         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1465         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1466     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1467         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1468         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1469     } else {
1470         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1471         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1472     }
1473
1474     if (s->dct_error_sum) {
1475         assert(s->avctx->noise_reduction && s->encoding);
1476         update_noise_reduction(s);
1477     }
1478
1479     return 0;
1480 }
1481
1482 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1483                           const AVFrame *pic_arg, int *got_packet)
1484 {
1485     MpegEncContext *s = avctx->priv_data;
1486     int i, stuffing_count, ret;
1487     int context_count = s->slice_context_count;
1488
1489     s->picture_in_gop_number++;
1490
1491     if (load_input_picture(s, pic_arg) < 0)
1492         return -1;
1493
1494     if (select_input_picture(s) < 0) {
1495         return -1;
1496     }
1497
1498     /* output? */
1499     if (s->new_picture.f.data[0]) {
1500         if (!pkt->data &&
1501             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1502             return ret;
1503         if (s->mb_info) {
1504             s->mb_info_ptr = av_packet_new_side_data(pkt,
1505                                  AV_PKT_DATA_H263_MB_INFO,
1506                                  s->mb_width*s->mb_height*12);
1507             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1508         }
1509
1510         for (i = 0; i < context_count; i++) {
1511             int start_y = s->thread_context[i]->start_mb_y;
1512             int   end_y = s->thread_context[i]->  end_mb_y;
1513             int h       = s->mb_height;
1514             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1515             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1516
1517             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1518         }
1519
1520         s->pict_type = s->new_picture.f.pict_type;
1521         //emms_c();
1522         ret = frame_start(s);
1523         if (ret < 0)
1524             return ret;
1525 vbv_retry:
1526         if (encode_picture(s, s->picture_number) < 0)
1527             return -1;
1528
1529         avctx->header_bits = s->header_bits;
1530         avctx->mv_bits     = s->mv_bits;
1531         avctx->misc_bits   = s->misc_bits;
1532         avctx->i_tex_bits  = s->i_tex_bits;
1533         avctx->p_tex_bits  = s->p_tex_bits;
1534         avctx->i_count     = s->i_count;
1535         // FIXME f/b_count in avctx
1536         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1537         avctx->skip_count  = s->skip_count;
1538
1539         frame_end(s);
1540
1541         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1542             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1543
1544         if (avctx->rc_buffer_size) {
1545             RateControlContext *rcc = &s->rc_context;
1546             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1547
1548             if (put_bits_count(&s->pb) > max_size &&
1549                 s->lambda < s->avctx->lmax) {
1550                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1551                                        (s->qscale + 1) / s->qscale);
1552                 if (s->adaptive_quant) {
1553                     int i;
1554                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1555                         s->lambda_table[i] =
1556                             FFMAX(s->lambda_table[i] + 1,
1557                                   s->lambda_table[i] * (s->qscale + 1) /
1558                                   s->qscale);
1559                 }
1560                 s->mb_skipped = 0;        // done in frame_start()
1561                 // done in encode_picture() so we must undo it
1562                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1563                     if (s->flipflop_rounding          ||
1564                         s->codec_id == AV_CODEC_ID_H263P ||
1565                         s->codec_id == AV_CODEC_ID_MPEG4)
1566                         s->no_rounding ^= 1;
1567                 }
1568                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1569                     s->time_base       = s->last_time_base;
1570                     s->last_non_b_time = s->time - s->pp_time;
1571                 }
1572                 for (i = 0; i < context_count; i++) {
1573                     PutBitContext *pb = &s->thread_context[i]->pb;
1574                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1575                 }
1576                 goto vbv_retry;
1577             }
1578
1579             assert(s->avctx->rc_max_rate);
1580         }
1581
1582         if (s->flags & CODEC_FLAG_PASS1)
1583             ff_write_pass1_stats(s);
1584
1585         for (i = 0; i < 4; i++) {
1586             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1587             avctx->error[i] += s->current_picture_ptr->f.error[i];
1588         }
1589
1590         if (s->flags & CODEC_FLAG_PASS1)
1591             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1592                    avctx->i_tex_bits + avctx->p_tex_bits ==
1593                        put_bits_count(&s->pb));
1594         flush_put_bits(&s->pb);
1595         s->frame_bits  = put_bits_count(&s->pb);
1596
1597         stuffing_count = ff_vbv_update(s, s->frame_bits);
1598         if (stuffing_count) {
1599             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1600                     stuffing_count + 50) {
1601                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1602                 return -1;
1603             }
1604
1605             switch (s->codec_id) {
1606             case AV_CODEC_ID_MPEG1VIDEO:
1607             case AV_CODEC_ID_MPEG2VIDEO:
1608                 while (stuffing_count--) {
1609                     put_bits(&s->pb, 8, 0);
1610                 }
1611             break;
1612             case AV_CODEC_ID_MPEG4:
1613                 put_bits(&s->pb, 16, 0);
1614                 put_bits(&s->pb, 16, 0x1C3);
1615                 stuffing_count -= 4;
1616                 while (stuffing_count--) {
1617                     put_bits(&s->pb, 8, 0xFF);
1618                 }
1619             break;
1620             default:
1621                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1622             }
1623             flush_put_bits(&s->pb);
1624             s->frame_bits  = put_bits_count(&s->pb);
1625         }
1626
1627         /* update mpeg1/2 vbv_delay for CBR */
1628         if (s->avctx->rc_max_rate                          &&
1629             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1630             s->out_format == FMT_MPEG1                     &&
1631             90000LL * (avctx->rc_buffer_size - 1) <=
1632                 s->avctx->rc_max_rate * 0xFFFFLL) {
1633             int vbv_delay, min_delay;
1634             double inbits  = s->avctx->rc_max_rate *
1635                              av_q2d(s->avctx->time_base);
1636             int    minbits = s->frame_bits - 8 *
1637                              (s->vbv_delay_ptr - s->pb.buf - 1);
1638             double bits    = s->rc_context.buffer_index + minbits - inbits;
1639
1640             if (bits < 0)
1641                 av_log(s->avctx, AV_LOG_ERROR,
1642                        "Internal error, negative bits\n");
1643
1644             assert(s->repeat_first_field == 0);
1645
1646             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1647             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1648                         s->avctx->rc_max_rate;
1649
1650             vbv_delay = FFMAX(vbv_delay, min_delay);
1651
1652             assert(vbv_delay < 0xFFFF);
1653
1654             s->vbv_delay_ptr[0] &= 0xF8;
1655             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1656             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1657             s->vbv_delay_ptr[2] &= 0x07;
1658             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1659             avctx->vbv_delay     = vbv_delay * 300;
1660         }
1661         s->total_bits     += s->frame_bits;
1662         avctx->frame_bits  = s->frame_bits;
1663
1664         pkt->pts = s->current_picture.f.pts;
1665         if (!s->low_delay) {
1666             if (!s->current_picture.f.coded_picture_number)
1667                 pkt->dts = pkt->pts - s->dts_delta;
1668             else
1669                 pkt->dts = s->reordered_pts;
1670             s->reordered_pts = s->input_picture[0]->f.pts;
1671         } else
1672             pkt->dts = pkt->pts;
1673         if (s->current_picture.f.key_frame)
1674             pkt->flags |= AV_PKT_FLAG_KEY;
1675         if (s->mb_info)
1676             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1677     } else {
1678         s->frame_bits = 0;
1679     }
1680     assert((s->frame_bits & 7) == 0);
1681
1682     pkt->size = s->frame_bits / 8;
1683     *got_packet = !!pkt->size;
1684     return 0;
1685 }
1686
1687 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1688                                                 int n, int threshold)
1689 {
1690     static const char tab[64] = {
1691         3, 2, 2, 1, 1, 1, 1, 1,
1692         1, 1, 1, 1, 1, 1, 1, 1,
1693         1, 1, 1, 1, 1, 1, 1, 1,
1694         0, 0, 0, 0, 0, 0, 0, 0,
1695         0, 0, 0, 0, 0, 0, 0, 0,
1696         0, 0, 0, 0, 0, 0, 0, 0,
1697         0, 0, 0, 0, 0, 0, 0, 0,
1698         0, 0, 0, 0, 0, 0, 0, 0
1699     };
1700     int score = 0;
1701     int run = 0;
1702     int i;
1703     int16_t *block = s->block[n];
1704     const int last_index = s->block_last_index[n];
1705     int skip_dc;
1706
1707     if (threshold < 0) {
1708         skip_dc = 0;
1709         threshold = -threshold;
1710     } else
1711         skip_dc = 1;
1712
1713     /* Are all we could set to zero already zero? */
1714     if (last_index <= skip_dc - 1)
1715         return;
1716
1717     for (i = 0; i <= last_index; i++) {
1718         const int j = s->intra_scantable.permutated[i];
1719         const int level = FFABS(block[j]);
1720         if (level == 1) {
1721             if (skip_dc && i == 0)
1722                 continue;
1723             score += tab[run];
1724             run = 0;
1725         } else if (level > 1) {
1726             return;
1727         } else {
1728             run++;
1729         }
1730     }
1731     if (score >= threshold)
1732         return;
1733     for (i = skip_dc; i <= last_index; i++) {
1734         const int j = s->intra_scantable.permutated[i];
1735         block[j] = 0;
1736     }
1737     if (block[0])
1738         s->block_last_index[n] = 0;
1739     else
1740         s->block_last_index[n] = -1;
1741 }
1742
1743 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1744                                int last_index)
1745 {
1746     int i;
1747     const int maxlevel = s->max_qcoeff;
1748     const int minlevel = s->min_qcoeff;
1749     int overflow = 0;
1750
1751     if (s->mb_intra) {
1752         i = 1; // skip clipping of intra dc
1753     } else
1754         i = 0;
1755
1756     for (; i <= last_index; i++) {
1757         const int j = s->intra_scantable.permutated[i];
1758         int level = block[j];
1759
1760         if (level > maxlevel) {
1761             level = maxlevel;
1762             overflow++;
1763         } else if (level < minlevel) {
1764             level = minlevel;
1765             overflow++;
1766         }
1767
1768         block[j] = level;
1769     }
1770
1771     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1772         av_log(s->avctx, AV_LOG_INFO,
1773                "warning, clipping %d dct coefficients to %d..%d\n",
1774                overflow, minlevel, maxlevel);
1775 }
1776
/**
 * Fill an 8x8 weight table from the local activity of an 8x8 pixel block.
 *
 * For every pixel the neighbourhood of up to 3x3 pixels around it (cut off at
 * the borders of the 8x8 block) is examined and the weight is set to
 * 36 * ff_sqrt(count * sum_of_squares - sum * sum) / count, where count is
 * the number of pixels in that neighbourhood.
 *
 * @param weight output table of 64 entries, stored row by row
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance in bytes between two pixel rows
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            int sum   = 0;
            int sqr   = 0;
            int count = 0;
            int r, c;

            for (r = top; r < bottom; r++) {
                for (c = left; c < right; c++) {
                    const int v = ptr[c + r * stride];

                    sum   += v;
                    sqr   += v * v;
                    count++;
                }
            }

            weight[8 * row + col] =
                (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
1800
/**
 * Encode the macroblock at (s->mb_x, s->mb_y).
 *
 * Fetches the source pixels (intra) or the difference between the source and
 * the motion-compensated prediction (non-intra), runs DCT + quantization on
 * every block that is not skipped, optionally refines / eliminates
 * coefficients, and finally calls the codec-specific macroblock writer
 * selected by s->codec_id.
 *
 * @param s               encoder context
 * @param motion_x        passed through to the codec-specific writer
 *                        (not used for MJPEG)
 * @param motion_y        passed through to the codec-specific writer
 *                        (not used for MJPEG)
 * @param mb_block_height number of chroma rows per macroblock; the caller in
 *                        this file passes 8 for CHROMA_420 and 16 otherwise
 * @param mb_block_count  number of 8x8 blocks per macroblock; the caller in
 *                        this file passes 6 for CHROMA_420 and 8 otherwise
 */
1801 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1802                                                 int motion_x, int motion_y,
1803                                                 int mb_block_height,
1804                                                 int mb_block_count)
1805 {
1806     int16_t weight[8][64];
1807     int16_t orig[8][64];
1808     const int mb_x = s->mb_x;
1809     const int mb_y = s->mb_y;
1810     int i;
1811     int skip_dct[8];
1812     int dct_offset = s->linesize * 8; // default for progressive frames
1813     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1814     ptrdiff_t wrap_y, wrap_c;
1815
         /* every block starts out with the global skipdct setting */
1816     for (i = 0; i < mb_block_count; i++)
1817         skip_dct[i] = s->skipdct;
1818
         /* per-macroblock quantizer selection */
1819     if (s->adaptive_quant) {
1820         const int last_qp = s->qscale;
1821         const int mb_xy = mb_x + mb_y * s->mb_stride;
1822
1823         s->lambda = s->lambda_table[mb_xy];
1824         update_qscale(s);
1825
1826         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1827             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1828             s->dquant = s->qscale - last_qp;
1829
1830             if (s->out_format == FMT_H263) {
                     /* the quantizer step between macroblocks is limited to +-2 here */
1831                 s->dquant = av_clip(s->dquant, -2, 2);
1832
1833                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1834                     if (!s->mb_intra) {
                             /* no quantizer change for B-frame macroblocks with an
                              * odd dquant or direct prediction, nor for 8x8 vectors */
1835                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1836                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1837                                 s->dquant = 0;
1838                         }
1839                         if (s->mv_type == MV_TYPE_8X8)
1840                             s->dquant = 0;
1841                     }
1842                 }
1843             }
1844         }
1845         ff_set_qscale(s, last_qp + s->dquant);
1846     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1847         ff_set_qscale(s, s->qscale + s->dquant);
1848
         /* locate the macroblock in the three planes of the input picture */
1849     wrap_y = s->linesize;
1850     wrap_c = s->uvlinesize;
1851     ptr_y  = s->new_picture.f.data[0] +
1852              (mb_y * 16 * wrap_y)              + mb_x * 16;
1853     ptr_cb = s->new_picture.f.data[1] +
1854              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1855     ptr_cr = s->new_picture.f.data[2] +
1856              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1857
         /* The macroblock sticks out of the picture on the right or at the
          * bottom: build copies in the edge emulation buffer and read the
          * source from there instead.
          * NOTE(review): the chroma calls pass mb_y * 8 and s->height >> 1
          * while ptr_cb / ptr_cr were computed with mb_block_height; this
          * looks 4:2:0 specific - confirm behaviour when mb_block_height is 16. */
1858     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1859         uint8_t *ebuf = s->edge_emu_buffer + 32;
1860         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
1861                                  wrap_y, wrap_y,
1862                                  16, 16, mb_x * 16, mb_y * 16,
1863                                  s->width, s->height);
1864         ptr_y = ebuf;
1865         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
1866                                  wrap_c, wrap_c,
1867                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1868                                  s->width >> 1, s->height >> 1);
1869         ptr_cb = ebuf + 18 * wrap_y;
1870         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
1871                                  wrap_c, wrap_c,
1872                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1873                                  s->width >> 1, s->height >> 1);
1874         ptr_cr = ebuf + 18 * wrap_y + 8;
1875     }
1876
1877     if (s->mb_intra) {
             /* intra: the blocks are filled directly from the source pixels */
1878         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1879             int progressive_score, interlaced_score;
1880
                 /* compare the cost of the two 16x8 halves (frame lines) against
                  * the cost of the two fields (every second line); the constant
                  * 400 biases the decision towards progressive */
1881             s->interlaced_dct = 0;
1882             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1883                                                     NULL, wrap_y, 8) +
1884                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1885                                                     NULL, wrap_y, 8) - 400;
1886
1887             if (progressive_score > 0) {
1888                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1889                                                        NULL, wrap_y * 2, 8) +
1890                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1891                                                        NULL, wrap_y * 2, 8);
1892                 if (progressive_score > interlaced_score) {
1893                     s->interlaced_dct = 1;
1894
                         /* field DCT: the lower blocks start one line down and
                          * every block reads every second line */
1895                     dct_offset = wrap_y;
1896                     wrap_y <<= 1;
1897                     if (s->chroma_format == CHROMA_422)
1898                         wrap_c <<= 1;
1899                 }
1900             }
1901         }
1902
1903         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1904         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1905         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1906         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1907
1908         if (s->flags & CODEC_FLAG_GRAY) {
                 /* gray-only encoding: chroma blocks 4 and 5 are not transformed */
1909             skip_dct[4] = 1;
1910             skip_dct[5] = 1;
1911         } else {
1912             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1913             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1914             if (!s->chroma_y_shift) { /* 422 */
1915                 s->dsp.get_pixels(s->block[6],
1916                                   ptr_cb + (dct_offset >> 1), wrap_c);
1917                 s->dsp.get_pixels(s->block[7],
1918                                   ptr_cr + (dct_offset >> 1), wrap_c);
1919             }
1920         }
1921     } else {
             /* non-intra: build the prediction in s->dest[] and transform the
              * difference between source and prediction */
1922         op_pixels_func (*op_pix)[4];
1923         qpel_mc_func (*op_qpix)[16];
1924         uint8_t *dest_y, *dest_cb, *dest_cr;
1925
1926         dest_y  = s->dest[0];
1927         dest_cb = s->dest[1];
1928         dest_cr = s->dest[2];
1929
1930         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1931             op_pix  = s->hdsp.put_pixels_tab;
1932             op_qpix = s->dsp.put_qpel_pixels_tab;
1933         } else {
1934             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
1935             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1936         }
1937
1938         if (s->mv_dir & MV_DIR_FORWARD) {
1939             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
1940                           s->last_picture.f.data,
1941                           op_pix, op_qpix);
                 /* a following backward prediction is averaged into the
                  * forward one instead of overwriting it */
1942             op_pix  = s->hdsp.avg_pixels_tab;
1943             op_qpix = s->dsp.avg_qpel_pixels_tab;
1944         }
1945         if (s->mv_dir & MV_DIR_BACKWARD) {
1946             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
1947                           s->next_picture.f.data,
1948                           op_pix, op_qpix);
1949         }
1950
1951         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1952             int progressive_score, interlaced_score;
1953
                 /* same frame/field decision as in the intra case, but measured
                  * between the prediction and the source */
1954             s->interlaced_dct = 0;
1955             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1956                                                     ptr_y,              wrap_y,
1957                                                     8) +
1958                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1959                                                     ptr_y + wrap_y * 8, wrap_y,
1960                                                     8) - 400;
1961
1962             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1963                 progressive_score -= 400;
1964
1965             if (progressive_score > 0) {
1966                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1967                                                        ptr_y,
1968                                                        wrap_y * 2, 8) +
1969                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1970                                                        ptr_y + wrap_y,
1971                                                        wrap_y * 2, 8);
1972
1973                 if (progressive_score > interlaced_score) {
1974                     s->interlaced_dct = 1;
1975
1976                     dct_offset = wrap_y;
1977                     wrap_y <<= 1;
1978                     if (s->chroma_format == CHROMA_422)
1979                         wrap_c <<= 1;
1980                 }
1981             }
1982         }
1983
1984         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1985         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1986         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1987                            dest_y + dct_offset, wrap_y);
1988         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1989                            dest_y + dct_offset + 8, wrap_y);
1990
1991         if (s->flags & CODEC_FLAG_GRAY) {
1992             skip_dct[4] = 1;
1993             skip_dct[5] = 1;
1994         } else {
1995             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1996             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1997             if (!s->chroma_y_shift) { /* 422 */
1998                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1999                                    dest_cb + (dct_offset >> 1), wrap_c);
2000                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
2001                                    dest_cr + (dct_offset >> 1), wrap_c);
2002             }
2003         }
2004         /* pre quantization */
             /* when the stored motion-compensated variance of this macroblock is
              * below 2 * qscale^2, any block whose SAD against the prediction is
              * below 20 * qscale is not transformed at all */
2005         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2006                 2 * s->qscale * s->qscale) {
2007             // FIXME optimize
2008             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
2009                               wrap_y, 8) < 20 * s->qscale)
2010                 skip_dct[0] = 1;
2011             if (s->dsp.sad[1](NULL, ptr_y + 8,
2012                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2013                 skip_dct[1] = 1;
2014             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
2015                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
2016                 skip_dct[2] = 1;
2017             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
2018                               dest_y + dct_offset + 8,
2019                               wrap_y, 8) < 20 * s->qscale)
2020                 skip_dct[3] = 1;
2021             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
2022                               wrap_c, 8) < 20 * s->qscale)
2023                 skip_dct[4] = 1;
2024             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
2025                               wrap_c, 8) < 20 * s->qscale)
2026                 skip_dct[5] = 1;
2027             if (!s->chroma_y_shift) { /* 422 */
2028                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2029                                   dest_cb + (dct_offset >> 1),
2030                                   wrap_c, 8) < 20 * s->qscale)
2031                     skip_dct[6] = 1;
2032                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2033                                   dest_cr + (dct_offset >> 1),
2034                                   wrap_c, 8) < 20 * s->qscale)
2035                     skip_dct[7] = 1;
2036             }
2037         }
2038     }
2039
         /* noise shaping needs a visual weight table per block and a copy of
          * the blocks as they are before quantization */
2040     if (s->quantizer_noise_shaping) {
2041         if (!skip_dct[0])
2042             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2043         if (!skip_dct[1])
2044             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2045         if (!skip_dct[2])
2046             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2047         if (!skip_dct[3])
2048             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2049         if (!skip_dct[4])
2050             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2051         if (!skip_dct[5])
2052             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2053         if (!s->chroma_y_shift) { /* 422 */
2054             if (!skip_dct[6])
2055                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2056                                   wrap_c);
2057             if (!skip_dct[7])
2058                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2059                                   wrap_c);
2060         }
             /* single copy of all blocks; assumes s->block[0..mb_block_count-1]
              * are contiguous in memory - TODO confirm against the allocation */
2061         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2062     }
2063
2064     /* DCT & quantize */
2065     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
2066     {
2067         for (i = 0; i < mb_block_count; i++) {
2068             if (!skip_dct[i]) {
2069                 int overflow;
2070                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2071                 // FIXME we could decide to change to quantizer instead of
2072                 // clipping
2073                 // JS: I don't think that would be a good idea it could lower
2074                 //     quality instead of improve it. Just INTRADC clipping
2075                 //     deserves changes in quantizer
2076                 if (overflow)
2077                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2078             } else
                     /* skipped blocks are marked as having no coefficients */
2079                 s->block_last_index[i] = -1;
2080         }
2081         if (s->quantizer_noise_shaping) {
2082             for (i = 0; i < mb_block_count; i++) {
2083                 if (!skip_dct[i]) {
2084                     s->block_last_index[i] =
2085                         dct_quantize_refine(s, s->block[i], weight[i],
2086                                             orig[i], i, s->qscale);
2087                 }
2088             }
2089         }
2090
             /* single coefficient elimination is applied to non-intra macroblocks only */
2091         if (s->luma_elim_threshold && !s->mb_intra)
2092             for (i = 0; i < 4; i++)
2093                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2094         if (s->chroma_elim_threshold && !s->mb_intra)
2095             for (i = 4; i < mb_block_count; i++)
2096                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2097
2098         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2099             for (i = 0; i < mb_block_count; i++) {
2100                 if (s->block_last_index[i] == -1)
2101                     s->coded_score[i] = INT_MAX / 256;
2102             }
2103         }
2104     }
2105
         /* gray-only intra macroblocks still carry chroma blocks 4 and 5: give
          * each a single DC coefficient of 1024 / c_dc_scale, rounded to nearest */
2106     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2107         s->block_last_index[4] =
2108         s->block_last_index[5] = 0;
2109         s->block[4][0] =
2110         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2111     }
2112
2113     // non c quantize code returns incorrect block_last_index FIXME
         /* recompute the last index by scanning backwards for the last
          * non-zero coefficient in scan order */
2114     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2115         for (i = 0; i < mb_block_count; i++) {
2116             int j;
2117             if (s->block_last_index[i] > 0) {
2118                 for (j = 63; j > 0; j--) {
2119                     if (s->block[i][s->intra_scantable.permutated[j]])
2120                         break;
2121                 }
2122                 s->block_last_index[i] = j;
2123             }
2124         }
2125     }
2126
2127     /* huffman encode */
         /* each call is guarded by its CONFIG_* constant so that writers of
          * encoders that are not built are never referenced */
2128     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2129     case AV_CODEC_ID_MPEG1VIDEO:
2130     case AV_CODEC_ID_MPEG2VIDEO:
2131         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2132             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2133         break;
2134     case AV_CODEC_ID_MPEG4:
2135         if (CONFIG_MPEG4_ENCODER)
2136             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2137         break;
2138     case AV_CODEC_ID_MSMPEG4V2:
2139     case AV_CODEC_ID_MSMPEG4V3:
2140     case AV_CODEC_ID_WMV1:
2141         if (CONFIG_MSMPEG4_ENCODER)
2142             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2143         break;
2144     case AV_CODEC_ID_WMV2:
2145         if (CONFIG_WMV2_ENCODER)
2146             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2147         break;
2148     case AV_CODEC_ID_H261:
2149         if (CONFIG_H261_ENCODER)
2150             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2151         break;
2152     case AV_CODEC_ID_H263:
2153     case AV_CODEC_ID_H263P:
2154     case AV_CODEC_ID_FLV1:
2155     case AV_CODEC_ID_RV10:
2156     case AV_CODEC_ID_RV20:
2157         if (CONFIG_H263_ENCODER)
2158             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2159         break;
2160     case AV_CODEC_ID_MJPEG:
2161         if (CONFIG_MJPEG_ENCODER)
2162             ff_mjpeg_encode_mb(s, s->block);
2163         break;
2164     default:
2165         assert(0);
2166     }
2167 }
2168
2169 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2170 {
2171     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2172     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2173 }
2174
2175 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2176     int i;
2177
2178     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2179
2180     /* mpeg1 */
2181     d->mb_skip_run= s->mb_skip_run;
2182     for(i=0; i<3; i++)
2183         d->last_dc[i] = s->last_dc[i];
2184
2185     /* statistics */
2186     d->mv_bits= s->mv_bits;
2187     d->i_tex_bits= s->i_tex_bits;
2188     d->p_tex_bits= s->p_tex_bits;
2189     d->i_count= s->i_count;
2190     d->f_count= s->f_count;
2191     d->b_count= s->b_count;
2192     d->skip_count= s->skip_count;
2193     d->misc_bits= s->misc_bits;
2194     d->last_bits= 0;
2195
2196     d->mb_skipped= 0;
2197     d->qscale= s->qscale;
2198     d->dquant= s->dquant;
2199
2200     d->esc3_level_length= s->esc3_level_length;
2201 }
2202
2203 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2204     int i;
2205
2206     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2207     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2208
2209     /* mpeg1 */
2210     d->mb_skip_run= s->mb_skip_run;
2211     for(i=0; i<3; i++)
2212         d->last_dc[i] = s->last_dc[i];
2213
2214     /* statistics */
2215     d->mv_bits= s->mv_bits;
2216     d->i_tex_bits= s->i_tex_bits;
2217     d->p_tex_bits= s->p_tex_bits;
2218     d->i_count= s->i_count;
2219     d->f_count= s->f_count;
2220     d->b_count= s->b_count;
2221     d->skip_count= s->skip_count;
2222     d->misc_bits= s->misc_bits;
2223
2224     d->mb_intra= s->mb_intra;
2225     d->mb_skipped= s->mb_skipped;
2226     d->mv_type= s->mv_type;
2227     d->mv_dir= s->mv_dir;
2228     d->pb= s->pb;
2229     if(s->data_partitioning){
2230         d->pb2= s->pb2;
2231         d->tex_pb= s->tex_pb;
2232     }
2233     d->block= s->block;
2234     for(i=0; i<8; i++)
2235         d->block_last_index[i]= s->block_last_index[i];
2236     d->interlaced_dct= s->interlaced_dct;
2237     d->qscale= s->qscale;
2238
2239     d->esc3_level_length= s->esc3_level_length;
2240 }
2241
2242 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2243                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2244                            int *dmin, int *next_block, int motion_x, int motion_y)
2245 {
2246     int score;
2247     uint8_t *dest_backup[3];
2248
2249     copy_context_before_encode(s, backup, type);
2250
2251     s->block= s->blocks[*next_block];
2252     s->pb= pb[*next_block];
2253     if(s->data_partitioning){
2254         s->pb2   = pb2   [*next_block];
2255         s->tex_pb= tex_pb[*next_block];
2256     }
2257
2258     if(*next_block){
2259         memcpy(dest_backup, s->dest, sizeof(s->dest));
2260         s->dest[0] = s->rd_scratchpad;
2261         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2262         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2263         assert(s->linesize >= 32); //FIXME
2264     }
2265
2266     encode_mb(s, motion_x, motion_y);
2267
2268     score= put_bits_count(&s->pb);
2269     if(s->data_partitioning){
2270         score+= put_bits_count(&s->pb2);
2271         score+= put_bits_count(&s->tex_pb);
2272     }
2273
2274     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2275         ff_MPV_decode_mb(s, s->block);
2276
2277         score *= s->lambda2;
2278         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2279     }
2280
2281     if(*next_block){
2282         memcpy(s->dest, dest_backup, sizeof(s->dest));
2283     }
2284
2285     if(score<*dmin){
2286         *dmin= score;
2287         *next_block^=1;
2288
2289         copy_context_after_encode(best, s, type);
2290     }
2291 }
2292
2293 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2294     uint32_t *sq = ff_squareTbl + 256;
2295     int acc=0;
2296     int x,y;
2297
2298     if(w==16 && h==16)
2299         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2300     else if(w==8 && h==8)
2301         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2302
2303     for(y=0; y<h; y++){
2304         for(x=0; x<w; x++){
2305             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2306         }
2307     }
2308
2309     assert(acc>=0);
2310
2311     return acc;
2312 }
2313
2314 static int sse_mb(MpegEncContext *s){
2315     int w= 16;
2316     int h= 16;
2317
2318     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2319     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2320
2321     if(w==16 && h==16)
2322       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2323         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2324                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2325                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2326       }else{
2327         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2328                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2329                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2330       }
2331     else
2332         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2333                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2334                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2335 }
2336
2337 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2338     MpegEncContext *s= *(void**)arg;
2339
2340
2341     s->me.pre_pass=1;
2342     s->me.dia_size= s->avctx->pre_dia_size;
2343     s->first_slice_line=1;
2344     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2345         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2346             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2347         }
2348         s->first_slice_line=0;
2349     }
2350
2351     s->me.pre_pass=0;
2352
2353     return 0;
2354 }
2355
2356 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2357     MpegEncContext *s= *(void**)arg;
2358
2359     ff_check_alignment();
2360
2361     s->me.dia_size= s->avctx->dia_size;
2362     s->first_slice_line=1;
2363     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2364         s->mb_x=0; //for block init below
2365         ff_init_block_index(s);
2366         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2367             s->block_index[0]+=2;
2368             s->block_index[1]+=2;
2369             s->block_index[2]+=2;
2370             s->block_index[3]+=2;
2371
2372             /* compute motion vector & mb_type and store in context */
2373             if(s->pict_type==AV_PICTURE_TYPE_B)
2374                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2375             else
2376                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2377         }
2378         s->first_slice_line=0;
2379     }
2380     return 0;
2381 }
2382
2383 static int mb_var_thread(AVCodecContext *c, void *arg){
2384     MpegEncContext *s= *(void**)arg;
2385     int mb_x, mb_y;
2386
2387     ff_check_alignment();
2388
2389     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2390         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2391             int xx = mb_x * 16;
2392             int yy = mb_y * 16;
2393             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2394             int varc;
2395             int sum = s->dsp.pix_sum(pix, s->linesize);
2396
2397             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2398
2399             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2400             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2401             s->me.mb_var_sum_temp    += varc;
2402         }
2403     }
2404     return 0;
2405 }
2406
2407 static void write_slice_end(MpegEncContext *s){
2408     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2409         if(s->partitioned_frame){
2410             ff_mpeg4_merge_partitions(s);
2411         }
2412
2413         ff_mpeg4_stuffing(&s->pb);
2414     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2415         ff_mjpeg_encode_stuffing(&s->pb);
2416     }
2417
2418     avpriv_align_put_bits(&s->pb);
2419     flush_put_bits(&s->pb);
2420
2421     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2422         s->misc_bits+= get_bits_diff(s);
2423 }
2424
2425 static void write_mb_info(MpegEncContext *s)
2426 {
2427     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2428     int offset = put_bits_count(&s->pb);
2429     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2430     int gobn = s->mb_y / s->gob_index;
2431     int pred_x, pred_y;
2432     if (CONFIG_H263_ENCODER)
2433         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2434     bytestream_put_le32(&ptr, offset);
2435     bytestream_put_byte(&ptr, s->qscale);
2436     bytestream_put_byte(&ptr, gobn);
2437     bytestream_put_le16(&ptr, mba);
2438     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2439     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2440     /* 4MV not implemented */
2441     bytestream_put_byte(&ptr, 0); /* hmv2 */
2442     bytestream_put_byte(&ptr, 0); /* vmv2 */
2443 }
2444
2445 static void update_mb_info(MpegEncContext *s, int startcode)
2446 {
2447     if (!s->mb_info)
2448         return;
2449     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2450         s->mb_info_size += 12;
2451         s->prev_mb_info = s->last_mb_info;
2452     }
2453     if (startcode) {
2454         s->prev_mb_info = put_bits_count(&s->pb)/8;
2455         /* This might have incremented mb_info_size above, and we return without
2456          * actually writing any info into that slot yet. But in that case,
2457          * this will be called again at the start of the after writing the
2458          * start code, actually writing the mb info. */
2459         return;
2460     }
2461
2462     s->last_mb_info = put_bits_count(&s->pb)/8;
2463     if (!s->mb_info_size)
2464         s->mb_info_size += 12;
2465     write_mb_info(s);
2466 }
2467
2468 static int encode_thread(AVCodecContext *c, void *arg){
2469     MpegEncContext *s= *(void**)arg;
2470     int mb_x, mb_y, pdif = 0;
2471     int chr_h= 16>>s->chroma_y_shift;
2472     int i, j;
2473     MpegEncContext best_s, backup_s;
2474     uint8_t bit_buf[2][MAX_MB_BYTES];
2475     uint8_t bit_buf2[2][MAX_MB_BYTES];
2476     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2477     PutBitContext pb[2], pb2[2], tex_pb[2];
2478
2479     ff_check_alignment();
2480
2481     for(i=0; i<2; i++){
2482         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2483         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2484         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2485     }
2486
2487     s->last_bits= put_bits_count(&s->pb);
2488     s->mv_bits=0;
2489     s->misc_bits=0;
2490     s->i_tex_bits=0;
2491     s->p_tex_bits=0;
2492     s->i_count=0;
2493     s->f_count=0;
2494     s->b_count=0;
2495     s->skip_count=0;
2496
2497     for(i=0; i<3; i++){
2498         /* init last dc values */
2499         /* note: quant matrix value (8) is implied here */
2500         s->last_dc[i] = 128 << s->intra_dc_precision;
2501
2502         s->current_picture.f.error[i] = 0;
2503     }
2504     s->mb_skip_run = 0;
2505     memset(s->last_mv, 0, sizeof(s->last_mv));
2506
2507     s->last_mv_dir = 0;
2508
2509     switch(s->codec_id){
2510     case AV_CODEC_ID_H263:
2511     case AV_CODEC_ID_H263P:
2512     case AV_CODEC_ID_FLV1:
2513         if (CONFIG_H263_ENCODER)
2514             s->gob_index = ff_h263_get_gob_height(s);
2515         break;
2516     case AV_CODEC_ID_MPEG4:
2517         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2518             ff_mpeg4_init_partitions(s);
2519         break;
2520     }
2521
2522     s->resync_mb_x=0;
2523     s->resync_mb_y=0;
2524     s->first_slice_line = 1;
2525     s->ptr_lastgob = s->pb.buf;
2526     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2527         s->mb_x=0;
2528         s->mb_y= mb_y;
2529
2530         ff_set_qscale(s, s->qscale);
2531         ff_init_block_index(s);
2532
2533         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2534             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2535             int mb_type= s->mb_type[xy];
2536 //            int d;
2537             int dmin= INT_MAX;
2538             int dir;
2539
2540             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2541                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2542                 return -1;
2543             }
2544             if(s->data_partitioning){
2545                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2546                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2547                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2548                     return -1;
2549                 }
2550             }
2551
2552             s->mb_x = mb_x;
2553             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2554             ff_update_block_index(s);
2555
2556             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2557                 ff_h261_reorder_mb_index(s);
2558                 xy= s->mb_y*s->mb_stride + s->mb_x;
2559                 mb_type= s->mb_type[xy];
2560             }
2561
2562             /* write gob / video packet header  */
2563             if(s->rtp_mode){
2564                 int current_packet_size, is_gob_start;
2565
2566                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2567
2568                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2569
2570                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2571
2572                 switch(s->codec_id){
2573                 case AV_CODEC_ID_H263:
2574                 case AV_CODEC_ID_H263P:
2575                     if(!s->h263_slice_structured)
2576                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2577                     break;
2578                 case AV_CODEC_ID_MPEG2VIDEO:
2579                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2580                 case AV_CODEC_ID_MPEG1VIDEO:
2581                     if(s->mb_skip_run) is_gob_start=0;
2582                     break;
2583                 }
2584
2585                 if(is_gob_start){
2586                     if(s->start_mb_y != mb_y || mb_x!=0){
2587                         write_slice_end(s);
2588
2589                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2590                             ff_mpeg4_init_partitions(s);
2591                         }
2592                     }
2593
2594                     assert((put_bits_count(&s->pb)&7) == 0);
2595                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2596
2597                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2598                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2599                         int d = 100 / s->error_rate;
2600                         if(r % d == 0){
2601                             current_packet_size=0;
2602                             s->pb.buf_ptr= s->ptr_lastgob;
2603                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2604                         }
2605                     }
2606
2607                     if (s->avctx->rtp_callback){
2608                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2609                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2610                     }
2611                     update_mb_info(s, 1);
2612
2613                     switch(s->codec_id){
2614                     case AV_CODEC_ID_MPEG4:
2615                         if (CONFIG_MPEG4_ENCODER) {
2616                             ff_mpeg4_encode_video_packet_header(s);
2617                             ff_mpeg4_clean_buffers(s);
2618                         }
2619                     break;
2620                     case AV_CODEC_ID_MPEG1VIDEO:
2621                     case AV_CODEC_ID_MPEG2VIDEO:
2622                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2623                             ff_mpeg1_encode_slice_header(s);
2624                             ff_mpeg1_clean_buffers(s);
2625                         }
2626                     break;
2627                     case AV_CODEC_ID_H263:
2628                     case AV_CODEC_ID_H263P:
2629                         if (CONFIG_H263_ENCODER)
2630                             ff_h263_encode_gob_header(s, mb_y);
2631                     break;
2632                     }
2633
2634                     if(s->flags&CODEC_FLAG_PASS1){
2635                         int bits= put_bits_count(&s->pb);
2636                         s->misc_bits+= bits - s->last_bits;
2637                         s->last_bits= bits;
2638                     }
2639
2640                     s->ptr_lastgob += current_packet_size;
2641                     s->first_slice_line=1;
2642                     s->resync_mb_x=mb_x;
2643                     s->resync_mb_y=mb_y;
2644                 }
2645             }
2646
2647             if(  (s->resync_mb_x   == s->mb_x)
2648                && s->resync_mb_y+1 == s->mb_y){
2649                 s->first_slice_line=0;
2650             }
2651
2652             s->mb_skipped=0;
2653             s->dquant=0; //only for QP_RD
2654
2655             update_mb_info(s, 0);
2656
2657             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2658                 int next_block=0;
2659                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2660
2661                 copy_context_before_encode(&backup_s, s, -1);
2662                 backup_s.pb= s->pb;
2663                 best_s.data_partitioning= s->data_partitioning;
2664                 best_s.partitioned_frame= s->partitioned_frame;
2665                 if(s->data_partitioning){
2666                     backup_s.pb2= s->pb2;
2667                     backup_s.tex_pb= s->tex_pb;
2668                 }
2669
2670                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2671                     s->mv_dir = MV_DIR_FORWARD;
2672                     s->mv_type = MV_TYPE_16X16;
2673                     s->mb_intra= 0;
2674                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2675                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2676                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2677                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2678                 }
2679                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2680                     s->mv_dir = MV_DIR_FORWARD;
2681                     s->mv_type = MV_TYPE_FIELD;
2682                     s->mb_intra= 0;
2683                     for(i=0; i<2; i++){
2684                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2685                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2686                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2687                     }
2688                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2689                                  &dmin, &next_block, 0, 0);
2690                 }
2691                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2692                     s->mv_dir = MV_DIR_FORWARD;
2693                     s->mv_type = MV_TYPE_16X16;
2694                     s->mb_intra= 0;
2695                     s->mv[0][0][0] = 0;
2696                     s->mv[0][0][1] = 0;
2697                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2698                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2699                 }
2700                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2701                     s->mv_dir = MV_DIR_FORWARD;
2702                     s->mv_type = MV_TYPE_8X8;
2703                     s->mb_intra= 0;
2704                     for(i=0; i<4; i++){
2705                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2706                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2707                     }
2708                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2709                                  &dmin, &next_block, 0, 0);
2710                 }
2711                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2712                     s->mv_dir = MV_DIR_FORWARD;
2713                     s->mv_type = MV_TYPE_16X16;
2714                     s->mb_intra= 0;
2715                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2716                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2717                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2718                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2719                 }
2720                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2721                     s->mv_dir = MV_DIR_BACKWARD;
2722                     s->mv_type = MV_TYPE_16X16;
2723                     s->mb_intra= 0;
2724                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2725                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2726                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2727                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2728                 }
2729                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2730                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2731                     s->mv_type = MV_TYPE_16X16;
2732                     s->mb_intra= 0;
2733                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2734                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2735                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2736                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2737                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2738                                  &dmin, &next_block, 0, 0);
2739                 }
2740                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2741                     s->mv_dir = MV_DIR_FORWARD;
2742                     s->mv_type = MV_TYPE_FIELD;
2743                     s->mb_intra= 0;
2744                     for(i=0; i<2; i++){
2745                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2746                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2747                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2748                     }
2749                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2750                                  &dmin, &next_block, 0, 0);
2751                 }
2752                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2753                     s->mv_dir = MV_DIR_BACKWARD;
2754                     s->mv_type = MV_TYPE_FIELD;
2755                     s->mb_intra= 0;
2756                     for(i=0; i<2; i++){
2757                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2758                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2759                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2760                     }
2761                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2762                                  &dmin, &next_block, 0, 0);
2763                 }
2764                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2765                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2766                     s->mv_type = MV_TYPE_FIELD;
2767                     s->mb_intra= 0;
2768                     for(dir=0; dir<2; dir++){
2769                         for(i=0; i<2; i++){
2770                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2771                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2772                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2773                         }
2774                     }
2775                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2776                                  &dmin, &next_block, 0, 0);
2777                 }
2778                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2779                     s->mv_dir = 0;
2780                     s->mv_type = MV_TYPE_16X16;
2781                     s->mb_intra= 1;
2782                     s->mv[0][0][0] = 0;
2783                     s->mv[0][0][1] = 0;
2784                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2785                                  &dmin, &next_block, 0, 0);
2786                     if(s->h263_pred || s->h263_aic){
2787                         if(best_s.mb_intra)
2788                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2789                         else
2790                             ff_clean_intra_table_entries(s); //old mode?
2791                     }
2792                 }
2793
2794                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2795                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2796                         const int last_qp= backup_s.qscale;
2797                         int qpi, qp, dc[6];
2798                         int16_t ac[6][16];
2799                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2800                         static const int dquant_tab[4]={-1,1,-2,2};
2801
2802                         assert(backup_s.dquant == 0);
2803
2804                         //FIXME intra
2805                         s->mv_dir= best_s.mv_dir;
2806                         s->mv_type = MV_TYPE_16X16;
2807                         s->mb_intra= best_s.mb_intra;
2808                         s->mv[0][0][0] = best_s.mv[0][0][0];
2809                         s->mv[0][0][1] = best_s.mv[0][0][1];
2810                         s->mv[1][0][0] = best_s.mv[1][0][0];
2811                         s->mv[1][0][1] = best_s.mv[1][0][1];
2812
2813                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2814                         for(; qpi<4; qpi++){
2815                             int dquant= dquant_tab[qpi];
2816                             qp= last_qp + dquant;
2817                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2818                                 continue;
2819                             backup_s.dquant= dquant;
2820                             if(s->mb_intra && s->dc_val[0]){
2821                                 for(i=0; i<6; i++){
2822                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2823                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2824                                 }
2825                             }
2826
2827                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2828                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2829                             if(best_s.qscale != qp){
2830                                 if(s->mb_intra && s->dc_val[0]){
2831                                     for(i=0; i<6; i++){
2832                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2833                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2834                                     }
2835                                 }
2836                             }
2837                         }
2838                     }
2839                 }
2840                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2841                     int mx= s->b_direct_mv_table[xy][0];
2842                     int my= s->b_direct_mv_table[xy][1];
2843
2844                     backup_s.dquant = 0;
2845                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2846                     s->mb_intra= 0;
2847                     ff_mpeg4_set_direct_mv(s, mx, my);
2848                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2849                                  &dmin, &next_block, mx, my);
2850                 }
2851                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2852                     backup_s.dquant = 0;
2853                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2854                     s->mb_intra= 0;
2855                     ff_mpeg4_set_direct_mv(s, 0, 0);
2856                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2857                                  &dmin, &next_block, 0, 0);
2858                 }
2859                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2860                     int coded=0;
2861                     for(i=0; i<6; i++)
2862                         coded |= s->block_last_index[i];
2863                     if(coded){
2864                         int mx,my;
2865                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2866                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2867                             mx=my=0; //FIXME find the one we actually used
2868                             ff_mpeg4_set_direct_mv(s, mx, my);
2869                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2870                             mx= s->mv[1][0][0];
2871                             my= s->mv[1][0][1];
2872                         }else{
2873                             mx= s->mv[0][0][0];
2874                             my= s->mv[0][0][1];
2875                         }
2876
2877                         s->mv_dir= best_s.mv_dir;
2878                         s->mv_type = best_s.mv_type;
2879                         s->mb_intra= 0;
2880 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2881                         s->mv[0][0][1] = best_s.mv[0][0][1];
2882                         s->mv[1][0][0] = best_s.mv[1][0][0];
2883                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2884                         backup_s.dquant= 0;
2885                         s->skipdct=1;
2886                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2887                                         &dmin, &next_block, mx, my);
2888                         s->skipdct=0;
2889                     }
2890                 }
2891
2892                 s->current_picture.qscale_table[xy] = best_s.qscale;
2893
2894                 copy_context_after_encode(s, &best_s, -1);
2895
2896                 pb_bits_count= put_bits_count(&s->pb);
2897                 flush_put_bits(&s->pb);
2898                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2899                 s->pb= backup_s.pb;
2900
2901                 if(s->data_partitioning){
2902                     pb2_bits_count= put_bits_count(&s->pb2);
2903                     flush_put_bits(&s->pb2);
2904                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2905                     s->pb2= backup_s.pb2;
2906
2907                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2908                     flush_put_bits(&s->tex_pb);
2909                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2910                     s->tex_pb= backup_s.tex_pb;
2911                 }
2912                 s->last_bits= put_bits_count(&s->pb);
2913
2914                 if (CONFIG_H263_ENCODER &&
2915                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2916                     ff_h263_update_motion_val(s);
2917
2918                 if(next_block==0){ //FIXME 16 vs linesize16
2919                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2920                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2921                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2922                 }
2923
2924                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2925                     ff_MPV_decode_mb(s, s->block);
2926             } else {
2927                 int motion_x = 0, motion_y = 0;
2928                 s->mv_type=MV_TYPE_16X16;
2929                 // only one MB-Type possible
2930
2931                 switch(mb_type){
2932                 case CANDIDATE_MB_TYPE_INTRA:
2933                     s->mv_dir = 0;
2934                     s->mb_intra= 1;
2935                     motion_x= s->mv[0][0][0] = 0;
2936                     motion_y= s->mv[0][0][1] = 0;
2937                     break;
2938                 case CANDIDATE_MB_TYPE_INTER:
2939                     s->mv_dir = MV_DIR_FORWARD;
2940                     s->mb_intra= 0;
2941                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2942                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2943                     break;
2944                 case CANDIDATE_MB_TYPE_INTER_I:
2945                     s->mv_dir = MV_DIR_FORWARD;
2946                     s->mv_type = MV_TYPE_FIELD;
2947                     s->mb_intra= 0;
2948                     for(i=0; i<2; i++){
2949                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2950                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2951                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2952                     }
2953                     break;
2954                 case CANDIDATE_MB_TYPE_INTER4V:
2955                     s->mv_dir = MV_DIR_FORWARD;
2956                     s->mv_type = MV_TYPE_8X8;
2957                     s->mb_intra= 0;
2958                     for(i=0; i<4; i++){
2959                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2960                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2961                     }
2962                     break;
2963                 case CANDIDATE_MB_TYPE_DIRECT:
2964                     if (CONFIG_MPEG4_ENCODER) {
2965                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2966                         s->mb_intra= 0;
2967                         motion_x=s->b_direct_mv_table[xy][0];
2968                         motion_y=s->b_direct_mv_table[xy][1];
2969                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2970                     }
2971                     break;
2972                 case CANDIDATE_MB_TYPE_DIRECT0:
2973                     if (CONFIG_MPEG4_ENCODER) {
2974                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2975                         s->mb_intra= 0;
2976                         ff_mpeg4_set_direct_mv(s, 0, 0);
2977                     }
2978                     break;
2979                 case CANDIDATE_MB_TYPE_BIDIR:
2980                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2981                     s->mb_intra= 0;
2982                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2983                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2984                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2985                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2986                     break;
2987                 case CANDIDATE_MB_TYPE_BACKWARD:
2988                     s->mv_dir = MV_DIR_BACKWARD;
2989                     s->mb_intra= 0;
2990                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2991                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2992                     break;
2993                 case CANDIDATE_MB_TYPE_FORWARD:
2994                     s->mv_dir = MV_DIR_FORWARD;
2995                     s->mb_intra= 0;
2996                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2997                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2998                     break;
2999                 case CANDIDATE_MB_TYPE_FORWARD_I:
3000                     s->mv_dir = MV_DIR_FORWARD;
3001                     s->mv_type = MV_TYPE_FIELD;
3002                     s->mb_intra= 0;
3003                     for(i=0; i<2; i++){
3004                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3005                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3006                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3007                     }
3008                     break;
3009                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3010                     s->mv_dir = MV_DIR_BACKWARD;
3011                     s->mv_type = MV_TYPE_FIELD;
3012                     s->mb_intra= 0;
3013                     for(i=0; i<2; i++){
3014                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3015                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3016                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3017                     }
3018                     break;
3019                 case CANDIDATE_MB_TYPE_BIDIR_I:
3020                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3021                     s->mv_type = MV_TYPE_FIELD;
3022                     s->mb_intra= 0;
3023                     for(dir=0; dir<2; dir++){
3024                         for(i=0; i<2; i++){
3025                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3026                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3027                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3028                         }
3029                     }
3030                     break;
3031                 default:
3032                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3033                 }
3034
3035                 encode_mb(s, motion_x, motion_y);
3036
3037                 // RAL: Update last macroblock type
3038                 s->last_mv_dir = s->mv_dir;
3039
3040                 if (CONFIG_H263_ENCODER &&
3041                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3042                     ff_h263_update_motion_val(s);
3043
3044                 ff_MPV_decode_mb(s, s->block);
3045             }
3046
3047             /* clean the MV table in IPS frames for direct mode in B frames */
3048             if(s->mb_intra /* && I,P,S_TYPE */){
3049                 s->p_mv_table[xy][0]=0;
3050                 s->p_mv_table[xy][1]=0;
3051             }
3052
3053             if(s->flags&CODEC_FLAG_PSNR){
3054                 int w= 16;
3055                 int h= 16;
3056
3057                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3058                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3059
3060                 s->current_picture.f.error[0] += sse(
3061                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3062                     s->dest[0], w, h, s->linesize);
3063                 s->current_picture.f.error[1] += sse(
3064                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3065                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3066                 s->current_picture.f.error[2] += sse(
3067                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3068                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3069             }
3070             if(s->loop_filter){
3071                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3072                     ff_h263_loop_filter(s);
3073             }
3074             av_dlog(s->avctx, "MB %d %d bits\n",
3075                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3076         }
3077     }
3078
3079     //not beautiful here but we must write it before flushing so it has to be here
3080     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3081         ff_msmpeg4_encode_ext_header(s);
3082
3083     write_slice_end(s);
3084
3085     /* Send the last GOB if RTP */
3086     if (s->avctx->rtp_callback) {
3087         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3088         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3089         /* Call the RTP callback to send the last GOB */
3090         emms_c();
3091         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3092     }
3093
3094     return 0;
3095 }
3096
3097 #define MERGE(field) dst->field += src->field; src->field=0
3098 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3099     MERGE(me.scene_change_score);
3100     MERGE(me.mc_mb_var_sum_temp);
3101     MERGE(me.mb_var_sum_temp);
3102 }
3103
3104 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3105     int i;
3106
3107     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3108     MERGE(dct_count[1]);
3109     MERGE(mv_bits);
3110     MERGE(i_tex_bits);
3111     MERGE(p_tex_bits);
3112     MERGE(i_count);
3113     MERGE(f_count);
3114     MERGE(b_count);
3115     MERGE(skip_count);
3116     MERGE(misc_bits);
3117     MERGE(er.error_count);
3118     MERGE(padding_bug_score);
3119     MERGE(current_picture.f.error[0]);
3120     MERGE(current_picture.f.error[1]);
3121     MERGE(current_picture.f.error[2]);
3122
3123     if(dst->avctx->noise_reduction){
3124         for(i=0; i<64; i++){
3125             MERGE(dct_error_sum[0][i]);
3126             MERGE(dct_error_sum[1][i]);
3127         }
3128     }
3129
3130     assert(put_bits_count(&src->pb) % 8 ==0);
3131     assert(put_bits_count(&dst->pb) % 8 ==0);
3132     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3133     flush_put_bits(&dst->pb);
3134 }
3135
3136 static int estimate_qp(MpegEncContext *s, int dry_run){
3137     if (s->next_lambda){
3138         s->current_picture_ptr->f.quality =
3139         s->current_picture.f.quality = s->next_lambda;
3140         if(!dry_run) s->next_lambda= 0;
3141     } else if (!s->fixed_qscale) {
3142         s->current_picture_ptr->f.quality =
3143         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3144         if (s->current_picture.f.quality < 0)
3145             return -1;
3146     }
3147
3148     if(s->adaptive_quant){
3149         switch(s->codec_id){
3150         case AV_CODEC_ID_MPEG4:
3151             if (CONFIG_MPEG4_ENCODER)
3152                 ff_clean_mpeg4_qscales(s);
3153             break;
3154         case AV_CODEC_ID_H263:
3155         case AV_CODEC_ID_H263P:
3156         case AV_CODEC_ID_FLV1:
3157             if (CONFIG_H263_ENCODER)
3158                 ff_clean_h263_qscales(s);
3159             break;
3160         default:
3161             ff_init_qscale_tab(s);
3162         }
3163
3164         s->lambda= s->lambda_table[0];
3165         //FIXME broken
3166     }else
3167         s->lambda = s->current_picture.f.quality;
3168     update_qscale(s);
3169     return 0;
3170 }
3171
3172 /* must be called before writing the header */
3173 static void set_frame_distances(MpegEncContext * s){
3174     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3175     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3176
3177     if(s->pict_type==AV_PICTURE_TYPE_B){
3178         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3179         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3180     }else{
3181         s->pp_time= s->time - s->last_non_b_time;
3182         s->last_non_b_time= s->time;
3183         assert(s->picture_number==0 || s->pp_time > 0);
3184     }
3185 }
3186
3187 static int encode_picture(MpegEncContext *s, int picture_number)
3188 {
3189     int i, ret;
3190     int bits;
3191     int context_count = s->slice_context_count;
3192
3193     s->picture_number = picture_number;
3194
3195     /* Reset the average MB variance */
3196     s->me.mb_var_sum_temp    =
3197     s->me.mc_mb_var_sum_temp = 0;
3198
3199     /* we need to initialize some time vars before we can encode b-frames */
3200     // RAL: Condition added for MPEG1VIDEO
3201     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3202         set_frame_distances(s);
3203     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3204         ff_set_mpeg4_time(s);
3205
3206     s->me.scene_change_score=0;
3207
3208 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3209
3210     if(s->pict_type==AV_PICTURE_TYPE_I){
3211         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3212         else                        s->no_rounding=0;
3213     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3214         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3215             s->no_rounding ^= 1;
3216     }
3217
3218     if(s->flags & CODEC_FLAG_PASS2){
3219         if (estimate_qp(s,1) < 0)
3220             return -1;
3221         ff_get_2pass_fcode(s);
3222     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3223         if(s->pict_type==AV_PICTURE_TYPE_B)
3224             s->lambda= s->last_lambda_for[s->pict_type];
3225         else
3226             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3227         update_qscale(s);
3228     }
3229
3230     s->mb_intra=0; //for the rate distortion & bit compare functions
3231     for(i=1; i<context_count; i++){
3232         ret = ff_update_duplicate_context(s->thread_context[i], s);
3233         if (ret < 0)
3234             return ret;
3235     }
3236
3237     if(ff_init_me(s)<0)
3238         return -1;
3239
3240     /* Estimate motion for every MB */
3241     if(s->pict_type != AV_PICTURE_TYPE_I){
3242         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3243         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3244         if (s->pict_type != AV_PICTURE_TYPE_B) {
3245             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3246                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3247             }
3248         }
3249
3250         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3251     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3252         /* I-Frame */
3253         for(i=0; i<s->mb_stride*s->mb_height; i++)
3254             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3255
3256         if(!s->fixed_qscale){
3257             /* finding spatial complexity for I-frame rate control */
3258             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3259         }
3260     }
3261     for(i=1; i<context_count; i++){
3262         merge_context_after_me(s, s->thread_context[i]);
3263     }
3264     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3265     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3266     emms_c();
3267
3268     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3269         s->pict_type= AV_PICTURE_TYPE_I;
3270         for(i=0; i<s->mb_stride*s->mb_height; i++)
3271             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3272         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3273                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3274     }
3275
3276     if(!s->umvplus){
3277         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3278             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3279
3280             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3281                 int a,b;
3282                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3283                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3284                 s->f_code= FFMAX3(s->f_code, a, b);
3285             }
3286
3287             ff_fix_long_p_mvs(s);
3288             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3289             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3290                 int j;
3291                 for(i=0; i<2; i++){
3292                     for(j=0; j<2; j++)
3293                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3294                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3295                 }
3296             }
3297         }
3298
3299         if(s->pict_type==AV_PICTURE_TYPE_B){
3300             int a, b;
3301
3302             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3303             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3304             s->f_code = FFMAX(a, b);
3305
3306             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3307             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3308             s->b_code = FFMAX(a, b);
3309
3310             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3311             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3312             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3313             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3314             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3315                 int dir, j;
3316                 for(dir=0; dir<2; dir++){
3317                     for(i=0; i<2; i++){
3318                         for(j=0; j<2; j++){
3319                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3320                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3321                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3322                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3323                         }
3324                     }
3325                 }
3326             }
3327         }
3328     }
3329
3330     if (estimate_qp(s, 0) < 0)
3331         return -1;
3332
3333     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3334         s->qscale= 3; //reduce clipping problems
3335
3336     if (s->out_format == FMT_MJPEG) {
3337         /* for mjpeg, we do include qscale in the matrix */
3338         for(i=1;i<64;i++){
3339             int j= s->dsp.idct_permutation[i];
3340
3341             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3342         }
3343         s->y_dc_scale_table=
3344         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3345         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3346         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3347                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3348         s->qscale= 8;
3349     }
3350
3351     //FIXME var duplication
3352     s->current_picture_ptr->f.key_frame =
3353     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3354     s->current_picture_ptr->f.pict_type =
3355     s->current_picture.f.pict_type = s->pict_type;
3356
3357     if (s->current_picture.f.key_frame)
3358         s->picture_in_gop_number=0;
3359
3360     s->last_bits= put_bits_count(&s->pb);
3361     switch(s->out_format) {
3362     case FMT_MJPEG:
3363         if (CONFIG_MJPEG_ENCODER)
3364             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3365                                            s->intra_matrix);
3366         break;
3367     case FMT_H261:
3368         if (CONFIG_H261_ENCODER)
3369             ff_h261_encode_picture_header(s, picture_number);
3370         break;
3371     case FMT_H263:
3372         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3373             ff_wmv2_encode_picture_header(s, picture_number);
3374         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3375             ff_msmpeg4_encode_picture_header(s, picture_number);
3376         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3377             ff_mpeg4_encode_picture_header(s, picture_number);
3378         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3379             ff_rv10_encode_picture_header(s, picture_number);
3380         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3381             ff_rv20_encode_picture_header(s, picture_number);
3382         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3383             ff_flv_encode_picture_header(s, picture_number);
3384         else if (CONFIG_H263_ENCODER)
3385             ff_h263_encode_picture_header(s, picture_number);
3386         break;
3387     case FMT_MPEG1:
3388         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3389             ff_mpeg1_encode_picture_header(s, picture_number);
3390         break;
3391     default:
3392         assert(0);
3393     }
3394     bits= put_bits_count(&s->pb);
3395     s->header_bits= bits - s->last_bits;
3396
3397     for(i=1; i<context_count; i++){
3398         update_duplicate_context_after_me(s->thread_context[i], s);
3399     }
3400     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3401     for(i=1; i<context_count; i++){
3402         merge_context_after_encode(s, s->thread_context[i]);
3403     }
3404     emms_c();
3405     return 0;
3406 }
3407
3408 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3409     const int intra= s->mb_intra;
3410     int i;
3411
3412     s->dct_count[intra]++;
3413
3414     for(i=0; i<64; i++){
3415         int level= block[i];
3416
3417         if(level){
3418             if(level>0){
3419                 s->dct_error_sum[intra][i] += level;
3420                 level -= s->dct_offset[intra][i];
3421                 if(level<0) level=0;
3422             }else{
3423                 s->dct_error_sum[intra][i] -= level;
3424                 level += s->dct_offset[intra][i];
3425                 if(level>0) level=0;
3426             }
3427             block[i]= level;
3428         }
3429     }
3430 }
3431
3432 static int dct_quantize_trellis_c(MpegEncContext *s,
3433                                   int16_t *block, int n,
3434                                   int qscale, int *overflow){
3435     const int *qmat;
3436     const uint8_t *scantable= s->intra_scantable.scantable;
3437     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3438     int max=0;
3439     unsigned int threshold1, threshold2;
3440     int bias=0;
3441     int run_tab[65];
3442     int level_tab[65];
3443     int score_tab[65];
3444     int survivor[65];
3445     int survivor_count;
3446     int last_run=0;
3447     int last_level=0;
3448     int last_score= 0;
3449     int last_i;
3450     int coeff[2][64];
3451     int coeff_count[64];
3452     int qmul, qadd, start_i, last_non_zero, i, dc;
3453     const int esc_length= s->ac_esc_length;
3454     uint8_t * length;
3455     uint8_t * last_length;
3456     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3457
3458     s->dsp.fdct (block);
3459
3460     if(s->dct_error_sum)
3461         s->denoise_dct(s, block);
3462     qmul= qscale*16;
3463     qadd= ((qscale-1)|1)*8;
3464
3465     if (s->mb_intra) {
3466         int q;
3467         if (!s->h263_aic) {
3468             if (n < 4)
3469                 q = s->y_dc_scale;
3470             else
3471                 q = s->c_dc_scale;
3472             q = q << 3;
3473         } else{
3474             /* For AIC we skip quant/dequant of INTRADC */
3475             q = 1 << 3;
3476             qadd=0;
3477         }
3478
3479         /* note: block[0] is assumed to be positive */
3480         block[0] = (block[0] + (q >> 1)) / q;
3481         start_i = 1;
3482         last_non_zero = 0;
3483         qmat = s->q_intra_matrix[qscale];
3484         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3485             bias= 1<<(QMAT_SHIFT-1);
3486         length     = s->intra_ac_vlc_length;
3487         last_length= s->intra_ac_vlc_last_length;
3488     } else {
3489         start_i = 0;
3490         last_non_zero = -1;
3491         qmat = s->q_inter_matrix[qscale];
3492         length     = s->inter_ac_vlc_length;
3493         last_length= s->inter_ac_vlc_last_length;
3494     }
3495     last_i= start_i;
3496
3497     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3498     threshold2= (threshold1<<1);
3499
3500     for(i=63; i>=start_i; i--) {
3501         const int j = scantable[i];
3502         int level = block[j] * qmat[j];
3503
3504         if(((unsigned)(level+threshold1))>threshold2){
3505             last_non_zero = i;
3506             break;
3507         }
3508     }
3509
3510     for(i=start_i; i<=last_non_zero; i++) {
3511         const int j = scantable[i];
3512         int level = block[j] * qmat[j];
3513
3514 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3515 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3516         if(((unsigned)(level+threshold1))>threshold2){
3517             if(level>0){
3518                 level= (bias + level)>>QMAT_SHIFT;
3519                 coeff[0][i]= level;
3520                 coeff[1][i]= level-1;
3521 //                coeff[2][k]= level-2;
3522             }else{
3523                 level= (bias - level)>>QMAT_SHIFT;
3524                 coeff[0][i]= -level;
3525                 coeff[1][i]= -level+1;
3526 //                coeff[2][k]= -level+2;
3527             }
3528             coeff_count[i]= FFMIN(level, 2);
3529             assert(coeff_count[i]);
3530             max |=level;
3531         }else{
3532             coeff[0][i]= (level>>31)|1;
3533             coeff_count[i]= 1;
3534         }
3535     }
3536
3537     *overflow= s->max_qcoeff < max; //overflow might have happened
3538
3539     if(last_non_zero < start_i){
3540         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3541         return last_non_zero;
3542     }
3543
3544     score_tab[start_i]= 0;
3545     survivor[0]= start_i;
3546     survivor_count= 1;
3547
3548     for(i=start_i; i<=last_non_zero; i++){
3549         int level_index, j, zero_distortion;
3550         int dct_coeff= FFABS(block[ scantable[i] ]);
3551         int best_score=256*256*256*120;
3552
3553         if (s->dsp.fdct == ff_fdct_ifast)
3554             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3555         zero_distortion= dct_coeff*dct_coeff;
3556
3557         for(level_index=0; level_index < coeff_count[i]; level_index++){
3558             int distortion;
3559             int level= coeff[level_index][i];
3560             const int alevel= FFABS(level);
3561             int unquant_coeff;
3562
3563             assert(level);
3564
3565             if(s->out_format == FMT_H263){
3566                 unquant_coeff= alevel*qmul + qadd;
3567             }else{ //MPEG1
3568                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3569                 if(s->mb_intra){
3570                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3571                         unquant_coeff =   (unquant_coeff - 1) | 1;
3572                 }else{
3573                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3574                         unquant_coeff =   (unquant_coeff - 1) | 1;
3575                 }
3576                 unquant_coeff<<= 3;
3577             }
3578
3579             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3580             level+=64;
3581             if((level&(~127)) == 0){
3582                 for(j=survivor_count-1; j>=0; j--){
3583                     int run= i - survivor[j];
3584                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3585                     score += score_tab[i-run];
3586
3587                     if(score < best_score){
3588                         best_score= score;
3589                         run_tab[i+1]= run;
3590                         level_tab[i+1]= level-64;
3591                     }
3592                 }
3593
3594                 if(s->out_format == FMT_H263){
3595                     for(j=survivor_count-1; j>=0; j--){
3596                         int run= i - survivor[j];
3597                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3598                         score += score_tab[i-run];
3599                         if(score < last_score){
3600                             last_score= score;
3601                             last_run= run;
3602                             last_level= level-64;
3603                             last_i= i+1;
3604                         }
3605                     }
3606                 }
3607             }else{
3608                 distortion += esc_length*lambda;
3609                 for(j=survivor_count-1; j>=0; j--){
3610                     int run= i - survivor[j];
3611                     int score= distortion + score_tab[i-run];
3612
3613                     if(score < best_score){
3614                         best_score= score;
3615                         run_tab[i+1]= run;
3616                         level_tab[i+1]= level-64;
3617                     }
3618                 }
3619
3620                 if(s->out_format == FMT_H263){
3621                   for(j=survivor_count-1; j>=0; j--){
3622                         int run= i - survivor[j];
3623                         int score= distortion + score_tab[i-run];
3624                         if(score < last_score){
3625                             last_score= score;
3626                             last_run= run;
3627                             last_level= level-64;
3628                             last_i= i+1;
3629                         }
3630                     }
3631                 }
3632             }
3633         }
3634
3635         score_tab[i+1]= best_score;
3636
3637         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
3638         if(last_non_zero <= 27){
3639             for(; survivor_count; survivor_count--){
3640                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3641                     break;
3642             }
3643         }else{
3644             for(; survivor_count; survivor_count--){
3645                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3646                     break;
3647             }
3648         }
3649
3650         survivor[ survivor_count++ ]= i+1;
3651     }
3652
3653     if(s->out_format != FMT_H263){
3654         last_score= 256*256*256*120;
3655         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3656             int score= score_tab[i];
3657             if(i) score += lambda*2; //FIXME exacter?
3658
3659             if(score < last_score){
3660                 last_score= score;
3661                 last_i= i;
3662                 last_level= level_tab[i];
3663                 last_run= run_tab[i];
3664             }
3665         }
3666     }
3667
3668     s->coded_score[n] = last_score;
3669
3670     dc= FFABS(block[0]);
3671     last_non_zero= last_i - 1;
3672     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3673
3674     if(last_non_zero < start_i)
3675         return last_non_zero;
3676
3677     if(last_non_zero == 0 && start_i == 0){
3678         int best_level= 0;
3679         int best_score= dc * dc;
3680
3681         for(i=0; i<coeff_count[0]; i++){
3682             int level= coeff[i][0];
3683             int alevel= FFABS(level);
3684             int unquant_coeff, score, distortion;
3685
3686             if(s->out_format == FMT_H263){
3687                     unquant_coeff= (alevel*qmul + qadd)>>3;
3688             }else{ //MPEG1
3689                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3690                     unquant_coeff =   (unquant_coeff - 1) | 1;
3691             }
3692             unquant_coeff = (unquant_coeff + 4) >> 3;
3693             unquant_coeff<<= 3 + 3;
3694
3695             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3696             level+=64;
3697             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3698             else                    score= distortion + esc_length*lambda;
3699
3700             if(score < best_score){
3701                 best_score= score;
3702                 best_level= level - 64;
3703             }
3704         }
3705         block[0]= best_level;
3706         s->coded_score[n] = best_score - dc*dc;
3707         if(best_level == 0) return -1;
3708         else                return last_non_zero;
3709     }
3710
3711     i= last_i;
3712     assert(last_level);
3713
3714     block[ perm_scantable[last_non_zero] ]= last_level;
3715     i -= last_run + 1;
3716
3717     for(; i>start_i; i -= run_tab[i] + 1){
3718         block[ perm_scantable[i-1] ]= level_tab[i];
3719     }
3720
3721     return last_non_zero;
3722 }
3723
3724 //#define REFINE_STATS 1
3725 static int16_t basis[64][64];
3726
3727 static void build_basis(uint8_t *perm){
3728     int i, j, x, y;
3729     emms_c();
3730     for(i=0; i<8; i++){
3731         for(j=0; j<8; j++){
3732             for(y=0; y<8; y++){
3733                 for(x=0; x<8; x++){
3734                     double s= 0.25*(1<<BASIS_SHIFT);
3735                     int index= 8*i + j;
3736                     int perm_index= perm[index];
3737                     if(i==0) s*= sqrt(0.5);
3738                     if(j==0) s*= sqrt(0.5);
3739                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3740                 }
3741             }
3742         }
3743     }
3744 }
3745
3746 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3747                         int16_t *block, int16_t *weight, int16_t *orig,
3748                         int n, int qscale){
3749     int16_t rem[64];
3750     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3751     const uint8_t *scantable= s->intra_scantable.scantable;
3752     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3753 //    unsigned int threshold1, threshold2;
3754 //    int bias=0;
3755     int run_tab[65];
3756     int prev_run=0;
3757     int prev_level=0;
3758     int qmul, qadd, start_i, last_non_zero, i, dc;
3759     uint8_t * length;
3760     uint8_t * last_length;
3761     int lambda;
3762     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3763 #ifdef REFINE_STATS
3764 static int count=0;
3765 static int after_last=0;
3766 static int to_zero=0;
3767 static int from_zero=0;
3768 static int raise=0;
3769 static int lower=0;
3770 static int messed_sign=0;
3771 #endif
3772
3773     if(basis[0][0] == 0)
3774         build_basis(s->dsp.idct_permutation);
3775
3776     qmul= qscale*2;
3777     qadd= (qscale-1)|1;
3778     if (s->mb_intra) {
3779         if (!s->h263_aic) {
3780             if (n < 4)
3781                 q = s->y_dc_scale;
3782             else
3783                 q = s->c_dc_scale;
3784         } else{
3785             /* For AIC we skip quant/dequant of INTRADC */
3786             q = 1;
3787             qadd=0;
3788         }
3789         q <<= RECON_SHIFT-3;
3790         /* note: block[0] is assumed to be positive */
3791         dc= block[0]*q;
3792 //        block[0] = (block[0] + (q >> 1)) / q;
3793         start_i = 1;
3794 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3795 //            bias= 1<<(QMAT_SHIFT-1);
3796         length     = s->intra_ac_vlc_length;
3797         last_length= s->intra_ac_vlc_last_length;
3798     } else {
3799         dc= 0;
3800         start_i = 0;
3801         length     = s->inter_ac_vlc_length;
3802         last_length= s->inter_ac_vlc_last_length;
3803     }
3804     last_non_zero = s->block_last_index[n];
3805
3806 #ifdef REFINE_STATS
3807 {START_TIMER
3808 #endif
3809     dc += (1<<(RECON_SHIFT-1));
3810     for(i=0; i<64; i++){
3811         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3812     }
3813 #ifdef REFINE_STATS
3814 STOP_TIMER("memset rem[]")}
3815 #endif
3816     sum=0;
3817     for(i=0; i<64; i++){
3818         int one= 36;
3819         int qns=4;
3820         int w;
3821
3822         w= FFABS(weight[i]) + qns*one;
3823         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3824
3825         weight[i] = w;
3826 //        w=weight[i] = (63*qns + (w/2)) / w;
3827
3828         assert(w>0);
3829         assert(w<(1<<6));
3830         sum += w*w;
3831     }
3832     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3833 #ifdef REFINE_STATS
3834 {START_TIMER
3835 #endif
3836     run=0;
3837     rle_index=0;
3838     for(i=start_i; i<=last_non_zero; i++){
3839         int j= perm_scantable[i];
3840         const int level= block[j];
3841         int coeff;
3842
3843         if(level){
3844             if(level<0) coeff= qmul*level - qadd;
3845             else        coeff= qmul*level + qadd;
3846             run_tab[rle_index++]=run;
3847             run=0;
3848
3849             s->dsp.add_8x8basis(rem, basis[j], coeff);
3850         }else{
3851             run++;
3852         }
3853     }
3854 #ifdef REFINE_STATS
3855 if(last_non_zero>0){
3856 STOP_TIMER("init rem[]")
3857 }
3858 }
3859
3860 {START_TIMER
3861 #endif
3862     for(;;){
3863         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3864         int best_coeff=0;
3865         int best_change=0;
3866         int run2, best_unquant_change=0, analyze_gradient;
3867 #ifdef REFINE_STATS
3868 {START_TIMER
3869 #endif
3870         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3871
3872         if(analyze_gradient){
3873 #ifdef REFINE_STATS
3874 {START_TIMER
3875 #endif
3876             for(i=0; i<64; i++){
3877                 int w= weight[i];
3878
3879                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3880             }
3881 #ifdef REFINE_STATS
3882 STOP_TIMER("rem*w*w")}
3883 {START_TIMER
3884 #endif
3885             s->dsp.fdct(d1);
3886 #ifdef REFINE_STATS
3887 STOP_TIMER("dct")}
3888 #endif
3889         }
3890
3891         if(start_i){
3892             const int level= block[0];
3893             int change, old_coeff;
3894
3895             assert(s->mb_intra);
3896
3897             old_coeff= q*level;
3898
3899             for(change=-1; change<=1; change+=2){
3900                 int new_level= level + change;
3901                 int score, new_coeff;
3902
3903                 new_coeff= q*new_level;
3904                 if(new_coeff >= 2048 || new_coeff < 0)
3905                     continue;
3906
3907                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3908                 if(score<best_score){
3909                     best_score= score;
3910                     best_coeff= 0;
3911                     best_change= change;
3912                     best_unquant_change= new_coeff - old_coeff;
3913                 }
3914             }
3915         }
3916
3917         run=0;
3918         rle_index=0;
3919         run2= run_tab[rle_index++];
3920         prev_level=0;
3921         prev_run=0;
3922
3923         for(i=start_i; i<64; i++){
3924             int j= perm_scantable[i];
3925             const int level= block[j];
3926             int change, old_coeff;
3927
3928             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3929                 break;
3930
3931             if(level){
3932                 if(level<0) old_coeff= qmul*level - qadd;
3933                 else        old_coeff= qmul*level + qadd;
3934                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3935             }else{
3936                 old_coeff=0;
3937                 run2--;
3938                 assert(run2>=0 || i >= last_non_zero );
3939             }
3940
3941             for(change=-1; change<=1; change+=2){
3942                 int new_level= level + change;
3943                 int score, new_coeff, unquant_change;
3944
3945                 score=0;
3946                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3947                    continue;
3948
3949                 if(new_level){
3950                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3951                     else            new_coeff= qmul*new_level + qadd;
3952                     if(new_coeff >= 2048 || new_coeff <= -2048)
3953                         continue;
3954                     //FIXME check for overflow
3955
3956                     if(level){
3957                         if(level < 63 && level > -63){
3958                             if(i < last_non_zero)
3959                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3960                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3961                             else
3962                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3963                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3964                         }
3965                     }else{
3966                         assert(FFABS(new_level)==1);
3967
3968                         if(analyze_gradient){
3969                             int g= d1[ scantable[i] ];
3970                             if(g && (g^new_level) >= 0)
3971                                 continue;
3972                         }
3973
3974                         if(i < last_non_zero){
3975                             int next_i= i + run2 + 1;
3976                             int next_level= block[ perm_scantable[next_i] ] + 64;
3977
3978                             if(next_level&(~127))
3979                                 next_level= 0;
3980
3981                             if(next_i < last_non_zero)
3982                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3983                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3984                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3985                             else
3986                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3987                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3988                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3989                         }else{
3990                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3991                             if(prev_level){
3992                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3993                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3994                             }
3995                         }
3996                     }
3997                 }else{
3998                     new_coeff=0;
3999                     assert(FFABS(level)==1);
4000
4001                     if(i < last_non_zero){
4002                         int next_i= i + run2 + 1;
4003                         int next_level= block[ perm_scantable[next_i] ] + 64;
4004
4005                         if(next_level&(~127))
4006                             next_level= 0;
4007
4008                         if(next_i < last_non_zero)
4009                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4010                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4011                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4012                         else
4013                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4014                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4015                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4016                     }else{
4017                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4018                         if(prev_level){
4019                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4020                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4021                         }
4022                     }
4023                 }
4024
4025                 score *= lambda;
4026
4027                 unquant_change= new_coeff - old_coeff;
4028                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
4029
4030                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
4031                 if(score<best_score){
4032                     best_score= score;
4033                     best_coeff= i;
4034                     best_change= change;
4035                     best_unquant_change= unquant_change;
4036                 }
4037             }
4038             if(level){
4039                 prev_level= level + 64;
4040                 if(prev_level&(~127))
4041                     prev_level= 0;
4042                 prev_run= run;
4043                 run=0;
4044             }else{
4045                 run++;
4046             }
4047         }
4048 #ifdef REFINE_STATS
4049 STOP_TIMER("iterative step")}
4050 #endif
4051
4052         if(best_change){
4053             int j= perm_scantable[ best_coeff ];
4054
4055             block[j] += best_change;
4056
4057             if(best_coeff > last_non_zero){
4058                 last_non_zero= best_coeff;
4059                 assert(block[j]);
4060 #ifdef REFINE_STATS
4061 after_last++;
4062 #endif
4063             }else{
4064 #ifdef REFINE_STATS
4065 if(block[j]){
4066     if(block[j] - best_change){
4067         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4068             raise++;
4069         }else{
4070             lower++;
4071         }
4072     }else{
4073         from_zero++;
4074     }
4075 }else{
4076     to_zero++;
4077 }
4078 #endif
4079                 for(; last_non_zero>=start_i; last_non_zero--){
4080                     if(block[perm_scantable[last_non_zero]])
4081                         break;
4082                 }
4083             }
4084 #ifdef REFINE_STATS
4085 count++;
4086 if(256*256*256*64 % count == 0){
4087     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4088 }
4089 #endif
4090             run=0;
4091             rle_index=0;
4092             for(i=start_i; i<=last_non_zero; i++){
4093                 int j= perm_scantable[i];
4094                 const int level= block[j];
4095
4096                  if(level){
4097                      run_tab[rle_index++]=run;
4098                      run=0;
4099                  }else{
4100                      run++;
4101                  }
4102             }
4103
4104             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4105         }else{
4106             break;
4107         }
4108     }
4109 #ifdef REFINE_STATS
4110 if(last_non_zero>0){
4111 STOP_TIMER("iterative search")
4112 }
4113 }
4114 #endif
4115
4116     return last_non_zero;
4117 }
4118
4119 int ff_dct_quantize_c(MpegEncContext *s,
4120                         int16_t *block, int n,
4121                         int qscale, int *overflow)
4122 {
4123     int i, j, level, last_non_zero, q, start_i;
4124     const int *qmat;
4125     const uint8_t *scantable= s->intra_scantable.scantable;
4126     int bias;
4127     int max=0;
4128     unsigned int threshold1, threshold2;
4129
4130     s->dsp.fdct (block);
4131
4132     if(s->dct_error_sum)
4133         s->denoise_dct(s, block);
4134
4135     if (s->mb_intra) {
4136         if (!s->h263_aic) {
4137             if (n < 4)
4138                 q = s->y_dc_scale;
4139             else
4140                 q = s->c_dc_scale;
4141             q = q << 3;
4142         } else
4143             /* For AIC we skip quant/dequant of INTRADC */
4144             q = 1 << 3;
4145
4146         /* note: block[0] is assumed to be positive */
4147         block[0] = (block[0] + (q >> 1)) / q;
4148         start_i = 1;
4149         last_non_zero = 0;
4150         qmat = s->q_intra_matrix[qscale];
4151         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4152     } else {
4153         start_i = 0;
4154         last_non_zero = -1;
4155         qmat = s->q_inter_matrix[qscale];
4156         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4157     }
4158     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4159     threshold2= (threshold1<<1);
4160     for(i=63;i>=start_i;i--) {
4161         j = scantable[i];
4162         level = block[j] * qmat[j];
4163
4164         if(((unsigned)(level+threshold1))>threshold2){
4165             last_non_zero = i;
4166             break;
4167         }else{
4168             block[j]=0;
4169         }
4170     }
4171     for(i=start_i; i<=last_non_zero; i++) {
4172         j = scantable[i];
4173         level = block[j] * qmat[j];
4174
4175 //        if(   bias+level >= (1<<QMAT_SHIFT)
4176 //           || bias-level >= (1<<QMAT_SHIFT)){
4177         if(((unsigned)(level+threshold1))>threshold2){
4178             if(level>0){
4179                 level= (bias + level)>>QMAT_SHIFT;
4180                 block[j]= level;
4181             }else{
4182                 level= (bias - level)>>QMAT_SHIFT;
4183                 block[j]= -level;
4184             }
4185             max |=level;
4186         }else{
4187             block[j]=0;
4188         }
4189     }
4190     *overflow= s->max_qcoeff < max; //overflow might have happened
4191
4192     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4193     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4194         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4195
4196     return last_non_zero;
4197 }
4198
/* Byte offset of member x inside MpegEncContext, for use in option tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Option flags used by the option tables below. The expansion is
 * parenthesized so that the OR stays intact when VE is embedded in a
 * larger expression (e.g. "VE & flag"). */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4201 static const AVOption h263_options[] = {
4202     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4203     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4204     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4205     FF_MPV_COMMON_OPTS
4206     { NULL },
4207 };
4208
4209 static const AVClass h263_class = {
4210     .class_name = "H.263 encoder",
4211     .item_name  = av_default_item_name,
4212     .option     = h263_options,
4213     .version    = LIBAVUTIL_VERSION_INT,
4214 };
4215
4216 AVCodec ff_h263_encoder = {
4217     .name           = "h263",
4218     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4219     .type           = AVMEDIA_TYPE_VIDEO,
4220     .id             = AV_CODEC_ID_H263,
4221     .priv_data_size = sizeof(MpegEncContext),
4222     .init           = ff_MPV_encode_init,
4223     .encode2        = ff_MPV_encode_picture,
4224     .close          = ff_MPV_encode_end,
4225     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4226     .priv_class     = &h263_class,
4227 };
4228
4229 static const AVOption h263p_options[] = {
4230     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4231     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4232     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4233     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4234     FF_MPV_COMMON_OPTS
4235     { NULL },
4236 };
4237 static const AVClass h263p_class = {
4238     .class_name = "H.263p encoder",
4239     .item_name  = av_default_item_name,
4240     .option     = h263p_options,
4241     .version    = LIBAVUTIL_VERSION_INT,
4242 };
4243
4244 AVCodec ff_h263p_encoder = {
4245     .name           = "h263p",
4246     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4247     .type           = AVMEDIA_TYPE_VIDEO,
4248     .id             = AV_CODEC_ID_H263P,
4249     .priv_data_size = sizeof(MpegEncContext),
4250     .init           = ff_MPV_encode_init,
4251     .encode2        = ff_MPV_encode_picture,
4252     .close          = ff_MPV_encode_end,
4253     .capabilities   = CODEC_CAP_SLICE_THREADS,
4254     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4255     .priv_class     = &h263p_class,
4256 };
4257
4258 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4259
4260 AVCodec ff_msmpeg4v2_encoder = {
4261     .name           = "msmpeg4v2",
4262     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4263     .type           = AVMEDIA_TYPE_VIDEO,
4264     .id             = AV_CODEC_ID_MSMPEG4V2,
4265     .priv_data_size = sizeof(MpegEncContext),
4266     .init           = ff_MPV_encode_init,
4267     .encode2        = ff_MPV_encode_picture,
4268     .close          = ff_MPV_encode_end,
4269     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4270     .priv_class     = &msmpeg4v2_class,
4271 };
4272
4273 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4274
4275 AVCodec ff_msmpeg4v3_encoder = {
4276     .name           = "msmpeg4",
4277     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4278     .type           = AVMEDIA_TYPE_VIDEO,
4279     .id             = AV_CODEC_ID_MSMPEG4V3,
4280     .priv_data_size = sizeof(MpegEncContext),
4281     .init           = ff_MPV_encode_init,
4282     .encode2        = ff_MPV_encode_picture,
4283     .close          = ff_MPV_encode_end,
4284     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4285     .priv_class     = &msmpeg4v3_class,
4286 };
4287
4288 FF_MPV_GENERIC_CLASS(wmv1)
4289
4290 AVCodec ff_wmv1_encoder = {
4291     .name           = "wmv1",
4292     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4293     .type           = AVMEDIA_TYPE_VIDEO,
4294     .id             = AV_CODEC_ID_WMV1,
4295     .priv_data_size = sizeof(MpegEncContext),
4296     .init           = ff_MPV_encode_init,
4297     .encode2        = ff_MPV_encode_picture,
4298     .close          = ff_MPV_encode_end,
4299     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4300     .priv_class     = &wmv1_class,
4301 };