]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
dsputil: cosmetics: Lose camelCase on ff_cropTbl and ff_squareTbl names
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "dsputil.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "h261.h"
44 #include "h263.h"
45 #include "mathops.h"
46 #include "mjpegenc.h"
47 #include "msmpeg4.h"
48 #include "faandct.h"
49 #include "thread.h"
50 #include "aandcttab.h"
51 #include "flv.h"
52 #include "mpeg4video.h"
53 #include "internal.h"
54 #include "bytestream.h"
55 #include <limits.h>
56
57 static int encode_picture(MpegEncContext *s, int picture_number);
58 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
59 static int sse_mb(MpegEncContext *s);
60 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
61 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
62
63 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
64 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
65
66 const AVOption ff_mpv_generic_options[] = {
67     FF_MPV_COMMON_OPTS
68     { NULL },
69 };
70
71 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
72                        uint16_t (*qmat16)[2][64],
73                        const uint16_t *quant_matrix,
74                        int bias, int qmin, int qmax, int intra)
75 {
76     int qscale;
77     int shift = 0;
78
79     for (qscale = qmin; qscale <= qmax; qscale++) {
80         int i;
81         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
82             dsp->fdct == ff_jpeg_fdct_islow_10 ||
83             dsp->fdct == ff_faandct) {
84             for (i = 0; i < 64; i++) {
85                 const int j = dsp->idct_permutation[i];
86                 /* 16 <= qscale * quant_matrix[i] <= 7905
87                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
88                  *             19952 <=              x  <= 249205026
89                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
90                  *           3444240 >= (1 << 36) / (x) >= 275 */
91
92                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
93                                         (qscale * quant_matrix[j]));
94             }
95         } else if (dsp->fdct == ff_fdct_ifast) {
96             for (i = 0; i < 64; i++) {
97                 const int j = dsp->idct_permutation[i];
98                 /* 16 <= qscale * quant_matrix[i] <= 7905
99                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
100                  *             19952 <=              x  <= 249205026
101                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
102                  *           3444240 >= (1 << 36) / (x) >= 275 */
103
104                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
105                                         (ff_aanscales[i] * qscale *
106                                          quant_matrix[j]));
107             }
108         } else {
109             for (i = 0; i < 64; i++) {
110                 const int j = dsp->idct_permutation[i];
111                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
112                  * Assume x = qscale * quant_matrix[i]
113                  * So             16 <=              x  <= 7905
114                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
115                  * so          32768 >= (1 << 19) / (x) >= 67 */
116                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
117                                         (qscale * quant_matrix[j]));
118                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
119                 //                    (qscale * quant_matrix[i]);
120                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
121                                        (qscale * quant_matrix[j]);
122
123                 if (qmat16[qscale][0][i] == 0 ||
124                     qmat16[qscale][0][i] == 128 * 256)
125                     qmat16[qscale][0][i] = 128 * 256 - 1;
126                 qmat16[qscale][1][i] =
127                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
128                                 qmat16[qscale][0][i]);
129             }
130         }
131
132         for (i = intra; i < 64; i++) {
133             int64_t max = 8191;
134             if (dsp->fdct == ff_fdct_ifast) {
135                 max = (8191LL * ff_aanscales[i]) >> 14;
136             }
137             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
138                 shift++;
139             }
140         }
141     }
142     if (shift) {
143         av_log(NULL, AV_LOG_INFO,
144                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
145                QMAT_SHIFT - shift);
146     }
147 }
148
149 static inline void update_qscale(MpegEncContext *s)
150 {
151     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
152                 (FF_LAMBDA_SHIFT + 7);
153     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
154
155     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
156                  FF_LAMBDA_SHIFT;
157 }
158
159 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
160 {
161     int i;
162
163     if (matrix) {
164         put_bits(pb, 1, 1);
165         for (i = 0; i < 64; i++) {
166             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
167         }
168     } else
169         put_bits(pb, 1, 0);
170 }
171
172 /**
173  * init s->current_picture.qscale_table from s->lambda_table
174  */
175 void ff_init_qscale_tab(MpegEncContext *s)
176 {
177     int8_t * const qscale_table = s->current_picture.qscale_table;
178     int i;
179
180     for (i = 0; i < s->mb_num; i++) {
181         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
182         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
183         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
184                                                   s->avctx->qmax);
185     }
186 }
187
188 static void update_duplicate_context_after_me(MpegEncContext *dst,
189                                               MpegEncContext *src)
190 {
191 #define COPY(a) dst->a= src->a
192     COPY(pict_type);
193     COPY(current_picture);
194     COPY(f_code);
195     COPY(b_code);
196     COPY(qscale);
197     COPY(lambda);
198     COPY(lambda2);
199     COPY(picture_in_gop_number);
200     COPY(gop_picture_number);
201     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
202     COPY(progressive_frame);    // FIXME don't set in encode_header
203     COPY(partitioned_frame);    // FIXME don't set in encode_header
204 #undef COPY
205 }
206
207 /**
208  * Set the given MpegEncContext to defaults for encoding.
209  * the changed fields will not depend upon the prior state of the MpegEncContext.
210  */
211 static void MPV_encode_defaults(MpegEncContext *s)
212 {
213     int i;
214     ff_MPV_common_defaults(s);
215
216     for (i = -16; i < 16; i++) {
217         default_fcode_tab[i + MAX_MV] = 1;
218     }
219     s->me.mv_penalty = default_mv_penalty;
220     s->fcode_tab     = default_fcode_tab;
221
222     s->input_picture_number  = 0;
223     s->picture_in_gop_number = 0;
224 }
225
226 /* init video encoder */
227 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
228 {
229     MpegEncContext *s = avctx->priv_data;
230     int i, ret;
231
232     MPV_encode_defaults(s);
233
234     switch (avctx->codec_id) {
235     case AV_CODEC_ID_MPEG2VIDEO:
236         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
237             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
238             av_log(avctx, AV_LOG_ERROR,
239                    "only YUV420 and YUV422 are supported\n");
240             return -1;
241         }
242         break;
243     case AV_CODEC_ID_MJPEG:
244         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
245             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
246             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
247               avctx->pix_fmt != AV_PIX_FMT_YUV422P) ||
248              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
249             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
250             return -1;
251         }
252         break;
253     default:
254         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
255             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
256             return -1;
257         }
258     }
259
260     switch (avctx->pix_fmt) {
261     case AV_PIX_FMT_YUVJ422P:
262     case AV_PIX_FMT_YUV422P:
263         s->chroma_format = CHROMA_422;
264         break;
265     case AV_PIX_FMT_YUVJ420P:
266     case AV_PIX_FMT_YUV420P:
267     default:
268         s->chroma_format = CHROMA_420;
269         break;
270     }
271
272     s->bit_rate = avctx->bit_rate;
273     s->width    = avctx->width;
274     s->height   = avctx->height;
275     if (avctx->gop_size > 600 &&
276         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
277         av_log(avctx, AV_LOG_ERROR,
278                "Warning keyframe interval too large! reducing it ...\n");
279         avctx->gop_size = 600;
280     }
281     s->gop_size     = avctx->gop_size;
282     s->avctx        = avctx;
283     s->flags        = avctx->flags;
284     s->flags2       = avctx->flags2;
285     if (avctx->max_b_frames > MAX_B_FRAMES) {
286         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
287                "is %d.\n", MAX_B_FRAMES);
288     }
289     s->max_b_frames = avctx->max_b_frames;
290     s->codec_id     = avctx->codec->id;
291     s->strict_std_compliance = avctx->strict_std_compliance;
292     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
293     s->mpeg_quant         = avctx->mpeg_quant;
294     s->rtp_mode           = !!avctx->rtp_payload_size;
295     s->intra_dc_precision = avctx->intra_dc_precision;
296     s->user_specified_pts = AV_NOPTS_VALUE;
297
298     if (s->gop_size <= 1) {
299         s->intra_only = 1;
300         s->gop_size   = 12;
301     } else {
302         s->intra_only = 0;
303     }
304
305     s->me_method = avctx->me_method;
306
307     /* Fixed QSCALE */
308     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
309
310     s->adaptive_quant = (s->avctx->lumi_masking ||
311                          s->avctx->dark_masking ||
312                          s->avctx->temporal_cplx_masking ||
313                          s->avctx->spatial_cplx_masking  ||
314                          s->avctx->p_masking      ||
315                          s->avctx->border_masking ||
316                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
317                         !s->fixed_qscale;
318
319     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
320
321     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
322         av_log(avctx, AV_LOG_ERROR,
323                "a vbv buffer size is needed, "
324                "for encoding with a maximum bitrate\n");
325         return -1;
326     }
327
328     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
329         av_log(avctx, AV_LOG_INFO,
330                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
331     }
332
333     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
334         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
335         return -1;
336     }
337
338     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
339         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
340         return -1;
341     }
342
343     if (avctx->rc_max_rate &&
344         avctx->rc_max_rate == avctx->bit_rate &&
345         avctx->rc_max_rate != avctx->rc_min_rate) {
346         av_log(avctx, AV_LOG_INFO,
347                "impossible bitrate constraints, this will fail\n");
348     }
349
350     if (avctx->rc_buffer_size &&
351         avctx->bit_rate * (int64_t)avctx->time_base.num >
352             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
353         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
354         return -1;
355     }
356
357     if (!s->fixed_qscale &&
358         avctx->bit_rate * av_q2d(avctx->time_base) >
359             avctx->bit_rate_tolerance) {
360         av_log(avctx, AV_LOG_ERROR,
361                "bitrate tolerance too small for bitrate\n");
362         return -1;
363     }
364
365     if (s->avctx->rc_max_rate &&
366         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
367         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
368          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
369         90000LL * (avctx->rc_buffer_size - 1) >
370             s->avctx->rc_max_rate * 0xFFFFLL) {
371         av_log(avctx, AV_LOG_INFO,
372                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
373                "specified vbv buffer is too large for the given bitrate!\n");
374     }
375
376     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
377         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
378         s->codec_id != AV_CODEC_ID_FLV1) {
379         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
380         return -1;
381     }
382
383     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
384         av_log(avctx, AV_LOG_ERROR,
385                "OBMC is only supported with simple mb decision\n");
386         return -1;
387     }
388
389     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
390         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
391         return -1;
392     }
393
394     if (s->max_b_frames                    &&
395         s->codec_id != AV_CODEC_ID_MPEG4      &&
396         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
397         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
398         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
399         return -1;
400     }
401
402     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
403          s->codec_id == AV_CODEC_ID_H263  ||
404          s->codec_id == AV_CODEC_ID_H263P) &&
405         (avctx->sample_aspect_ratio.num > 255 ||
406          avctx->sample_aspect_ratio.den > 255)) {
407         av_log(avctx, AV_LOG_ERROR,
408                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
409                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
410         return -1;
411     }
412
413     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
414         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
415         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
416         return -1;
417     }
418
419     // FIXME mpeg2 uses that too
420     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
421         av_log(avctx, AV_LOG_ERROR,
422                "mpeg2 style quantization not supported by codec\n");
423         return -1;
424     }
425
426     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
427         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
428         return -1;
429     }
430
431     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
432         s->avctx->mb_decision != FF_MB_DECISION_RD) {
433         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
434         return -1;
435     }
436
437     if (s->avctx->scenechange_threshold < 1000000000 &&
438         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
439         av_log(avctx, AV_LOG_ERROR,
440                "closed gop with scene change detection are not supported yet, "
441                "set threshold to 1000000000\n");
442         return -1;
443     }
444
445     if (s->flags & CODEC_FLAG_LOW_DELAY) {
446         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
447             av_log(avctx, AV_LOG_ERROR,
448                   "low delay forcing is only available for mpeg2\n");
449             return -1;
450         }
451         if (s->max_b_frames != 0) {
452             av_log(avctx, AV_LOG_ERROR,
453                    "b frames cannot be used with low delay\n");
454             return -1;
455         }
456     }
457
458     if (s->q_scale_type == 1) {
459         if (avctx->qmax > 12) {
460             av_log(avctx, AV_LOG_ERROR,
461                    "non linear quant only supports qmax <= 12 currently\n");
462             return -1;
463         }
464     }
465
466     if (s->avctx->thread_count > 1         &&
467         s->codec_id != AV_CODEC_ID_MPEG4      &&
468         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
469         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
470         (s->codec_id != AV_CODEC_ID_H263P)) {
471         av_log(avctx, AV_LOG_ERROR,
472                "multi threaded encoding not supported by codec\n");
473         return -1;
474     }
475
476     if (s->avctx->thread_count < 1) {
477         av_log(avctx, AV_LOG_ERROR,
478                "automatic thread number detection not supported by codec,"
479                "patch welcome\n");
480         return -1;
481     }
482
483     if (s->avctx->thread_count > 1)
484         s->rtp_mode = 1;
485
486     if (!avctx->time_base.den || !avctx->time_base.num) {
487         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
488         return -1;
489     }
490
491     i = (INT_MAX / 2 + 128) >> 8;
492     if (avctx->mb_threshold >= i) {
493         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
494                i - 1);
495         return -1;
496     }
497
498     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
499         av_log(avctx, AV_LOG_INFO,
500                "notice: b_frame_strategy only affects the first pass\n");
501         avctx->b_frame_strategy = 0;
502     }
503
504     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
505     if (i > 1) {
506         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
507         avctx->time_base.den /= i;
508         avctx->time_base.num /= i;
509         //return -1;
510     }
511
512     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
513         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
514         // (a + x * 3 / 8) / x
515         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
516         s->inter_quant_bias = 0;
517     } else {
518         s->intra_quant_bias = 0;
519         // (a - x / 4) / x
520         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
521     }
522
523     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
524         s->intra_quant_bias = avctx->intra_quant_bias;
525     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
526         s->inter_quant_bias = avctx->inter_quant_bias;
527
528     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
529         s->avctx->time_base.den > (1 << 16) - 1) {
530         av_log(avctx, AV_LOG_ERROR,
531                "timebase %d/%d not supported by MPEG 4 standard, "
532                "the maximum admitted value for the timebase denominator "
533                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
534                (1 << 16) - 1);
535         return -1;
536     }
537     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
538
539     switch (avctx->codec->id) {
540     case AV_CODEC_ID_MPEG1VIDEO:
541         s->out_format = FMT_MPEG1;
542         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
543         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
544         break;
545     case AV_CODEC_ID_MPEG2VIDEO:
546         s->out_format = FMT_MPEG1;
547         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
548         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
549         s->rtp_mode   = 1;
550         break;
551     case AV_CODEC_ID_MJPEG:
552         s->out_format = FMT_MJPEG;
553         s->intra_only = 1; /* force intra only for jpeg */
554         if (!CONFIG_MJPEG_ENCODER ||
555             ff_mjpeg_encode_init(s) < 0)
556             return -1;
557         avctx->delay = 0;
558         s->low_delay = 1;
559         break;
560     case AV_CODEC_ID_H261:
561         if (!CONFIG_H261_ENCODER)
562             return -1;
563         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
564             av_log(avctx, AV_LOG_ERROR,
565                    "The specified picture size of %dx%d is not valid for the "
566                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
567                     s->width, s->height);
568             return -1;
569         }
570         s->out_format = FMT_H261;
571         avctx->delay  = 0;
572         s->low_delay  = 1;
573         break;
574     case AV_CODEC_ID_H263:
575         if (!CONFIG_H263_ENCODER)
576         return -1;
577         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
578                              s->width, s->height) == 8) {
579             av_log(avctx, AV_LOG_INFO,
580                    "The specified picture size of %dx%d is not valid for "
581                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
582                    "352x288, 704x576, and 1408x1152."
583                    "Try H.263+.\n", s->width, s->height);
584             return -1;
585         }
586         s->out_format = FMT_H263;
587         avctx->delay  = 0;
588         s->low_delay  = 1;
589         break;
590     case AV_CODEC_ID_H263P:
591         s->out_format = FMT_H263;
592         s->h263_plus  = 1;
593         /* Fx */
594         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
595         s->modified_quant  = s->h263_aic;
596         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
597         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
598
599         /* /Fx */
600         /* These are just to be sure */
601         avctx->delay = 0;
602         s->low_delay = 1;
603         break;
604     case AV_CODEC_ID_FLV1:
605         s->out_format      = FMT_H263;
606         s->h263_flv        = 2; /* format = 1; 11-bit codes */
607         s->unrestricted_mv = 1;
608         s->rtp_mode  = 0; /* don't allow GOB */
609         avctx->delay = 0;
610         s->low_delay = 1;
611         break;
612     case AV_CODEC_ID_RV10:
613         s->out_format = FMT_H263;
614         avctx->delay  = 0;
615         s->low_delay  = 1;
616         break;
617     case AV_CODEC_ID_RV20:
618         s->out_format      = FMT_H263;
619         avctx->delay       = 0;
620         s->low_delay       = 1;
621         s->modified_quant  = 1;
622         s->h263_aic        = 1;
623         s->h263_plus       = 1;
624         s->loop_filter     = 1;
625         s->unrestricted_mv = 0;
626         break;
627     case AV_CODEC_ID_MPEG4:
628         s->out_format      = FMT_H263;
629         s->h263_pred       = 1;
630         s->unrestricted_mv = 1;
631         s->low_delay       = s->max_b_frames ? 0 : 1;
632         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
633         break;
634     case AV_CODEC_ID_MSMPEG4V2:
635         s->out_format      = FMT_H263;
636         s->h263_pred       = 1;
637         s->unrestricted_mv = 1;
638         s->msmpeg4_version = 2;
639         avctx->delay       = 0;
640         s->low_delay       = 1;
641         break;
642     case AV_CODEC_ID_MSMPEG4V3:
643         s->out_format        = FMT_H263;
644         s->h263_pred         = 1;
645         s->unrestricted_mv   = 1;
646         s->msmpeg4_version   = 3;
647         s->flipflop_rounding = 1;
648         avctx->delay         = 0;
649         s->low_delay         = 1;
650         break;
651     case AV_CODEC_ID_WMV1:
652         s->out_format        = FMT_H263;
653         s->h263_pred         = 1;
654         s->unrestricted_mv   = 1;
655         s->msmpeg4_version   = 4;
656         s->flipflop_rounding = 1;
657         avctx->delay         = 0;
658         s->low_delay         = 1;
659         break;
660     case AV_CODEC_ID_WMV2:
661         s->out_format        = FMT_H263;
662         s->h263_pred         = 1;
663         s->unrestricted_mv   = 1;
664         s->msmpeg4_version   = 5;
665         s->flipflop_rounding = 1;
666         avctx->delay         = 0;
667         s->low_delay         = 1;
668         break;
669     default:
670         return -1;
671     }
672
673     avctx->has_b_frames = !s->low_delay;
674
675     s->encoding = 1;
676
677     s->progressive_frame    =
678     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
679                                                 CODEC_FLAG_INTERLACED_ME) ||
680                                 s->alternate_scan);
681
682     /* init */
683     if (ff_MPV_common_init(s) < 0)
684         return -1;
685
686     if (ARCH_X86)
687         ff_MPV_encode_init_x86(s);
688
689     s->avctx->coded_frame = &s->current_picture.f;
690
691     if (s->msmpeg4_version) {
692         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
693                           2 * 2 * (MAX_LEVEL + 1) *
694                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
695     }
696     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
697
698     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
699     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
700     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
701     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
702     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
703                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
704     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
705                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
706
707     if (s->avctx->noise_reduction) {
708         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
709                           2 * 64 * sizeof(uint16_t), fail);
710     }
711
712     if (CONFIG_H263_ENCODER)
713         ff_h263dsp_init(&s->h263dsp);
714     if (!s->dct_quantize)
715         s->dct_quantize = ff_dct_quantize_c;
716     if (!s->denoise_dct)
717         s->denoise_dct  = denoise_dct_c;
718     s->fast_dct_quantize = s->dct_quantize;
719     if (avctx->trellis)
720         s->dct_quantize  = dct_quantize_trellis_c;
721
722     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
723         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
724
725     s->quant_precision = 5;
726
727     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
728     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
729
730     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
731         ff_h261_encode_init(s);
732     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
733         ff_h263_encode_init(s);
734     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
735         ff_msmpeg4_encode_init(s);
736     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
737         && s->out_format == FMT_MPEG1)
738         ff_mpeg1_encode_init(s);
739
740     /* init q matrix */
741     for (i = 0; i < 64; i++) {
742         int j = s->dsp.idct_permutation[i];
743         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
744             s->mpeg_quant) {
745             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
746             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
747         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
748             s->intra_matrix[j] =
749             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
750         } else {
751             /* mpeg1/2 */
752             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
753             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
754         }
755         if (s->avctx->intra_matrix)
756             s->intra_matrix[j] = s->avctx->intra_matrix[i];
757         if (s->avctx->inter_matrix)
758             s->inter_matrix[j] = s->avctx->inter_matrix[i];
759     }
760
761     /* precompute matrix */
762     /* for mjpeg, we do include qscale in the matrix */
763     if (s->out_format != FMT_MJPEG) {
764         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
765                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
766                           31, 1);
767         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
768                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
769                           31, 0);
770     }
771
772     if (ff_rate_control_init(s) < 0)
773         return -1;
774
775 #if FF_API_ERROR_RATE
776     FF_DISABLE_DEPRECATION_WARNINGS
777     if (avctx->error_rate)
778         s->error_rate = avctx->error_rate;
779     FF_ENABLE_DEPRECATION_WARNINGS;
780 #endif
781
782     if (avctx->b_frame_strategy == 2) {
783         for (i = 0; i < s->max_b_frames + 2; i++) {
784             s->tmp_frames[i] = av_frame_alloc();
785             if (!s->tmp_frames[i])
786                 return AVERROR(ENOMEM);
787
788             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
789             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
790             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
791
792             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
793             if (ret < 0)
794                 return ret;
795         }
796     }
797
798     return 0;
799 fail:
800     ff_MPV_encode_end(avctx);
801     return AVERROR_UNKNOWN;
802 }
803
804 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
805 {
806     MpegEncContext *s = avctx->priv_data;
807     int i;
808
809     ff_rate_control_uninit(s);
810
811     ff_MPV_common_end(s);
812     if (CONFIG_MJPEG_ENCODER &&
813         s->out_format == FMT_MJPEG)
814         ff_mjpeg_encode_close(s);
815
816     av_freep(&avctx->extradata);
817
818     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
819         av_frame_free(&s->tmp_frames[i]);
820
821     ff_free_picture_tables(&s->new_picture);
822     ff_mpeg_unref_picture(s, &s->new_picture);
823
824     av_freep(&s->avctx->stats_out);
825     av_freep(&s->ac_stats);
826
827     av_freep(&s->q_intra_matrix);
828     av_freep(&s->q_inter_matrix);
829     av_freep(&s->q_intra_matrix16);
830     av_freep(&s->q_inter_matrix16);
831     av_freep(&s->input_picture);
832     av_freep(&s->reordered_input_picture);
833     av_freep(&s->dct_offset);
834
835     return 0;
836 }
837
/**
 * Sum of absolute differences between a 16x16 block and a constant value.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared against
 * @param stride distance in bytes between two consecutive rows of src
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
851
852 static int get_intra_count(MpegEncContext *s, uint8_t *src,
853                            uint8_t *ref, int stride)
854 {
855     int x, y, w, h;
856     int acc = 0;
857
858     w = s->width  & ~15;
859     h = s->height & ~15;
860
861     for (y = 0; y < h; y += 16) {
862         for (x = 0; x < w; x += 16) {
863             int offset = x + y * stride;
864             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
865                                      16);
866             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
867             int sae  = get_sae(src + offset, mean, stride);
868
869             acc += sae + 500 < sad;
870         }
871     }
872     return acc;
873 }
874
875
876 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
877 {
878     Picture *pic = NULL;
879     int64_t pts;
880     int i, display_picture_number = 0, ret;
881     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
882                                                  (s->low_delay ? 0 : 1);
883     int direct = 1;
884
885     if (pic_arg) {
886         pts = pic_arg->pts;
887         display_picture_number = s->input_picture_number++;
888
889         if (pts != AV_NOPTS_VALUE) {
890             if (s->user_specified_pts != AV_NOPTS_VALUE) {
891                 int64_t time = pts;
892                 int64_t last = s->user_specified_pts;
893
894                 if (time <= last) {
895                     av_log(s->avctx, AV_LOG_ERROR,
896                            "Error, Invalid timestamp=%"PRId64", "
897                            "last=%"PRId64"\n", pts, s->user_specified_pts);
898                     return -1;
899                 }
900
901                 if (!s->low_delay && display_picture_number == 1)
902                     s->dts_delta = time - last;
903             }
904             s->user_specified_pts = pts;
905         } else {
906             if (s->user_specified_pts != AV_NOPTS_VALUE) {
907                 s->user_specified_pts =
908                 pts = s->user_specified_pts + 1;
909                 av_log(s->avctx, AV_LOG_INFO,
910                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
911                        pts);
912             } else {
913                 pts = display_picture_number;
914             }
915         }
916     }
917
918     if (pic_arg) {
919         if (!pic_arg->buf[0]);
920             direct = 0;
921         if (pic_arg->linesize[0] != s->linesize)
922             direct = 0;
923         if (pic_arg->linesize[1] != s->uvlinesize)
924             direct = 0;
925         if (pic_arg->linesize[2] != s->uvlinesize)
926             direct = 0;
927
928         av_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
929                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
930
931         if (direct) {
932             i = ff_find_unused_picture(s, 1);
933             if (i < 0)
934                 return i;
935
936             pic = &s->picture[i];
937             pic->reference = 3;
938
939             if ((ret = av_frame_ref(&pic->f, pic_arg)) < 0)
940                 return ret;
941             if (ff_alloc_picture(s, pic, 1) < 0) {
942                 return -1;
943             }
944         } else {
945             i = ff_find_unused_picture(s, 0);
946             if (i < 0)
947                 return i;
948
949             pic = &s->picture[i];
950             pic->reference = 3;
951
952             if (ff_alloc_picture(s, pic, 0) < 0) {
953                 return -1;
954             }
955
956             if (pic->f.data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
957                 pic->f.data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
958                 pic->f.data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
959                 // empty
960             } else {
961                 int h_chroma_shift, v_chroma_shift;
962                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
963                                                  &h_chroma_shift,
964                                                  &v_chroma_shift);
965
966                 for (i = 0; i < 3; i++) {
967                     int src_stride = pic_arg->linesize[i];
968                     int dst_stride = i ? s->uvlinesize : s->linesize;
969                     int h_shift = i ? h_chroma_shift : 0;
970                     int v_shift = i ? v_chroma_shift : 0;
971                     int w = s->width  >> h_shift;
972                     int h = s->height >> v_shift;
973                     uint8_t *src = pic_arg->data[i];
974                     uint8_t *dst = pic->f.data[i];
975
976                     if (!s->avctx->rc_buffer_size)
977                         dst += INPLACE_OFFSET;
978
979                     if (src_stride == dst_stride)
980                         memcpy(dst, src, src_stride * h);
981                     else {
982                         while (h--) {
983                             memcpy(dst, src, w);
984                             dst += dst_stride;
985                             src += src_stride;
986                         }
987                     }
988                 }
989             }
990         }
991         ret = av_frame_copy_props(&pic->f, pic_arg);
992         if (ret < 0)
993             return ret;
994
995         pic->f.display_picture_number = display_picture_number;
996         pic->f.pts = pts; // we set this here to avoid modifiying pic_arg
997     }
998
999     /* shift buffer entries */
1000     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1001         s->input_picture[i - 1] = s->input_picture[i];
1002
1003     s->input_picture[encoding_delay] = (Picture*) pic;
1004
1005     return 0;
1006 }
1007
1008 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1009 {
1010     int x, y, plane;
1011     int score = 0;
1012     int64_t score64 = 0;
1013
1014     for (plane = 0; plane < 3; plane++) {
1015         const int stride = p->f.linesize[plane];
1016         const int bw = plane ? 1 : 2;
1017         for (y = 0; y < s->mb_height * bw; y++) {
1018             for (x = 0; x < s->mb_width * bw; x++) {
1019                 int off = p->shared ? 0 : 16;
1020                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1021                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1022                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1023
1024                 switch (s->avctx->frame_skip_exp) {
1025                 case 0: score    =  FFMAX(score, v);          break;
1026                 case 1: score   += FFABS(v);                  break;
1027                 case 2: score   += v * v;                     break;
1028                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1029                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1030                 }
1031             }
1032         }
1033     }
1034
1035     if (score)
1036         score64 = score;
1037
1038     if (score64 < s->avctx->frame_skip_threshold)
1039         return 1;
1040     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1041         return 1;
1042     return 0;
1043 }
1044
1045 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1046 {
1047     AVPacket pkt = { 0 };
1048     int ret, got_output;
1049
1050     av_init_packet(&pkt);
1051     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1052     if (ret < 0)
1053         return ret;
1054
1055     ret = pkt.size;
1056     av_free_packet(&pkt);
1057     return ret;
1058 }
1059
1060 static int estimate_best_b_count(MpegEncContext *s)
1061 {
1062     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1063     AVCodecContext *c = avcodec_alloc_context3(NULL);
1064     const int scale = s->avctx->brd_scale;
1065     int i, j, out_size, p_lambda, b_lambda, lambda2;
1066     int64_t best_rd  = INT64_MAX;
1067     int best_b_count = -1;
1068
1069     assert(scale >= 0 && scale <= 3);
1070
1071     //emms_c();
1072     //s->next_picture_ptr->quality;
1073     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1074     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1075     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1076     if (!b_lambda) // FIXME we should do this somewhere else
1077         b_lambda = p_lambda;
1078     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1079                FF_LAMBDA_SHIFT;
1080
1081     c->width        = s->width  >> scale;
1082     c->height       = s->height >> scale;
1083     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1084                       CODEC_FLAG_INPUT_PRESERVED;
1085     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1086     c->mb_decision  = s->avctx->mb_decision;
1087     c->me_cmp       = s->avctx->me_cmp;
1088     c->mb_cmp       = s->avctx->mb_cmp;
1089     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1090     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1091     c->time_base    = s->avctx->time_base;
1092     c->max_b_frames = s->max_b_frames;
1093
1094     if (avcodec_open2(c, codec, NULL) < 0)
1095         return -1;
1096
1097     for (i = 0; i < s->max_b_frames + 2; i++) {
1098         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1099                                                 s->next_picture_ptr;
1100
1101         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1102             pre_input = *pre_input_ptr;
1103
1104             if (!pre_input.shared && i) {
1105                 pre_input.f.data[0] += INPLACE_OFFSET;
1106                 pre_input.f.data[1] += INPLACE_OFFSET;
1107                 pre_input.f.data[2] += INPLACE_OFFSET;
1108             }
1109
1110             s->dsp.shrink[scale](s->tmp_frames[i]->data[0], s->tmp_frames[i]->linesize[0],
1111                                  pre_input.f.data[0], pre_input.f.linesize[0],
1112                                  c->width,      c->height);
1113             s->dsp.shrink[scale](s->tmp_frames[i]->data[1], s->tmp_frames[i]->linesize[1],
1114                                  pre_input.f.data[1], pre_input.f.linesize[1],
1115                                  c->width >> 1, c->height >> 1);
1116             s->dsp.shrink[scale](s->tmp_frames[i]->data[2], s->tmp_frames[i]->linesize[2],
1117                                  pre_input.f.data[2], pre_input.f.linesize[2],
1118                                  c->width >> 1, c->height >> 1);
1119         }
1120     }
1121
1122     for (j = 0; j < s->max_b_frames + 1; j++) {
1123         int64_t rd = 0;
1124
1125         if (!s->input_picture[j])
1126             break;
1127
1128         c->error[0] = c->error[1] = c->error[2] = 0;
1129
1130         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1131         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1132
1133         out_size = encode_frame(c, s->tmp_frames[0]);
1134
1135         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1136
1137         for (i = 0; i < s->max_b_frames + 1; i++) {
1138             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1139
1140             s->tmp_frames[i + 1]->pict_type = is_p ?
1141                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1142             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1143
1144             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1145
1146             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1147         }
1148
1149         /* get the delayed frames */
1150         while (out_size) {
1151             out_size = encode_frame(c, NULL);
1152             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1153         }
1154
1155         rd += c->error[0] + c->error[1] + c->error[2];
1156
1157         if (rd < best_rd) {
1158             best_rd = rd;
1159             best_b_count = j;
1160         }
1161     }
1162
1163     avcodec_close(c);
1164     av_freep(&c);
1165
1166     return best_b_count;
1167 }
1168
1169 static int select_input_picture(MpegEncContext *s)
1170 {
1171     int i, ret;
1172
1173     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1174         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1175     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1176
1177     /* set next picture type & ordering */
1178     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1179         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1180             s->next_picture_ptr == NULL || s->intra_only) {
1181             s->reordered_input_picture[0] = s->input_picture[0];
1182             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1183             s->reordered_input_picture[0]->f.coded_picture_number =
1184                 s->coded_picture_number++;
1185         } else {
1186             int b_frames;
1187
1188             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1189                 if (s->picture_in_gop_number < s->gop_size &&
1190                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1191                     // FIXME check that te gop check above is +-1 correct
1192                     av_frame_unref(&s->input_picture[0]->f);
1193
1194                     emms_c();
1195                     ff_vbv_update(s, 0);
1196
1197                     goto no_output_pic;
1198                 }
1199             }
1200
1201             if (s->flags & CODEC_FLAG_PASS2) {
1202                 for (i = 0; i < s->max_b_frames + 1; i++) {
1203                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1204
1205                     if (pict_num >= s->rc_context.num_entries)
1206                         break;
1207                     if (!s->input_picture[i]) {
1208                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1209                         break;
1210                     }
1211
1212                     s->input_picture[i]->f.pict_type =
1213                         s->rc_context.entry[pict_num].new_pict_type;
1214                 }
1215             }
1216
1217             if (s->avctx->b_frame_strategy == 0) {
1218                 b_frames = s->max_b_frames;
1219                 while (b_frames && !s->input_picture[b_frames])
1220                     b_frames--;
1221             } else if (s->avctx->b_frame_strategy == 1) {
1222                 for (i = 1; i < s->max_b_frames + 1; i++) {
1223                     if (s->input_picture[i] &&
1224                         s->input_picture[i]->b_frame_score == 0) {
1225                         s->input_picture[i]->b_frame_score =
1226                             get_intra_count(s,
1227                                             s->input_picture[i    ]->f.data[0],
1228                                             s->input_picture[i - 1]->f.data[0],
1229                                             s->linesize) + 1;
1230                     }
1231                 }
1232                 for (i = 0; i < s->max_b_frames + 1; i++) {
1233                     if (s->input_picture[i] == NULL ||
1234                         s->input_picture[i]->b_frame_score - 1 >
1235                             s->mb_num / s->avctx->b_sensitivity)
1236                         break;
1237                 }
1238
1239                 b_frames = FFMAX(0, i - 1);
1240
1241                 /* reset scores */
1242                 for (i = 0; i < b_frames + 1; i++) {
1243                     s->input_picture[i]->b_frame_score = 0;
1244                 }
1245             } else if (s->avctx->b_frame_strategy == 2) {
1246                 b_frames = estimate_best_b_count(s);
1247             } else {
1248                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1249                 b_frames = 0;
1250             }
1251
1252             emms_c();
1253
1254             for (i = b_frames - 1; i >= 0; i--) {
1255                 int type = s->input_picture[i]->f.pict_type;
1256                 if (type && type != AV_PICTURE_TYPE_B)
1257                     b_frames = i;
1258             }
1259             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1260                 b_frames == s->max_b_frames) {
1261                 av_log(s->avctx, AV_LOG_ERROR,
1262                        "warning, too many b frames in a row\n");
1263             }
1264
1265             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1266                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1267                     s->gop_size > s->picture_in_gop_number) {
1268                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1269                 } else {
1270                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1271                         b_frames = 0;
1272                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1273                 }
1274             }
1275
1276             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1277                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1278                 b_frames--;
1279
1280             s->reordered_input_picture[0] = s->input_picture[b_frames];
1281             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1282                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1283             s->reordered_input_picture[0]->f.coded_picture_number =
1284                 s->coded_picture_number++;
1285             for (i = 0; i < b_frames; i++) {
1286                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1287                 s->reordered_input_picture[i + 1]->f.pict_type =
1288                     AV_PICTURE_TYPE_B;
1289                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1290                     s->coded_picture_number++;
1291             }
1292         }
1293     }
1294 no_output_pic:
1295     if (s->reordered_input_picture[0]) {
1296         s->reordered_input_picture[0]->reference =
1297            s->reordered_input_picture[0]->f.pict_type !=
1298                AV_PICTURE_TYPE_B ? 3 : 0;
1299
1300         ff_mpeg_unref_picture(s, &s->new_picture);
1301         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1302             return ret;
1303
1304         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1305             // input is a shared pix, so we can't modifiy it -> alloc a new
1306             // one & ensure that the shared one is reuseable
1307
1308             Picture *pic;
1309             int i = ff_find_unused_picture(s, 0);
1310             if (i < 0)
1311                 return i;
1312             pic = &s->picture[i];
1313
1314             pic->reference = s->reordered_input_picture[0]->reference;
1315             if (ff_alloc_picture(s, pic, 0) < 0) {
1316                 return -1;
1317             }
1318
1319             ret = av_frame_copy_props(&pic->f, &s->reordered_input_picture[0]->f);
1320             if (ret < 0)
1321                 return ret;
1322
1323             /* mark us unused / free shared pic */
1324             av_frame_unref(&s->reordered_input_picture[0]->f);
1325             s->reordered_input_picture[0]->shared = 0;
1326
1327             s->current_picture_ptr = pic;
1328         } else {
1329             // input is not a shared pix -> reuse buffer for current_pix
1330             s->current_picture_ptr = s->reordered_input_picture[0];
1331             for (i = 0; i < 4; i++) {
1332                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1333             }
1334         }
1335         ff_mpeg_unref_picture(s, &s->current_picture);
1336         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1337                                        s->current_picture_ptr)) < 0)
1338             return ret;
1339
1340         s->picture_number = s->new_picture.f.display_picture_number;
1341     } else {
1342         ff_mpeg_unref_picture(s, &s->new_picture);
1343     }
1344     return 0;
1345 }
1346
1347 static void frame_end(MpegEncContext *s)
1348 {
1349     int i;
1350
1351     if (s->unrestricted_mv &&
1352         s->current_picture.reference &&
1353         !s->intra_only) {
1354         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1355         int hshift = desc->log2_chroma_w;
1356         int vshift = desc->log2_chroma_h;
1357         s->dsp.draw_edges(s->current_picture.f.data[0], s->linesize,
1358                           s->h_edge_pos, s->v_edge_pos,
1359                           EDGE_WIDTH, EDGE_WIDTH,
1360                           EDGE_TOP | EDGE_BOTTOM);
1361         s->dsp.draw_edges(s->current_picture.f.data[1], s->uvlinesize,
1362                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1363                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1364                           EDGE_TOP | EDGE_BOTTOM);
1365         s->dsp.draw_edges(s->current_picture.f.data[2], s->uvlinesize,
1366                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1367                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1368                           EDGE_TOP | EDGE_BOTTOM);
1369     }
1370
1371     emms_c();
1372
1373     s->last_pict_type                 = s->pict_type;
1374     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f.quality;
1375     if (s->pict_type!= AV_PICTURE_TYPE_B)
1376         s->last_non_b_pict_type = s->pict_type;
1377
1378     if (s->encoding) {
1379         /* release non-reference frames */
1380         for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1381             if (!s->picture[i].reference)
1382                 ff_mpeg_unref_picture(s, &s->picture[i]);
1383         }
1384     }
1385
1386     s->avctx->coded_frame = &s->current_picture_ptr->f;
1387
1388 }
1389
1390 static void update_noise_reduction(MpegEncContext *s)
1391 {
1392     int intra, i;
1393
1394     for (intra = 0; intra < 2; intra++) {
1395         if (s->dct_count[intra] > (1 << 16)) {
1396             for (i = 0; i < 64; i++) {
1397                 s->dct_error_sum[intra][i] >>= 1;
1398             }
1399             s->dct_count[intra] >>= 1;
1400         }
1401
1402         for (i = 0; i < 64; i++) {
1403             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1404                                        s->dct_count[intra] +
1405                                        s->dct_error_sum[intra][i] / 2) /
1406                                       (s->dct_error_sum[intra][i] + 1);
1407         }
1408     }
1409 }
1410
1411 static int frame_start(MpegEncContext *s)
1412 {
1413     int ret;
1414
1415     /* mark & release old frames */
1416     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1417         s->last_picture_ptr != s->next_picture_ptr &&
1418         s->last_picture_ptr->f.buf[0]) {
1419         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1420     }
1421
1422     s->current_picture_ptr->f.pict_type = s->pict_type;
1423     s->current_picture_ptr->f.key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1424
1425     ff_mpeg_unref_picture(s, &s->current_picture);
1426     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1427                                    s->current_picture_ptr)) < 0)
1428         return ret;
1429
1430     if (s->pict_type != AV_PICTURE_TYPE_B) {
1431         s->last_picture_ptr = s->next_picture_ptr;
1432         if (!s->droppable)
1433             s->next_picture_ptr = s->current_picture_ptr;
1434     }
1435
1436     if (s->last_picture_ptr) {
1437         ff_mpeg_unref_picture(s, &s->last_picture);
1438         if (s->last_picture_ptr->f.buf[0] &&
1439             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1440                                        s->last_picture_ptr)) < 0)
1441             return ret;
1442     }
1443     if (s->next_picture_ptr) {
1444         ff_mpeg_unref_picture(s, &s->next_picture);
1445         if (s->next_picture_ptr->f.buf[0] &&
1446             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1447                                        s->next_picture_ptr)) < 0)
1448             return ret;
1449     }
1450
1451     if (s->picture_structure!= PICT_FRAME) {
1452         int i;
1453         for (i = 0; i < 4; i++) {
1454             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1455                 s->current_picture.f.data[i] +=
1456                     s->current_picture.f.linesize[i];
1457             }
1458             s->current_picture.f.linesize[i] *= 2;
1459             s->last_picture.f.linesize[i]    *= 2;
1460             s->next_picture.f.linesize[i]    *= 2;
1461         }
1462     }
1463
1464     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1465         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1466         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1467     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1468         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1469         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1470     } else {
1471         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1472         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1473     }
1474
1475     if (s->dct_error_sum) {
1476         assert(s->avctx->noise_reduction && s->encoding);
1477         update_noise_reduction(s);
1478     }
1479
1480     return 0;
1481 }
1482
1483 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1484                           const AVFrame *pic_arg, int *got_packet)
1485 {
1486     MpegEncContext *s = avctx->priv_data;
1487     int i, stuffing_count, ret;
1488     int context_count = s->slice_context_count;
1489
1490     s->picture_in_gop_number++;
1491
1492     if (load_input_picture(s, pic_arg) < 0)
1493         return -1;
1494
1495     if (select_input_picture(s) < 0) {
1496         return -1;
1497     }
1498
1499     /* output? */
1500     if (s->new_picture.f.data[0]) {
1501         if (!pkt->data &&
1502             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1503             return ret;
1504         if (s->mb_info) {
1505             s->mb_info_ptr = av_packet_new_side_data(pkt,
1506                                  AV_PKT_DATA_H263_MB_INFO,
1507                                  s->mb_width*s->mb_height*12);
1508             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1509         }
1510
1511         for (i = 0; i < context_count; i++) {
1512             int start_y = s->thread_context[i]->start_mb_y;
1513             int   end_y = s->thread_context[i]->  end_mb_y;
1514             int h       = s->mb_height;
1515             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1516             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1517
1518             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1519         }
1520
1521         s->pict_type = s->new_picture.f.pict_type;
1522         //emms_c();
1523         ret = frame_start(s);
1524         if (ret < 0)
1525             return ret;
1526 vbv_retry:
1527         if (encode_picture(s, s->picture_number) < 0)
1528             return -1;
1529
1530         avctx->header_bits = s->header_bits;
1531         avctx->mv_bits     = s->mv_bits;
1532         avctx->misc_bits   = s->misc_bits;
1533         avctx->i_tex_bits  = s->i_tex_bits;
1534         avctx->p_tex_bits  = s->p_tex_bits;
1535         avctx->i_count     = s->i_count;
1536         // FIXME f/b_count in avctx
1537         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1538         avctx->skip_count  = s->skip_count;
1539
1540         frame_end(s);
1541
1542         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1543             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1544
1545         if (avctx->rc_buffer_size) {
1546             RateControlContext *rcc = &s->rc_context;
1547             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1548
1549             if (put_bits_count(&s->pb) > max_size &&
1550                 s->lambda < s->avctx->lmax) {
1551                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1552                                        (s->qscale + 1) / s->qscale);
1553                 if (s->adaptive_quant) {
1554                     int i;
1555                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1556                         s->lambda_table[i] =
1557                             FFMAX(s->lambda_table[i] + 1,
1558                                   s->lambda_table[i] * (s->qscale + 1) /
1559                                   s->qscale);
1560                 }
1561                 s->mb_skipped = 0;        // done in frame_start()
1562                 // done in encode_picture() so we must undo it
1563                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1564                     if (s->flipflop_rounding          ||
1565                         s->codec_id == AV_CODEC_ID_H263P ||
1566                         s->codec_id == AV_CODEC_ID_MPEG4)
1567                         s->no_rounding ^= 1;
1568                 }
1569                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1570                     s->time_base       = s->last_time_base;
1571                     s->last_non_b_time = s->time - s->pp_time;
1572                 }
1573                 for (i = 0; i < context_count; i++) {
1574                     PutBitContext *pb = &s->thread_context[i]->pb;
1575                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1576                 }
1577                 goto vbv_retry;
1578             }
1579
1580             assert(s->avctx->rc_max_rate);
1581         }
1582
1583         if (s->flags & CODEC_FLAG_PASS1)
1584             ff_write_pass1_stats(s);
1585
1586         for (i = 0; i < 4; i++) {
1587             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1588             avctx->error[i] += s->current_picture_ptr->f.error[i];
1589         }
1590
1591         if (s->flags & CODEC_FLAG_PASS1)
1592             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1593                    avctx->i_tex_bits + avctx->p_tex_bits ==
1594                        put_bits_count(&s->pb));
1595         flush_put_bits(&s->pb);
1596         s->frame_bits  = put_bits_count(&s->pb);
1597
1598         stuffing_count = ff_vbv_update(s, s->frame_bits);
1599         if (stuffing_count) {
1600             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1601                     stuffing_count + 50) {
1602                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1603                 return -1;
1604             }
1605
1606             switch (s->codec_id) {
1607             case AV_CODEC_ID_MPEG1VIDEO:
1608             case AV_CODEC_ID_MPEG2VIDEO:
1609                 while (stuffing_count--) {
1610                     put_bits(&s->pb, 8, 0);
1611                 }
1612             break;
1613             case AV_CODEC_ID_MPEG4:
1614                 put_bits(&s->pb, 16, 0);
1615                 put_bits(&s->pb, 16, 0x1C3);
1616                 stuffing_count -= 4;
1617                 while (stuffing_count--) {
1618                     put_bits(&s->pb, 8, 0xFF);
1619                 }
1620             break;
1621             default:
1622                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1623             }
1624             flush_put_bits(&s->pb);
1625             s->frame_bits  = put_bits_count(&s->pb);
1626         }
1627
1628         /* update mpeg1/2 vbv_delay for CBR */
1629         if (s->avctx->rc_max_rate                          &&
1630             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1631             s->out_format == FMT_MPEG1                     &&
1632             90000LL * (avctx->rc_buffer_size - 1) <=
1633                 s->avctx->rc_max_rate * 0xFFFFLL) {
1634             int vbv_delay, min_delay;
1635             double inbits  = s->avctx->rc_max_rate *
1636                              av_q2d(s->avctx->time_base);
1637             int    minbits = s->frame_bits - 8 *
1638                              (s->vbv_delay_ptr - s->pb.buf - 1);
1639             double bits    = s->rc_context.buffer_index + minbits - inbits;
1640
1641             if (bits < 0)
1642                 av_log(s->avctx, AV_LOG_ERROR,
1643                        "Internal error, negative bits\n");
1644
1645             assert(s->repeat_first_field == 0);
1646
1647             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1648             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1649                         s->avctx->rc_max_rate;
1650
1651             vbv_delay = FFMAX(vbv_delay, min_delay);
1652
1653             assert(vbv_delay < 0xFFFF);
1654
1655             s->vbv_delay_ptr[0] &= 0xF8;
1656             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1657             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1658             s->vbv_delay_ptr[2] &= 0x07;
1659             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1660             avctx->vbv_delay     = vbv_delay * 300;
1661         }
1662         s->total_bits     += s->frame_bits;
1663         avctx->frame_bits  = s->frame_bits;
1664
1665         pkt->pts = s->current_picture.f.pts;
1666         if (!s->low_delay) {
1667             if (!s->current_picture.f.coded_picture_number)
1668                 pkt->dts = pkt->pts - s->dts_delta;
1669             else
1670                 pkt->dts = s->reordered_pts;
1671             s->reordered_pts = s->input_picture[0]->f.pts;
1672         } else
1673             pkt->dts = pkt->pts;
1674         if (s->current_picture.f.key_frame)
1675             pkt->flags |= AV_PKT_FLAG_KEY;
1676         if (s->mb_info)
1677             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1678     } else {
1679         s->frame_bits = 0;
1680     }
1681     assert((s->frame_bits & 7) == 0);
1682
1683     pkt->size = s->frame_bits / 8;
1684     *got_packet = !!pkt->size;
1685     return 0;
1686 }
1687
1688 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1689                                                 int n, int threshold)
1690 {
1691     static const char tab[64] = {
1692         3, 2, 2, 1, 1, 1, 1, 1,
1693         1, 1, 1, 1, 1, 1, 1, 1,
1694         1, 1, 1, 1, 1, 1, 1, 1,
1695         0, 0, 0, 0, 0, 0, 0, 0,
1696         0, 0, 0, 0, 0, 0, 0, 0,
1697         0, 0, 0, 0, 0, 0, 0, 0,
1698         0, 0, 0, 0, 0, 0, 0, 0,
1699         0, 0, 0, 0, 0, 0, 0, 0
1700     };
1701     int score = 0;
1702     int run = 0;
1703     int i;
1704     int16_t *block = s->block[n];
1705     const int last_index = s->block_last_index[n];
1706     int skip_dc;
1707
1708     if (threshold < 0) {
1709         skip_dc = 0;
1710         threshold = -threshold;
1711     } else
1712         skip_dc = 1;
1713
1714     /* Are all we could set to zero already zero? */
1715     if (last_index <= skip_dc - 1)
1716         return;
1717
1718     for (i = 0; i <= last_index; i++) {
1719         const int j = s->intra_scantable.permutated[i];
1720         const int level = FFABS(block[j]);
1721         if (level == 1) {
1722             if (skip_dc && i == 0)
1723                 continue;
1724             score += tab[run];
1725             run = 0;
1726         } else if (level > 1) {
1727             return;
1728         } else {
1729             run++;
1730         }
1731     }
1732     if (score >= threshold)
1733         return;
1734     for (i = skip_dc; i <= last_index; i++) {
1735         const int j = s->intra_scantable.permutated[i];
1736         block[j] = 0;
1737     }
1738     if (block[0])
1739         s->block_last_index[n] = 0;
1740     else
1741         s->block_last_index[n] = -1;
1742 }
1743
1744 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1745                                int last_index)
1746 {
1747     int i;
1748     const int maxlevel = s->max_qcoeff;
1749     const int minlevel = s->min_qcoeff;
1750     int overflow = 0;
1751
1752     if (s->mb_intra) {
1753         i = 1; // skip clipping of intra dc
1754     } else
1755         i = 0;
1756
1757     for (; i <= last_index; i++) {
1758         const int j = s->intra_scantable.permutated[i];
1759         int level = block[j];
1760
1761         if (level > maxlevel) {
1762             level = maxlevel;
1763             overflow++;
1764         } else if (level < minlevel) {
1765             level = minlevel;
1766             overflow++;
1767         }
1768
1769         block[j] = level;
1770     }
1771
1772     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1773         av_log(s->avctx, AV_LOG_INFO,
1774                "warning, clipping %d dct coefficients to %d..%d\n",
1775                overflow, minlevel, maxlevel);
1776 }
1777
/**
 * Compute a per-pixel weight for an 8x8 block from local pixel statistics.
 *
 * For every pixel the sum and the sum of squares are gathered over its
 * neighbourhood (up to 3x3, cropped at the borders of the 8x8 block) and
 * weight = 36 * ff_sqrt(count * sqr - sum * sum) / count is stored.
 *
 * @param weight output, 64 entries in row-major order
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance in bytes between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            /* number of pixels inside the cropped window */
            const int count = (bottom - top) * (right - left);
            int sum = 0;
            int sqr = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                for (nx = left; nx < right; nx++) {
                    const int v = ptr[nx + ny * stride];

                    sum += v;
                    sqr += v * v;
                }
            }

            weight[col + 8 * row] =
                (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
1801
1802 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1803                                                 int motion_x, int motion_y,
1804                                                 int mb_block_height,
1805                                                 int mb_block_count)
1806 {
1807     int16_t weight[8][64];
1808     int16_t orig[8][64];
1809     const int mb_x = s->mb_x;
1810     const int mb_y = s->mb_y;
1811     int i;
1812     int skip_dct[8];
1813     int dct_offset = s->linesize * 8; // default for progressive frames
1814     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1815     ptrdiff_t wrap_y, wrap_c;
1816
1817     for (i = 0; i < mb_block_count; i++)
1818         skip_dct[i] = s->skipdct;
1819
1820     if (s->adaptive_quant) {
1821         const int last_qp = s->qscale;
1822         const int mb_xy = mb_x + mb_y * s->mb_stride;
1823
1824         s->lambda = s->lambda_table[mb_xy];
1825         update_qscale(s);
1826
1827         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1828             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1829             s->dquant = s->qscale - last_qp;
1830
1831             if (s->out_format == FMT_H263) {
1832                 s->dquant = av_clip(s->dquant, -2, 2);
1833
1834                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1835                     if (!s->mb_intra) {
1836                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1837                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1838                                 s->dquant = 0;
1839                         }
1840                         if (s->mv_type == MV_TYPE_8X8)
1841                             s->dquant = 0;
1842                     }
1843                 }
1844             }
1845         }
1846         ff_set_qscale(s, last_qp + s->dquant);
1847     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1848         ff_set_qscale(s, s->qscale + s->dquant);
1849
1850     wrap_y = s->linesize;
1851     wrap_c = s->uvlinesize;
1852     ptr_y  = s->new_picture.f.data[0] +
1853              (mb_y * 16 * wrap_y)              + mb_x * 16;
1854     ptr_cb = s->new_picture.f.data[1] +
1855              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1856     ptr_cr = s->new_picture.f.data[2] +
1857              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1858
1859     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1860         uint8_t *ebuf = s->edge_emu_buffer + 32;
1861         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
1862                                  wrap_y, wrap_y,
1863                                  16, 16, mb_x * 16, mb_y * 16,
1864                                  s->width, s->height);
1865         ptr_y = ebuf;
1866         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
1867                                  wrap_c, wrap_c,
1868                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1869                                  s->width >> 1, s->height >> 1);
1870         ptr_cb = ebuf + 18 * wrap_y;
1871         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
1872                                  wrap_c, wrap_c,
1873                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1874                                  s->width >> 1, s->height >> 1);
1875         ptr_cr = ebuf + 18 * wrap_y + 8;
1876     }
1877
1878     if (s->mb_intra) {
1879         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1880             int progressive_score, interlaced_score;
1881
1882             s->interlaced_dct = 0;
1883             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1884                                                     NULL, wrap_y, 8) +
1885                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1886                                                     NULL, wrap_y, 8) - 400;
1887
1888             if (progressive_score > 0) {
1889                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1890                                                        NULL, wrap_y * 2, 8) +
1891                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1892                                                        NULL, wrap_y * 2, 8);
1893                 if (progressive_score > interlaced_score) {
1894                     s->interlaced_dct = 1;
1895
1896                     dct_offset = wrap_y;
1897                     wrap_y <<= 1;
1898                     if (s->chroma_format == CHROMA_422)
1899                         wrap_c <<= 1;
1900                 }
1901             }
1902         }
1903
1904         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1905         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1906         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1907         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1908
1909         if (s->flags & CODEC_FLAG_GRAY) {
1910             skip_dct[4] = 1;
1911             skip_dct[5] = 1;
1912         } else {
1913             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1914             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1915             if (!s->chroma_y_shift) { /* 422 */
1916                 s->dsp.get_pixels(s->block[6],
1917                                   ptr_cb + (dct_offset >> 1), wrap_c);
1918                 s->dsp.get_pixels(s->block[7],
1919                                   ptr_cr + (dct_offset >> 1), wrap_c);
1920             }
1921         }
1922     } else {
1923         op_pixels_func (*op_pix)[4];
1924         qpel_mc_func (*op_qpix)[16];
1925         uint8_t *dest_y, *dest_cb, *dest_cr;
1926
1927         dest_y  = s->dest[0];
1928         dest_cb = s->dest[1];
1929         dest_cr = s->dest[2];
1930
1931         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1932             op_pix  = s->hdsp.put_pixels_tab;
1933             op_qpix = s->dsp.put_qpel_pixels_tab;
1934         } else {
1935             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
1936             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1937         }
1938
1939         if (s->mv_dir & MV_DIR_FORWARD) {
1940             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
1941                           s->last_picture.f.data,
1942                           op_pix, op_qpix);
1943             op_pix  = s->hdsp.avg_pixels_tab;
1944             op_qpix = s->dsp.avg_qpel_pixels_tab;
1945         }
1946         if (s->mv_dir & MV_DIR_BACKWARD) {
1947             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
1948                           s->next_picture.f.data,
1949                           op_pix, op_qpix);
1950         }
1951
1952         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1953             int progressive_score, interlaced_score;
1954
1955             s->interlaced_dct = 0;
1956             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1957                                                     ptr_y,              wrap_y,
1958                                                     8) +
1959                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1960                                                     ptr_y + wrap_y * 8, wrap_y,
1961                                                     8) - 400;
1962
1963             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1964                 progressive_score -= 400;
1965
1966             if (progressive_score > 0) {
1967                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1968                                                        ptr_y,
1969                                                        wrap_y * 2, 8) +
1970                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1971                                                        ptr_y + wrap_y,
1972                                                        wrap_y * 2, 8);
1973
1974                 if (progressive_score > interlaced_score) {
1975                     s->interlaced_dct = 1;
1976
1977                     dct_offset = wrap_y;
1978                     wrap_y <<= 1;
1979                     if (s->chroma_format == CHROMA_422)
1980                         wrap_c <<= 1;
1981                 }
1982             }
1983         }
1984
1985         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1986         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1987         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1988                            dest_y + dct_offset, wrap_y);
1989         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1990                            dest_y + dct_offset + 8, wrap_y);
1991
1992         if (s->flags & CODEC_FLAG_GRAY) {
1993             skip_dct[4] = 1;
1994             skip_dct[5] = 1;
1995         } else {
1996             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1997             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1998             if (!s->chroma_y_shift) { /* 422 */
1999                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
2000                                    dest_cb + (dct_offset >> 1), wrap_c);
2001                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
2002                                    dest_cr + (dct_offset >> 1), wrap_c);
2003             }
2004         }
2005         /* pre quantization */
2006         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2007                 2 * s->qscale * s->qscale) {
2008             // FIXME optimize
2009             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
2010                               wrap_y, 8) < 20 * s->qscale)
2011                 skip_dct[0] = 1;
2012             if (s->dsp.sad[1](NULL, ptr_y + 8,
2013                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2014                 skip_dct[1] = 1;
2015             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
2016                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
2017                 skip_dct[2] = 1;
2018             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
2019                               dest_y + dct_offset + 8,
2020                               wrap_y, 8) < 20 * s->qscale)
2021                 skip_dct[3] = 1;
2022             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
2023                               wrap_c, 8) < 20 * s->qscale)
2024                 skip_dct[4] = 1;
2025             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
2026                               wrap_c, 8) < 20 * s->qscale)
2027                 skip_dct[5] = 1;
2028             if (!s->chroma_y_shift) { /* 422 */
2029                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2030                                   dest_cb + (dct_offset >> 1),
2031                                   wrap_c, 8) < 20 * s->qscale)
2032                     skip_dct[6] = 1;
2033                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2034                                   dest_cr + (dct_offset >> 1),
2035                                   wrap_c, 8) < 20 * s->qscale)
2036                     skip_dct[7] = 1;
2037             }
2038         }
2039     }
2040
2041     if (s->quantizer_noise_shaping) {
2042         if (!skip_dct[0])
2043             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2044         if (!skip_dct[1])
2045             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2046         if (!skip_dct[2])
2047             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2048         if (!skip_dct[3])
2049             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2050         if (!skip_dct[4])
2051             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2052         if (!skip_dct[5])
2053             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2054         if (!s->chroma_y_shift) { /* 422 */
2055             if (!skip_dct[6])
2056                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2057                                   wrap_c);
2058             if (!skip_dct[7])
2059                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2060                                   wrap_c);
2061         }
2062         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2063     }
2064
2065     /* DCT & quantize */
2066     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
2067     {
2068         for (i = 0; i < mb_block_count; i++) {
2069             if (!skip_dct[i]) {
2070                 int overflow;
2071                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2072                 // FIXME we could decide to change to quantizer instead of
2073                 // clipping
2074                 // JS: I don't think that would be a good idea it could lower
2075                 //     quality instead of improve it. Just INTRADC clipping
2076                 //     deserves changes in quantizer
2077                 if (overflow)
2078                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2079             } else
2080                 s->block_last_index[i] = -1;
2081         }
2082         if (s->quantizer_noise_shaping) {
2083             for (i = 0; i < mb_block_count; i++) {
2084                 if (!skip_dct[i]) {
2085                     s->block_last_index[i] =
2086                         dct_quantize_refine(s, s->block[i], weight[i],
2087                                             orig[i], i, s->qscale);
2088                 }
2089             }
2090         }
2091
2092         if (s->luma_elim_threshold && !s->mb_intra)
2093             for (i = 0; i < 4; i++)
2094                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2095         if (s->chroma_elim_threshold && !s->mb_intra)
2096             for (i = 4; i < mb_block_count; i++)
2097                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2098
2099         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2100             for (i = 0; i < mb_block_count; i++) {
2101                 if (s->block_last_index[i] == -1)
2102                     s->coded_score[i] = INT_MAX / 256;
2103             }
2104         }
2105     }
2106
2107     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2108         s->block_last_index[4] =
2109         s->block_last_index[5] = 0;
2110         s->block[4][0] =
2111         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2112     }
2113
2114     // non c quantize code returns incorrect block_last_index FIXME
2115     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2116         for (i = 0; i < mb_block_count; i++) {
2117             int j;
2118             if (s->block_last_index[i] > 0) {
2119                 for (j = 63; j > 0; j--) {
2120                     if (s->block[i][s->intra_scantable.permutated[j]])
2121                         break;
2122                 }
2123                 s->block_last_index[i] = j;
2124             }
2125         }
2126     }
2127
2128     /* huffman encode */
2129     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2130     case AV_CODEC_ID_MPEG1VIDEO:
2131     case AV_CODEC_ID_MPEG2VIDEO:
2132         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2133             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2134         break;
2135     case AV_CODEC_ID_MPEG4:
2136         if (CONFIG_MPEG4_ENCODER)
2137             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2138         break;
2139     case AV_CODEC_ID_MSMPEG4V2:
2140     case AV_CODEC_ID_MSMPEG4V3:
2141     case AV_CODEC_ID_WMV1:
2142         if (CONFIG_MSMPEG4_ENCODER)
2143             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2144         break;
2145     case AV_CODEC_ID_WMV2:
2146         if (CONFIG_WMV2_ENCODER)
2147             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2148         break;
2149     case AV_CODEC_ID_H261:
2150         if (CONFIG_H261_ENCODER)
2151             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2152         break;
2153     case AV_CODEC_ID_H263:
2154     case AV_CODEC_ID_H263P:
2155     case AV_CODEC_ID_FLV1:
2156     case AV_CODEC_ID_RV10:
2157     case AV_CODEC_ID_RV20:
2158         if (CONFIG_H263_ENCODER)
2159             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2160         break;
2161     case AV_CODEC_ID_MJPEG:
2162         if (CONFIG_MJPEG_ENCODER)
2163             ff_mjpeg_encode_mb(s, s->block);
2164         break;
2165     default:
2166         assert(0);
2167     }
2168 }
2169
2170 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2171 {
2172     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2173     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2174 }
2175
2176 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2177     int i;
2178
2179     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2180
2181     /* mpeg1 */
2182     d->mb_skip_run= s->mb_skip_run;
2183     for(i=0; i<3; i++)
2184         d->last_dc[i] = s->last_dc[i];
2185
2186     /* statistics */
2187     d->mv_bits= s->mv_bits;
2188     d->i_tex_bits= s->i_tex_bits;
2189     d->p_tex_bits= s->p_tex_bits;
2190     d->i_count= s->i_count;
2191     d->f_count= s->f_count;
2192     d->b_count= s->b_count;
2193     d->skip_count= s->skip_count;
2194     d->misc_bits= s->misc_bits;
2195     d->last_bits= 0;
2196
2197     d->mb_skipped= 0;
2198     d->qscale= s->qscale;
2199     d->dquant= s->dquant;
2200
2201     d->esc3_level_length= s->esc3_level_length;
2202 }
2203
2204 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2205     int i;
2206
2207     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2208     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2209
2210     /* mpeg1 */
2211     d->mb_skip_run= s->mb_skip_run;
2212     for(i=0; i<3; i++)
2213         d->last_dc[i] = s->last_dc[i];
2214
2215     /* statistics */
2216     d->mv_bits= s->mv_bits;
2217     d->i_tex_bits= s->i_tex_bits;
2218     d->p_tex_bits= s->p_tex_bits;
2219     d->i_count= s->i_count;
2220     d->f_count= s->f_count;
2221     d->b_count= s->b_count;
2222     d->skip_count= s->skip_count;
2223     d->misc_bits= s->misc_bits;
2224
2225     d->mb_intra= s->mb_intra;
2226     d->mb_skipped= s->mb_skipped;
2227     d->mv_type= s->mv_type;
2228     d->mv_dir= s->mv_dir;
2229     d->pb= s->pb;
2230     if(s->data_partitioning){
2231         d->pb2= s->pb2;
2232         d->tex_pb= s->tex_pb;
2233     }
2234     d->block= s->block;
2235     for(i=0; i<8; i++)
2236         d->block_last_index[i]= s->block_last_index[i];
2237     d->interlaced_dct= s->interlaced_dct;
2238     d->qscale= s->qscale;
2239
2240     d->esc3_level_length= s->esc3_level_length;
2241 }
2242
2243 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2244                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2245                            int *dmin, int *next_block, int motion_x, int motion_y)
2246 {
2247     int score;
2248     uint8_t *dest_backup[3];
2249
2250     copy_context_before_encode(s, backup, type);
2251
2252     s->block= s->blocks[*next_block];
2253     s->pb= pb[*next_block];
2254     if(s->data_partitioning){
2255         s->pb2   = pb2   [*next_block];
2256         s->tex_pb= tex_pb[*next_block];
2257     }
2258
2259     if(*next_block){
2260         memcpy(dest_backup, s->dest, sizeof(s->dest));
2261         s->dest[0] = s->rd_scratchpad;
2262         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2263         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2264         assert(s->linesize >= 32); //FIXME
2265     }
2266
2267     encode_mb(s, motion_x, motion_y);
2268
2269     score= put_bits_count(&s->pb);
2270     if(s->data_partitioning){
2271         score+= put_bits_count(&s->pb2);
2272         score+= put_bits_count(&s->tex_pb);
2273     }
2274
2275     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2276         ff_MPV_decode_mb(s, s->block);
2277
2278         score *= s->lambda2;
2279         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2280     }
2281
2282     if(*next_block){
2283         memcpy(s->dest, dest_backup, sizeof(s->dest));
2284     }
2285
2286     if(score<*dmin){
2287         *dmin= score;
2288         *next_block^=1;
2289
2290         copy_context_after_encode(best, s, type);
2291     }
2292 }
2293
2294 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2295     uint32_t *sq = ff_square_tab + 256;
2296     int acc=0;
2297     int x,y;
2298
2299     if(w==16 && h==16)
2300         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2301     else if(w==8 && h==8)
2302         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2303
2304     for(y=0; y<h; y++){
2305         for(x=0; x<w; x++){
2306             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2307         }
2308     }
2309
2310     assert(acc>=0);
2311
2312     return acc;
2313 }
2314
2315 static int sse_mb(MpegEncContext *s){
2316     int w= 16;
2317     int h= 16;
2318
2319     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2320     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2321
2322     if(w==16 && h==16)
2323       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2324         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2325                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2326                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2327       }else{
2328         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2329                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2330                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2331       }
2332     else
2333         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2334                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2335                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2336 }
2337
2338 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2339     MpegEncContext *s= *(void**)arg;
2340
2341
2342     s->me.pre_pass=1;
2343     s->me.dia_size= s->avctx->pre_dia_size;
2344     s->first_slice_line=1;
2345     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2346         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2347             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2348         }
2349         s->first_slice_line=0;
2350     }
2351
2352     s->me.pre_pass=0;
2353
2354     return 0;
2355 }
2356
2357 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2358     MpegEncContext *s= *(void**)arg;
2359
2360     s->me.dia_size= s->avctx->dia_size;
2361     s->first_slice_line=1;
2362     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2363         s->mb_x=0; //for block init below
2364         ff_init_block_index(s);
2365         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2366             s->block_index[0]+=2;
2367             s->block_index[1]+=2;
2368             s->block_index[2]+=2;
2369             s->block_index[3]+=2;
2370
2371             /* compute motion vector & mb_type and store in context */
2372             if(s->pict_type==AV_PICTURE_TYPE_B)
2373                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2374             else
2375                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2376         }
2377         s->first_slice_line=0;
2378     }
2379     return 0;
2380 }
2381
2382 static int mb_var_thread(AVCodecContext *c, void *arg){
2383     MpegEncContext *s= *(void**)arg;
2384     int mb_x, mb_y;
2385
2386     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2387         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2388             int xx = mb_x * 16;
2389             int yy = mb_y * 16;
2390             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2391             int varc;
2392             int sum = s->dsp.pix_sum(pix, s->linesize);
2393
2394             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2395
2396             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2397             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2398             s->me.mb_var_sum_temp    += varc;
2399         }
2400     }
2401     return 0;
2402 }
2403
2404 static void write_slice_end(MpegEncContext *s){
2405     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2406         if(s->partitioned_frame){
2407             ff_mpeg4_merge_partitions(s);
2408         }
2409
2410         ff_mpeg4_stuffing(&s->pb);
2411     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2412         ff_mjpeg_encode_stuffing(&s->pb);
2413     }
2414
2415     avpriv_align_put_bits(&s->pb);
2416     flush_put_bits(&s->pb);
2417
2418     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2419         s->misc_bits+= get_bits_diff(s);
2420 }
2421
2422 static void write_mb_info(MpegEncContext *s)
2423 {
2424     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2425     int offset = put_bits_count(&s->pb);
2426     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2427     int gobn = s->mb_y / s->gob_index;
2428     int pred_x, pred_y;
2429     if (CONFIG_H263_ENCODER)
2430         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2431     bytestream_put_le32(&ptr, offset);
2432     bytestream_put_byte(&ptr, s->qscale);
2433     bytestream_put_byte(&ptr, gobn);
2434     bytestream_put_le16(&ptr, mba);
2435     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2436     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2437     /* 4MV not implemented */
2438     bytestream_put_byte(&ptr, 0); /* hmv2 */
2439     bytestream_put_byte(&ptr, 0); /* vmv2 */
2440 }
2441
2442 static void update_mb_info(MpegEncContext *s, int startcode)
2443 {
2444     if (!s->mb_info)
2445         return;
2446     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2447         s->mb_info_size += 12;
2448         s->prev_mb_info = s->last_mb_info;
2449     }
2450     if (startcode) {
2451         s->prev_mb_info = put_bits_count(&s->pb)/8;
2452         /* This might have incremented mb_info_size above, and we return without
2453          * actually writing any info into that slot yet. But in that case,
2454          * this will be called again at the start of the after writing the
2455          * start code, actually writing the mb info. */
2456         return;
2457     }
2458
2459     s->last_mb_info = put_bits_count(&s->pb)/8;
2460     if (!s->mb_info_size)
2461         s->mb_info_size += 12;
2462     write_mb_info(s);
2463 }
2464
2465 static int encode_thread(AVCodecContext *c, void *arg){
2466     MpegEncContext *s= *(void**)arg;
2467     int mb_x, mb_y, pdif = 0;
2468     int chr_h= 16>>s->chroma_y_shift;
2469     int i, j;
2470     MpegEncContext best_s, backup_s;
2471     uint8_t bit_buf[2][MAX_MB_BYTES];
2472     uint8_t bit_buf2[2][MAX_MB_BYTES];
2473     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2474     PutBitContext pb[2], pb2[2], tex_pb[2];
2475
2476     for(i=0; i<2; i++){
2477         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2478         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2479         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2480     }
2481
2482     s->last_bits= put_bits_count(&s->pb);
2483     s->mv_bits=0;
2484     s->misc_bits=0;
2485     s->i_tex_bits=0;
2486     s->p_tex_bits=0;
2487     s->i_count=0;
2488     s->f_count=0;
2489     s->b_count=0;
2490     s->skip_count=0;
2491
2492     for(i=0; i<3; i++){
2493         /* init last dc values */
2494         /* note: quant matrix value (8) is implied here */
2495         s->last_dc[i] = 128 << s->intra_dc_precision;
2496
2497         s->current_picture.f.error[i] = 0;
2498     }
2499     s->mb_skip_run = 0;
2500     memset(s->last_mv, 0, sizeof(s->last_mv));
2501
2502     s->last_mv_dir = 0;
2503
2504     switch(s->codec_id){
2505     case AV_CODEC_ID_H263:
2506     case AV_CODEC_ID_H263P:
2507     case AV_CODEC_ID_FLV1:
2508         if (CONFIG_H263_ENCODER)
2509             s->gob_index = ff_h263_get_gob_height(s);
2510         break;
2511     case AV_CODEC_ID_MPEG4:
2512         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2513             ff_mpeg4_init_partitions(s);
2514         break;
2515     }
2516
2517     s->resync_mb_x=0;
2518     s->resync_mb_y=0;
2519     s->first_slice_line = 1;
2520     s->ptr_lastgob = s->pb.buf;
2521     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2522         s->mb_x=0;
2523         s->mb_y= mb_y;
2524
2525         ff_set_qscale(s, s->qscale);
2526         ff_init_block_index(s);
2527
2528         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2529             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2530             int mb_type= s->mb_type[xy];
2531 //            int d;
2532             int dmin= INT_MAX;
2533             int dir;
2534
2535             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2536                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2537                 return -1;
2538             }
2539             if(s->data_partitioning){
2540                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2541                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2542                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2543                     return -1;
2544                 }
2545             }
2546
2547             s->mb_x = mb_x;
2548             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2549             ff_update_block_index(s);
2550
2551             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2552                 ff_h261_reorder_mb_index(s);
2553                 xy= s->mb_y*s->mb_stride + s->mb_x;
2554                 mb_type= s->mb_type[xy];
2555             }
2556
2557             /* write gob / video packet header  */
2558             if(s->rtp_mode){
2559                 int current_packet_size, is_gob_start;
2560
2561                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2562
2563                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2564
2565                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2566
2567                 switch(s->codec_id){
2568                 case AV_CODEC_ID_H263:
2569                 case AV_CODEC_ID_H263P:
2570                     if(!s->h263_slice_structured)
2571                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2572                     break;
2573                 case AV_CODEC_ID_MPEG2VIDEO:
2574                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2575                 case AV_CODEC_ID_MPEG1VIDEO:
2576                     if(s->mb_skip_run) is_gob_start=0;
2577                     break;
2578                 }
2579
2580                 if(is_gob_start){
2581                     if(s->start_mb_y != mb_y || mb_x!=0){
2582                         write_slice_end(s);
2583
2584                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2585                             ff_mpeg4_init_partitions(s);
2586                         }
2587                     }
2588
2589                     assert((put_bits_count(&s->pb)&7) == 0);
2590                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2591
2592                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2593                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2594                         int d = 100 / s->error_rate;
2595                         if(r % d == 0){
2596                             current_packet_size=0;
2597                             s->pb.buf_ptr= s->ptr_lastgob;
2598                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2599                         }
2600                     }
2601
2602                     if (s->avctx->rtp_callback){
2603                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2604                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2605                     }
2606                     update_mb_info(s, 1);
2607
2608                     switch(s->codec_id){
2609                     case AV_CODEC_ID_MPEG4:
2610                         if (CONFIG_MPEG4_ENCODER) {
2611                             ff_mpeg4_encode_video_packet_header(s);
2612                             ff_mpeg4_clean_buffers(s);
2613                         }
2614                     break;
2615                     case AV_CODEC_ID_MPEG1VIDEO:
2616                     case AV_CODEC_ID_MPEG2VIDEO:
2617                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2618                             ff_mpeg1_encode_slice_header(s);
2619                             ff_mpeg1_clean_buffers(s);
2620                         }
2621                     break;
2622                     case AV_CODEC_ID_H263:
2623                     case AV_CODEC_ID_H263P:
2624                         if (CONFIG_H263_ENCODER)
2625                             ff_h263_encode_gob_header(s, mb_y);
2626                     break;
2627                     }
2628
2629                     if(s->flags&CODEC_FLAG_PASS1){
2630                         int bits= put_bits_count(&s->pb);
2631                         s->misc_bits+= bits - s->last_bits;
2632                         s->last_bits= bits;
2633                     }
2634
2635                     s->ptr_lastgob += current_packet_size;
2636                     s->first_slice_line=1;
2637                     s->resync_mb_x=mb_x;
2638                     s->resync_mb_y=mb_y;
2639                 }
2640             }
2641
2642             if(  (s->resync_mb_x   == s->mb_x)
2643                && s->resync_mb_y+1 == s->mb_y){
2644                 s->first_slice_line=0;
2645             }
2646
2647             s->mb_skipped=0;
2648             s->dquant=0; //only for QP_RD
2649
2650             update_mb_info(s, 0);
2651
2652             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2653                 int next_block=0;
2654                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2655
2656                 copy_context_before_encode(&backup_s, s, -1);
2657                 backup_s.pb= s->pb;
2658                 best_s.data_partitioning= s->data_partitioning;
2659                 best_s.partitioned_frame= s->partitioned_frame;
2660                 if(s->data_partitioning){
2661                     backup_s.pb2= s->pb2;
2662                     backup_s.tex_pb= s->tex_pb;
2663                 }
2664
2665                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2666                     s->mv_dir = MV_DIR_FORWARD;
2667                     s->mv_type = MV_TYPE_16X16;
2668                     s->mb_intra= 0;
2669                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2670                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2671                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2672                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2673                 }
2674                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2675                     s->mv_dir = MV_DIR_FORWARD;
2676                     s->mv_type = MV_TYPE_FIELD;
2677                     s->mb_intra= 0;
2678                     for(i=0; i<2; i++){
2679                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2680                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2681                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2682                     }
2683                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2684                                  &dmin, &next_block, 0, 0);
2685                 }
2686                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2687                     s->mv_dir = MV_DIR_FORWARD;
2688                     s->mv_type = MV_TYPE_16X16;
2689                     s->mb_intra= 0;
2690                     s->mv[0][0][0] = 0;
2691                     s->mv[0][0][1] = 0;
2692                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2693                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2694                 }
2695                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2696                     s->mv_dir = MV_DIR_FORWARD;
2697                     s->mv_type = MV_TYPE_8X8;
2698                     s->mb_intra= 0;
2699                     for(i=0; i<4; i++){
2700                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2701                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2702                     }
2703                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2704                                  &dmin, &next_block, 0, 0);
2705                 }
2706                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2707                     s->mv_dir = MV_DIR_FORWARD;
2708                     s->mv_type = MV_TYPE_16X16;
2709                     s->mb_intra= 0;
2710                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2711                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2712                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2713                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2714                 }
2715                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2716                     s->mv_dir = MV_DIR_BACKWARD;
2717                     s->mv_type = MV_TYPE_16X16;
2718                     s->mb_intra= 0;
2719                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2720                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2721                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2722                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2723                 }
2724                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2725                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2726                     s->mv_type = MV_TYPE_16X16;
2727                     s->mb_intra= 0;
2728                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2729                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2730                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2731                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2732                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2733                                  &dmin, &next_block, 0, 0);
2734                 }
2735                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2736                     s->mv_dir = MV_DIR_FORWARD;
2737                     s->mv_type = MV_TYPE_FIELD;
2738                     s->mb_intra= 0;
2739                     for(i=0; i<2; i++){
2740                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2741                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2742                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2743                     }
2744                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2745                                  &dmin, &next_block, 0, 0);
2746                 }
2747                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2748                     s->mv_dir = MV_DIR_BACKWARD;
2749                     s->mv_type = MV_TYPE_FIELD;
2750                     s->mb_intra= 0;
2751                     for(i=0; i<2; i++){
2752                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2753                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2754                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2755                     }
2756                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2757                                  &dmin, &next_block, 0, 0);
2758                 }
2759                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2760                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2761                     s->mv_type = MV_TYPE_FIELD;
2762                     s->mb_intra= 0;
2763                     for(dir=0; dir<2; dir++){
2764                         for(i=0; i<2; i++){
2765                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2766                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2767                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2768                         }
2769                     }
2770                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2771                                  &dmin, &next_block, 0, 0);
2772                 }
2773                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2774                     s->mv_dir = 0;
2775                     s->mv_type = MV_TYPE_16X16;
2776                     s->mb_intra= 1;
2777                     s->mv[0][0][0] = 0;
2778                     s->mv[0][0][1] = 0;
2779                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2780                                  &dmin, &next_block, 0, 0);
2781                     if(s->h263_pred || s->h263_aic){
2782                         if(best_s.mb_intra)
2783                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2784                         else
2785                             ff_clean_intra_table_entries(s); //old mode?
2786                     }
2787                 }
2788
2789                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2790                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2791                         const int last_qp= backup_s.qscale;
2792                         int qpi, qp, dc[6];
2793                         int16_t ac[6][16];
2794                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2795                         static const int dquant_tab[4]={-1,1,-2,2};
2796
2797                         assert(backup_s.dquant == 0);
2798
2799                         //FIXME intra
2800                         s->mv_dir= best_s.mv_dir;
2801                         s->mv_type = MV_TYPE_16X16;
2802                         s->mb_intra= best_s.mb_intra;
2803                         s->mv[0][0][0] = best_s.mv[0][0][0];
2804                         s->mv[0][0][1] = best_s.mv[0][0][1];
2805                         s->mv[1][0][0] = best_s.mv[1][0][0];
2806                         s->mv[1][0][1] = best_s.mv[1][0][1];
2807
2808                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2809                         for(; qpi<4; qpi++){
2810                             int dquant= dquant_tab[qpi];
2811                             qp= last_qp + dquant;
2812                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2813                                 continue;
2814                             backup_s.dquant= dquant;
2815                             if(s->mb_intra && s->dc_val[0]){
2816                                 for(i=0; i<6; i++){
2817                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2818                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2819                                 }
2820                             }
2821
2822                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2823                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2824                             if(best_s.qscale != qp){
2825                                 if(s->mb_intra && s->dc_val[0]){
2826                                     for(i=0; i<6; i++){
2827                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2828                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2829                                     }
2830                                 }
2831                             }
2832                         }
2833                     }
2834                 }
2835                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2836                     int mx= s->b_direct_mv_table[xy][0];
2837                     int my= s->b_direct_mv_table[xy][1];
2838
2839                     backup_s.dquant = 0;
2840                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2841                     s->mb_intra= 0;
2842                     ff_mpeg4_set_direct_mv(s, mx, my);
2843                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2844                                  &dmin, &next_block, mx, my);
2845                 }
2846                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2847                     backup_s.dquant = 0;
2848                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2849                     s->mb_intra= 0;
2850                     ff_mpeg4_set_direct_mv(s, 0, 0);
2851                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2852                                  &dmin, &next_block, 0, 0);
2853                 }
2854                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2855                     int coded=0;
2856                     for(i=0; i<6; i++)
2857                         coded |= s->block_last_index[i];
2858                     if(coded){
2859                         int mx,my;
2860                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2861                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2862                             mx=my=0; //FIXME find the one we actually used
2863                             ff_mpeg4_set_direct_mv(s, mx, my);
2864                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2865                             mx= s->mv[1][0][0];
2866                             my= s->mv[1][0][1];
2867                         }else{
2868                             mx= s->mv[0][0][0];
2869                             my= s->mv[0][0][1];
2870                         }
2871
2872                         s->mv_dir= best_s.mv_dir;
2873                         s->mv_type = best_s.mv_type;
2874                         s->mb_intra= 0;
2875 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2876                         s->mv[0][0][1] = best_s.mv[0][0][1];
2877                         s->mv[1][0][0] = best_s.mv[1][0][0];
2878                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2879                         backup_s.dquant= 0;
2880                         s->skipdct=1;
2881                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2882                                         &dmin, &next_block, mx, my);
2883                         s->skipdct=0;
2884                     }
2885                 }
2886
2887                 s->current_picture.qscale_table[xy] = best_s.qscale;
2888
2889                 copy_context_after_encode(s, &best_s, -1);
2890
2891                 pb_bits_count= put_bits_count(&s->pb);
2892                 flush_put_bits(&s->pb);
2893                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2894                 s->pb= backup_s.pb;
2895
2896                 if(s->data_partitioning){
2897                     pb2_bits_count= put_bits_count(&s->pb2);
2898                     flush_put_bits(&s->pb2);
2899                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2900                     s->pb2= backup_s.pb2;
2901
2902                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2903                     flush_put_bits(&s->tex_pb);
2904                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2905                     s->tex_pb= backup_s.tex_pb;
2906                 }
2907                 s->last_bits= put_bits_count(&s->pb);
2908
2909                 if (CONFIG_H263_ENCODER &&
2910                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2911                     ff_h263_update_motion_val(s);
2912
2913                 if(next_block==0){ //FIXME 16 vs linesize16
2914                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2915                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2916                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2917                 }
2918
2919                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2920                     ff_MPV_decode_mb(s, s->block);
2921             } else {
2922                 int motion_x = 0, motion_y = 0;
2923                 s->mv_type=MV_TYPE_16X16;
2924                 // only one MB-Type possible
2925
2926                 switch(mb_type){
2927                 case CANDIDATE_MB_TYPE_INTRA:
2928                     s->mv_dir = 0;
2929                     s->mb_intra= 1;
2930                     motion_x= s->mv[0][0][0] = 0;
2931                     motion_y= s->mv[0][0][1] = 0;
2932                     break;
2933                 case CANDIDATE_MB_TYPE_INTER:
2934                     s->mv_dir = MV_DIR_FORWARD;
2935                     s->mb_intra= 0;
2936                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2937                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2938                     break;
2939                 case CANDIDATE_MB_TYPE_INTER_I:
2940                     s->mv_dir = MV_DIR_FORWARD;
2941                     s->mv_type = MV_TYPE_FIELD;
2942                     s->mb_intra= 0;
2943                     for(i=0; i<2; i++){
2944                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2945                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2946                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2947                     }
2948                     break;
2949                 case CANDIDATE_MB_TYPE_INTER4V:
2950                     s->mv_dir = MV_DIR_FORWARD;
2951                     s->mv_type = MV_TYPE_8X8;
2952                     s->mb_intra= 0;
2953                     for(i=0; i<4; i++){
2954                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2955                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2956                     }
2957                     break;
2958                 case CANDIDATE_MB_TYPE_DIRECT:
2959                     if (CONFIG_MPEG4_ENCODER) {
2960                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2961                         s->mb_intra= 0;
2962                         motion_x=s->b_direct_mv_table[xy][0];
2963                         motion_y=s->b_direct_mv_table[xy][1];
2964                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2965                     }
2966                     break;
2967                 case CANDIDATE_MB_TYPE_DIRECT0:
2968                     if (CONFIG_MPEG4_ENCODER) {
2969                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2970                         s->mb_intra= 0;
2971                         ff_mpeg4_set_direct_mv(s, 0, 0);
2972                     }
2973                     break;
2974                 case CANDIDATE_MB_TYPE_BIDIR:
2975                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2976                     s->mb_intra= 0;
2977                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2978                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2979                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2980                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2981                     break;
2982                 case CANDIDATE_MB_TYPE_BACKWARD:
2983                     s->mv_dir = MV_DIR_BACKWARD;
2984                     s->mb_intra= 0;
2985                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2986                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2987                     break;
2988                 case CANDIDATE_MB_TYPE_FORWARD:
2989                     s->mv_dir = MV_DIR_FORWARD;
2990                     s->mb_intra= 0;
2991                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2992                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2993                     break;
2994                 case CANDIDATE_MB_TYPE_FORWARD_I:
2995                     s->mv_dir = MV_DIR_FORWARD;
2996                     s->mv_type = MV_TYPE_FIELD;
2997                     s->mb_intra= 0;
2998                     for(i=0; i<2; i++){
2999                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3000                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3001                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3002                     }
3003                     break;
3004                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3005                     s->mv_dir = MV_DIR_BACKWARD;
3006                     s->mv_type = MV_TYPE_FIELD;
3007                     s->mb_intra= 0;
3008                     for(i=0; i<2; i++){
3009                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3010                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3011                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3012                     }
3013                     break;
3014                 case CANDIDATE_MB_TYPE_BIDIR_I:
3015                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3016                     s->mv_type = MV_TYPE_FIELD;
3017                     s->mb_intra= 0;
3018                     for(dir=0; dir<2; dir++){
3019                         for(i=0; i<2; i++){
3020                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3021                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3022                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3023                         }
3024                     }
3025                     break;
3026                 default:
3027                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3028                 }
3029
3030                 encode_mb(s, motion_x, motion_y);
3031
3032                 // RAL: Update last macroblock type
3033                 s->last_mv_dir = s->mv_dir;
3034
3035                 if (CONFIG_H263_ENCODER &&
3036                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3037                     ff_h263_update_motion_val(s);
3038
3039                 ff_MPV_decode_mb(s, s->block);
3040             }
3041
3042             /* clean the MV table in IPS frames for direct mode in B frames */
3043             if(s->mb_intra /* && I,P,S_TYPE */){
3044                 s->p_mv_table[xy][0]=0;
3045                 s->p_mv_table[xy][1]=0;
3046             }
3047
3048             if(s->flags&CODEC_FLAG_PSNR){
3049                 int w= 16;
3050                 int h= 16;
3051
3052                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3053                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3054
3055                 s->current_picture.f.error[0] += sse(
3056                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3057                     s->dest[0], w, h, s->linesize);
3058                 s->current_picture.f.error[1] += sse(
3059                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3060                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3061                 s->current_picture.f.error[2] += sse(
3062                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3063                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3064             }
3065             if(s->loop_filter){
3066                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3067                     ff_h263_loop_filter(s);
3068             }
3069             av_dlog(s->avctx, "MB %d %d bits\n",
3070                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3071         }
3072     }
3073
3074     //not beautiful here but we must write it before flushing so it has to be here
3075     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3076         ff_msmpeg4_encode_ext_header(s);
3077
3078     write_slice_end(s);
3079
3080     /* Send the last GOB if RTP */
3081     if (s->avctx->rtp_callback) {
3082         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3083         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3084         /* Call the RTP callback to send the last GOB */
3085         emms_c();
3086         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3087     }
3088
3089     return 0;
3090 }
3091
3092 #define MERGE(field) dst->field += src->field; src->field=0
3093 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3094     MERGE(me.scene_change_score);
3095     MERGE(me.mc_mb_var_sum_temp);
3096     MERGE(me.mb_var_sum_temp);
3097 }
3098
3099 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3100     int i;
3101
3102     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3103     MERGE(dct_count[1]);
3104     MERGE(mv_bits);
3105     MERGE(i_tex_bits);
3106     MERGE(p_tex_bits);
3107     MERGE(i_count);
3108     MERGE(f_count);
3109     MERGE(b_count);
3110     MERGE(skip_count);
3111     MERGE(misc_bits);
3112     MERGE(er.error_count);
3113     MERGE(padding_bug_score);
3114     MERGE(current_picture.f.error[0]);
3115     MERGE(current_picture.f.error[1]);
3116     MERGE(current_picture.f.error[2]);
3117
3118     if(dst->avctx->noise_reduction){
3119         for(i=0; i<64; i++){
3120             MERGE(dct_error_sum[0][i]);
3121             MERGE(dct_error_sum[1][i]);
3122         }
3123     }
3124
3125     assert(put_bits_count(&src->pb) % 8 ==0);
3126     assert(put_bits_count(&dst->pb) % 8 ==0);
3127     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3128     flush_put_bits(&dst->pb);
3129 }
3130
3131 static int estimate_qp(MpegEncContext *s, int dry_run){
3132     if (s->next_lambda){
3133         s->current_picture_ptr->f.quality =
3134         s->current_picture.f.quality = s->next_lambda;
3135         if(!dry_run) s->next_lambda= 0;
3136     } else if (!s->fixed_qscale) {
3137         s->current_picture_ptr->f.quality =
3138         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3139         if (s->current_picture.f.quality < 0)
3140             return -1;
3141     }
3142
3143     if(s->adaptive_quant){
3144         switch(s->codec_id){
3145         case AV_CODEC_ID_MPEG4:
3146             if (CONFIG_MPEG4_ENCODER)
3147                 ff_clean_mpeg4_qscales(s);
3148             break;
3149         case AV_CODEC_ID_H263:
3150         case AV_CODEC_ID_H263P:
3151         case AV_CODEC_ID_FLV1:
3152             if (CONFIG_H263_ENCODER)
3153                 ff_clean_h263_qscales(s);
3154             break;
3155         default:
3156             ff_init_qscale_tab(s);
3157         }
3158
3159         s->lambda= s->lambda_table[0];
3160         //FIXME broken
3161     }else
3162         s->lambda = s->current_picture.f.quality;
3163     update_qscale(s);
3164     return 0;
3165 }
3166
3167 /* must be called before writing the header */
3168 static void set_frame_distances(MpegEncContext * s){
3169     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3170     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3171
3172     if(s->pict_type==AV_PICTURE_TYPE_B){
3173         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3174         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3175     }else{
3176         s->pp_time= s->time - s->last_non_b_time;
3177         s->last_non_b_time= s->time;
3178         assert(s->picture_number==0 || s->pp_time > 0);
3179     }
3180 }
3181
/**
 * Encode the current picture.
 *
 * Steps, in order: set up timing and rounding state, pick a provisional
 * lambda, run motion estimation (or, for I pictures, the variance analysis)
 * on all slice contexts, optionally turn a P picture into an I picture on a
 * scene change, choose f_code/b_code and clip over-long vectors, estimate
 * the final qscale, write the format-specific picture header and finally run
 * encode_thread on all slice contexts and merge their results.
 *
 * @param s              encoder context
 * @param picture_number stored in s->picture_number and handed to the
 *                       picture header writers
 * @return 0 on success, -1 if estimate_qp() or ff_init_me() fail, or the
 *         negative value returned by ff_update_duplicate_context()
 */
3182 static int encode_picture(MpegEncContext *s, int picture_number)
3183 {
3184     int i, ret;
3185     int bits;
3186     int context_count = s->slice_context_count;
3187
3188     s->picture_number = picture_number;
3189
3190     /* Reset the average MB variance */
3191     s->me.mb_var_sum_temp    =
3192     s->me.mc_mb_var_sum_temp = 0;
3193
3194     /* we need to initialize some time vars before we can encode b-frames */
3195     // RAL: Condition added for MPEG1VIDEO
3196     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3197         set_frame_distances(s);
3198     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3199         ff_set_mpeg4_time(s);
3200
3201     s->me.scene_change_score=0;
3202
3203 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3204
     /* I pictures reset no_rounding (1 only for msmpeg4 version >= 3);
      * other non-B pictures toggle it for flipflop rounding, H.263+ and MPEG-4 */
3205     if(s->pict_type==AV_PICTURE_TYPE_I){
3206         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3207         else                        s->no_rounding=0;
3208     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3209         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3210             s->no_rounding ^= 1;
3211     }
3212
     /* Provisional lambda for motion estimation: in the second pass use a
      * dry-run qscale estimate, otherwise (unless the QSCALE flag is set)
      * reuse the lambda remembered for the matching picture type. */
3213     if(s->flags & CODEC_FLAG_PASS2){
3214         if (estimate_qp(s,1) < 0)
3215             return -1;
3216         ff_get_2pass_fcode(s);
3217     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3218         if(s->pict_type==AV_PICTURE_TYPE_B)
3219             s->lambda= s->last_lambda_for[s->pict_type];
3220         else
3221             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3222         update_qscale(s);
3223     }
3224
3225     s->mb_intra=0; //for the rate distortion & bit compare functions
     /* bring the additional slice contexts (index 1 and up) in sync with s */
3226     for(i=1; i<context_count; i++){
3227         ret = ff_update_duplicate_context(s->thread_context[i], s);
3228         if (ret < 0)
3229             return ret;
3230     }
3231
3232     if(ff_init_me(s)<0)
3233         return -1;
3234
3235     /* Estimate motion for every MB */
3236     if(s->pict_type != AV_PICTURE_TYPE_I){
         /* scale lambda and lambda2 by me_penalty_compensation/256, rounded */
3237         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3238         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3239         if (s->pict_type != AV_PICTURE_TYPE_B) {
             /* pre-pass: when pre_me is set and the last non-B picture was
              * an I picture, or unconditionally when pre_me == 2 */
3240             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3241                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3242             }
3243         }
3244
3245         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3246     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3247         /* I-Frame */
3248         for(i=0; i<s->mb_stride*s->mb_height; i++)
3249             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3250
3251         if(!s->fixed_qscale){
3252             /* finding spatial complexity for I-frame rate control */
3253             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3254         }
3255     }
     /* collect the per-context ME sums in s and publish the variance sums
      * in both copies of the current picture */
3256     for(i=1; i<context_count; i++){
3257         merge_context_after_me(s, s->thread_context[i]);
3258     }
3259     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3260     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3261     emms_c();
3262
     /* scene change: re-type a P picture as I and mark every MB as intra */
3263     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3264         s->pict_type= AV_PICTURE_TYPE_I;
3265         for(i=0; i<s->mb_stride*s->mb_height; i++)
3266             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3267         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3268                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3269     }
3270
     /* f_code/b_code selection and vector clipping, skipped when umvplus is set */
3271     if(!s->umvplus){
3272         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3273             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3274
3275             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3276                 int a,b;
3277                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3278                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3279                 s->f_code= FFMAX3(s->f_code, a, b);
3280             }
3281
3282             ff_fix_long_p_mvs(s);
3283             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3284             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3285                 int j;
3286                 for(i=0; i<2; i++){
3287                     for(j=0; j<2; j++)
3288                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3289                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3290                 }
3291             }
3292         }
3293
3294         if(s->pict_type==AV_PICTURE_TYPE_B){
3295             int a, b;
3296
             /* f_code must cover the forward and the bidir-forward tables,
              * b_code the backward and the bidir-backward tables */
3297             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3298             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3299             s->f_code = FFMAX(a, b);
3300
3301             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3302             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3303             s->b_code = FFMAX(a, b);
3304
3305             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3306             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3307             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3308             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3309             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3310                 int dir, j;
3311                 for(dir=0; dir<2; dir++){
3312                     for(i=0; i<2; i++){
3313                         for(j=0; j<2; j++){
3314                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3315                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3316                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3317                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3318                         }
3319                     }
3320                 }
3321             }
3322         }
3323     }
3324
     /* final (non dry-run) quality estimate for this picture */
3325     if (estimate_qp(s, 0) < 0)
3326         return -1;
3327
3328     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3329         s->qscale= 3; //reduce clipping problems
3330
3331     if (s->out_format == FMT_MJPEG) {
3332         /* for mjpeg, we do include qscale in the matrix */
3333         for(i=1;i<64;i++){
3334             int j= s->dsp.idct_permutation[i];
3335
3336             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3337         }
3338         s->y_dc_scale_table=
3339         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3340         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3341         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3342                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
         /* qscale is already part of the matrix, so use the fixed value 8 */
3343         s->qscale= 8;
3344     }
3345
3346     //FIXME var duplication
3347     s->current_picture_ptr->f.key_frame =
3348     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3349     s->current_picture_ptr->f.pict_type =
3350     s->current_picture.f.pict_type = s->pict_type;
3351
3352     if (s->current_picture.f.key_frame)
3353         s->picture_in_gop_number=0;
3354
     /* write the picture header for the output format; the bit count taken
      * before and after gives s->header_bits */
3355     s->last_bits= put_bits_count(&s->pb);
3356     switch(s->out_format) {
3357     case FMT_MJPEG:
3358         if (CONFIG_MJPEG_ENCODER)
3359             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3360                                            s->intra_matrix);
3361         break;
3362     case FMT_H261:
3363         if (CONFIG_H261_ENCODER)
3364             ff_h261_encode_picture_header(s, picture_number);
3365         break;
3366     case FMT_H263:
3367         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3368             ff_wmv2_encode_picture_header(s, picture_number);
3369         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3370             ff_msmpeg4_encode_picture_header(s, picture_number);
3371         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3372             ff_mpeg4_encode_picture_header(s, picture_number);
3373         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3374             ff_rv10_encode_picture_header(s, picture_number);
3375         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3376             ff_rv20_encode_picture_header(s, picture_number);
3377         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3378             ff_flv_encode_picture_header(s, picture_number);
3379         else if (CONFIG_H263_ENCODER)
3380             ff_h263_encode_picture_header(s, picture_number);
3381         break;
3382     case FMT_MPEG1:
3383         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3384             ff_mpeg1_encode_picture_header(s, picture_number);
3385         break;
3386     default:
3387         assert(0);
3388     }
3389     bits= put_bits_count(&s->pb);
3390     s->header_bits= bits - s->last_bits;
3391
     /* push the post-ME state to the other contexts, encode on all of them,
      * then merge their statistics and bitstreams back into s */
3392     for(i=1; i<context_count; i++){
3393         update_duplicate_context_after_me(s->thread_context[i], s);
3394     }
3395     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3396     for(i=1; i<context_count; i++){
3397         merge_context_after_encode(s, s->thread_context[i]);
3398     }
3399     emms_c();
3400     return 0;
3401 }
3402
3403 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3404     const int intra= s->mb_intra;
3405     int i;
3406
3407     s->dct_count[intra]++;
3408
3409     for(i=0; i<64; i++){
3410         int level= block[i];
3411
3412         if(level){
3413             if(level>0){
3414                 s->dct_error_sum[intra][i] += level;
3415                 level -= s->dct_offset[intra][i];
3416                 if(level<0) level=0;
3417             }else{
3418                 s->dct_error_sum[intra][i] -= level;
3419                 level += s->dct_offset[intra][i];
3420                 if(level>0) level=0;
3421             }
3422             block[i]= level;
3423         }
3424     }
3425 }
3426
/**
 * Forward DCT and rate-distortion optimized (trellis) quantization of one
 * block, in place.
 *
 * After the DCT (and optional denoising) each coefficient up to the last one
 * that quantizes to non-zero gets one or two candidate levels. A dynamic
 * programming pass over the scan positions then picks, for every position,
 * the cheapest (run, level) pair in terms of distortion + bits * lambda and
 * finally the cheapest position at which to end the block. The chosen levels
 * are written back to block[] in permuted scan order; all other AC
 * coefficients are zeroed.
 *
 * @param s        encoder context
 * @param block    64 coefficients; input samples on entry, quantized levels
 *                 on return
 * @param n        block number; for intra blocks n < 4 selects y_dc_scale,
 *                 otherwise c_dc_scale; also indexes s->coded_score[]
 * @param qscale   quantizer scale
 * @param overflow set to non-zero if a candidate level exceeds s->max_qcoeff
 * @return scan index of the last coefficient kept; 0 for an intra block with
 *         only its DC value, -1 for an inter block with nothing left
 */
3427 static int dct_quantize_trellis_c(MpegEncContext *s,
3428                                   int16_t *block, int n,
3429                                   int qscale, int *overflow){
3430     const int *qmat;
3431     const uint8_t *scantable= s->intra_scantable.scantable;
3432     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3433     int max=0;
3434     unsigned int threshold1, threshold2;
3435     int bias=0;
     /* run_tab/level_tab/score_tab are indexed by i+1, i.e. by the position
      * just after scan position i; score_tab[start_i] is the empty start */
3436     int run_tab[65];
3437     int level_tab[65];
3438     int score_tab[65];
     /* positions still worth considering as the end of the previous run */
3439     int survivor[65];
3440     int survivor_count;
3441     int last_run=0;
3442     int last_level=0;
3443     int last_score= 0;
3444     int last_i;
     /* candidate levels per scan position and how many of them are valid */
3445     int coeff[2][64];
3446     int coeff_count[64];
3447     int qmul, qadd, start_i, last_non_zero, i, dc;
3448     const int esc_length= s->ac_esc_length;
3449     uint8_t * length;
3450     uint8_t * last_length;
3451     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3452
3453     s->dsp.fdct (block);
3454
3455     if(s->dct_error_sum)
3456         s->denoise_dct(s, block);
3457     qmul= qscale*16;
3458     qadd= ((qscale-1)|1)*8;
3459
3460     if (s->mb_intra) {
3461         int q;
3462         if (!s->h263_aic) {
3463             if (n < 4)
3464                 q = s->y_dc_scale;
3465             else
3466                 q = s->c_dc_scale;
3467             q = q << 3;
3468         } else{
3469             /* For AIC we skip quant/dequant of INTRADC */
3470             q = 1 << 3;
3471             qadd=0;
3472         }
3473
3474         /* note: block[0] is assumed to be positive */
3475         block[0] = (block[0] + (q >> 1)) / q;
         /* DC is done; the trellis only covers scan positions 1..63 */
3476         start_i = 1;
3477         last_non_zero = 0;
3478         qmat = s->q_intra_matrix[qscale];
3479         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3480             bias= 1<<(QMAT_SHIFT-1);
3481         length     = s->intra_ac_vlc_length;
3482         last_length= s->intra_ac_vlc_last_length;
3483     } else {
3484         start_i = 0;
3485         last_non_zero = -1;
3486         qmat = s->q_inter_matrix[qscale];
3487         length     = s->inter_ac_vlc_length;
3488         last_length= s->inter_ac_vlc_last_length;
3489     }
3490     last_i= start_i;
3491
     /* (unsigned)(level+threshold1) > threshold2 is a single-compare test
      * for level outside [-threshold1, threshold1] */
3492     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3493     threshold2= (threshold1<<1);
3494
     /* scan backwards for the last coefficient that passes the threshold */
3495     for(i=63; i>=start_i; i--) {
3496         const int j = scantable[i];
3497         int level = block[j] * qmat[j];
3498
3499         if(((unsigned)(level+threshold1))>threshold2){
3500             last_non_zero = i;
3501             break;
3502         }
3503     }
3504
     /* build the candidate levels: the quantized level and the next smaller
      * magnitude for coefficients above the threshold, a single +-1 otherwise */
3505     for(i=start_i; i<=last_non_zero; i++) {
3506         const int j = scantable[i];
3507         int level = block[j] * qmat[j];
3508
3509 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3510 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3511         if(((unsigned)(level+threshold1))>threshold2){
3512             if(level>0){
3513                 level= (bias + level)>>QMAT_SHIFT;
3514                 coeff[0][i]= level;
3515                 coeff[1][i]= level-1;
3516 //                coeff[2][k]= level-2;
3517             }else{
3518                 level= (bias - level)>>QMAT_SHIFT;
3519                 coeff[0][i]= -level;
3520                 coeff[1][i]= -level+1;
3521 //                coeff[2][k]= -level+2;
3522             }
             /* a magnitude of 1 has no smaller non-zero alternative */
3523             coeff_count[i]= FFMIN(level, 2);
3524             assert(coeff_count[i]);
3525             max |=level;
3526         }else{
             /* -1 for negative level, +1 otherwise; assumes an arithmetic
              * right shift of a 32-bit int */
3527             coeff[0][i]= (level>>31)|1;
3528             coeff_count[i]= 1;
3529         }
3530     }
3531
3532     *overflow= s->max_qcoeff < max; //overflow might have happened
3533
     /* nothing above the threshold: clear the AC part and stop */
3534     if(last_non_zero < start_i){
3535         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3536         return last_non_zero;
3537     }
3538
3539     score_tab[start_i]= 0;
3540     survivor[0]= start_i;
3541     survivor_count= 1;
3542
     /* dynamic programming over the scan positions */
3543     for(i=start_i; i<=last_non_zero; i++){
3544         int level_index, j, zero_distortion;
3545         int dct_coeff= FFABS(block[ scantable[i] ]);
3546         int best_score=256*256*256*120;
3547
3548         if (s->dsp.fdct == ff_fdct_ifast)
3549             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
         /* distortion of coding this coefficient as zero; candidate
          * distortions below are taken relative to it */
3550         zero_distortion= dct_coeff*dct_coeff;
3551
3552         for(level_index=0; level_index < coeff_count[i]; level_index++){
3553             int distortion;
3554             int level= coeff[level_index][i];
3555             const int alevel= FFABS(level);
3556             int unquant_coeff;
3557
3558             assert(level);
3559
             /* reconstruct the magnitude the candidate level would produce */
3560             if(s->out_format == FMT_H263){
3561                 unquant_coeff= alevel*qmul + qadd;
3562             }else{ //MPEG1
3563                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3564                 if(s->mb_intra){
3565                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3566                         unquant_coeff =   (unquant_coeff - 1) | 1;
3567                 }else{
3568                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3569                         unquant_coeff =   (unquant_coeff - 1) | 1;
3570                 }
3571                 unquant_coeff<<= 3;
3572             }
3573
3574             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
             /* bias the level by 64; a biased level in 0..127 is looked up in
              * the length tables, anything else is charged esc_length */
3575             level+=64;
3576             if((level&(~127)) == 0){
3577                 for(j=survivor_count-1; j>=0; j--){
3578                     int run= i - survivor[j];
3579                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3580                     score += score_tab[i-run];
3581
3582                     if(score < best_score){
3583                         best_score= score;
3584                         run_tab[i+1]= run;
3585                         level_tab[i+1]= level-64;
3586                     }
3587                 }
3588
                 /* for H.263 output the block end is tracked here, using the
                  * separate last_length table */
3589                 if(s->out_format == FMT_H263){
3590                     for(j=survivor_count-1; j>=0; j--){
3591                         int run= i - survivor[j];
3592                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3593                         score += score_tab[i-run];
3594                         if(score < last_score){
3595                             last_score= score;
3596                             last_run= run;
3597                             last_level= level-64;
3598                             last_i= i+1;
3599                         }
3600                     }
3601                 }
3602             }else{
3603                 distortion += esc_length*lambda;
3604                 for(j=survivor_count-1; j>=0; j--){
3605                     int run= i - survivor[j];
3606                     int score= distortion + score_tab[i-run];
3607
3608                     if(score < best_score){
3609                         best_score= score;
3610                         run_tab[i+1]= run;
3611                         level_tab[i+1]= level-64;
3612                     }
3613                 }
3614
3615                 if(s->out_format == FMT_H263){
3616                   for(j=survivor_count-1; j>=0; j--){
3617                         int run= i - survivor[j];
3618                         int score= distortion + score_tab[i-run];
3619                         if(score < last_score){
3620                             last_score= score;
3621                             last_run= run;
3622                             last_level= level-64;
3623                             last_i= i+1;
3624                         }
3625                     }
3626                 }
3627             }
3628         }
3629
3630         score_tab[i+1]= best_score;
3631
         /* drop trailing survivors whose score is worse than the new best
          * (with a slack of lambda when last_non_zero > 27) */
3632         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3633         if(last_non_zero <= 27){
3634             for(; survivor_count; survivor_count--){
3635                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3636                     break;
3637             }
3638         }else{
3639             for(; survivor_count; survivor_count--){
3640                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3641                     break;
3642             }
3643         }
3644
3645         survivor[ survivor_count++ ]= i+1;
3646     }
3647
     /* non-H.263 formats: choose the block end afterwards, charging
      * 2*lambda for every end position other than 0 */
3648     if(s->out_format != FMT_H263){
3649         last_score= 256*256*256*120;
3650         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3651             int score= score_tab[i];
3652             if(i) score += lambda*2; //FIXME exacter?
3653
3654             if(score < last_score){
3655                 last_score= score;
3656                 last_i= i;
3657                 last_level= level_tab[i];
3658                 last_run= run_tab[i];
3659             }
3660         }
3661     }
3662
3663     s->coded_score[n] = last_score;
3664
     /* remember |block[0]| before the coefficients are cleared */
3665     dc= FFABS(block[0]);
3666     last_non_zero= last_i - 1;
3667     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3668
3669     if(last_non_zero < start_i)
3670         return last_non_zero;
3671
     /* inter block whose only coefficient is at position 0: re-decide its
      * level against coding nothing at all */
3672     if(last_non_zero == 0 && start_i == 0){
3673         int best_level= 0;
3674         int best_score= dc * dc;
3675
3676         for(i=0; i<coeff_count[0]; i++){
3677             int level= coeff[i][0];
3678             int alevel= FFABS(level);
3679             int unquant_coeff, score, distortion;
3680
3681             if(s->out_format == FMT_H263){
3682                     unquant_coeff= (alevel*qmul + qadd)>>3;
3683             }else{ //MPEG1
3684                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3685                     unquant_coeff =   (unquant_coeff - 1) | 1;
3686             }
3687             unquant_coeff = (unquant_coeff + 4) >> 3;
3688             unquant_coeff<<= 3 + 3;
3689
3690             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3691             level+=64;
3692             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3693             else                    score= distortion + esc_length*lambda;
3694
3695             if(score < best_score){
3696                 best_score= score;
3697                 best_level= level - 64;
3698             }
3699         }
3700         block[0]= best_level;
3701         s->coded_score[n] = best_score - dc*dc;
3702         if(best_level == 0) return -1;
3703         else                return last_non_zero;
3704     }
3705
     /* walk the chosen path backwards through run_tab/level_tab and store
      * the levels at their permuted positions */
3706     i= last_i;
3707     assert(last_level);
3708
3709     block[ perm_scantable[last_non_zero] ]= last_level;
3710     i -= last_run + 1;
3711
3712     for(; i>start_i; i -= run_tab[i] + 1){
3713         block[ perm_scantable[i-1] ]= level_tab[i];
3714     }
3715
3716     return last_non_zero;
3717 }
3718
3719 //#define REFINE_STATS 1
3720 static int16_t basis[64][64];
3721
3722 static void build_basis(uint8_t *perm){
3723     int i, j, x, y;
3724     emms_c();
3725     for(i=0; i<8; i++){
3726         for(j=0; j<8; j++){
3727             for(y=0; y<8; y++){
3728                 for(x=0; x<8; x++){
3729                     double s= 0.25*(1<<BASIS_SHIFT);
3730                     int index= 8*i + j;
3731                     int perm_index= perm[index];
3732                     if(i==0) s*= sqrt(0.5);
3733                     if(j==0) s*= sqrt(0.5);
3734                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3735                 }
3736             }
3737         }
3738     }
3739 }
3740
3741 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3742                         int16_t *block, int16_t *weight, int16_t *orig,
3743                         int n, int qscale){
3744     int16_t rem[64];
3745     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3746     const uint8_t *scantable= s->intra_scantable.scantable;
3747     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3748 //    unsigned int threshold1, threshold2;
3749 //    int bias=0;
3750     int run_tab[65];
3751     int prev_run=0;
3752     int prev_level=0;
3753     int qmul, qadd, start_i, last_non_zero, i, dc;
3754     uint8_t * length;
3755     uint8_t * last_length;
3756     int lambda;
3757     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3758 #ifdef REFINE_STATS
3759 static int count=0;
3760 static int after_last=0;
3761 static int to_zero=0;
3762 static int from_zero=0;
3763 static int raise=0;
3764 static int lower=0;
3765 static int messed_sign=0;
3766 #endif
3767
3768     if(basis[0][0] == 0)
3769         build_basis(s->dsp.idct_permutation);
3770
3771     qmul= qscale*2;
3772     qadd= (qscale-1)|1;
3773     if (s->mb_intra) {
3774         if (!s->h263_aic) {
3775             if (n < 4)
3776                 q = s->y_dc_scale;
3777             else
3778                 q = s->c_dc_scale;
3779         } else{
3780             /* For AIC we skip quant/dequant of INTRADC */
3781             q = 1;
3782             qadd=0;
3783         }
3784         q <<= RECON_SHIFT-3;
3785         /* note: block[0] is assumed to be positive */
3786         dc= block[0]*q;
3787 //        block[0] = (block[0] + (q >> 1)) / q;
3788         start_i = 1;
3789 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3790 //            bias= 1<<(QMAT_SHIFT-1);
3791         length     = s->intra_ac_vlc_length;
3792         last_length= s->intra_ac_vlc_last_length;
3793     } else {
3794         dc= 0;
3795         start_i = 0;
3796         length     = s->inter_ac_vlc_length;
3797         last_length= s->inter_ac_vlc_last_length;
3798     }
3799     last_non_zero = s->block_last_index[n];
3800
3801 #ifdef REFINE_STATS
3802 {START_TIMER
3803 #endif
3804     dc += (1<<(RECON_SHIFT-1));
3805     for(i=0; i<64; i++){
3806         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3807     }
3808 #ifdef REFINE_STATS
3809 STOP_TIMER("memset rem[]")}
3810 #endif
3811     sum=0;
3812     for(i=0; i<64; i++){
3813         int one= 36;
3814         int qns=4;
3815         int w;
3816
3817         w= FFABS(weight[i]) + qns*one;
3818         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3819
3820         weight[i] = w;
3821 //        w=weight[i] = (63*qns + (w/2)) / w;
3822
3823         assert(w>0);
3824         assert(w<(1<<6));
3825         sum += w*w;
3826     }
3827     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3828 #ifdef REFINE_STATS
3829 {START_TIMER
3830 #endif
3831     run=0;
3832     rle_index=0;
3833     for(i=start_i; i<=last_non_zero; i++){
3834         int j= perm_scantable[i];
3835         const int level= block[j];
3836         int coeff;
3837
3838         if(level){
3839             if(level<0) coeff= qmul*level - qadd;
3840             else        coeff= qmul*level + qadd;
3841             run_tab[rle_index++]=run;
3842             run=0;
3843
3844             s->dsp.add_8x8basis(rem, basis[j], coeff);
3845         }else{
3846             run++;
3847         }
3848     }
3849 #ifdef REFINE_STATS
3850 if(last_non_zero>0){
3851 STOP_TIMER("init rem[]")
3852 }
3853 }
3854
3855 {START_TIMER
3856 #endif
3857     for(;;){
3858         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3859         int best_coeff=0;
3860         int best_change=0;
3861         int run2, best_unquant_change=0, analyze_gradient;
3862 #ifdef REFINE_STATS
3863 {START_TIMER
3864 #endif
3865         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3866
3867         if(analyze_gradient){
3868 #ifdef REFINE_STATS
3869 {START_TIMER
3870 #endif
3871             for(i=0; i<64; i++){
3872                 int w= weight[i];
3873
3874                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3875             }
3876 #ifdef REFINE_STATS
3877 STOP_TIMER("rem*w*w")}
3878 {START_TIMER
3879 #endif
3880             s->dsp.fdct(d1);
3881 #ifdef REFINE_STATS
3882 STOP_TIMER("dct")}
3883 #endif
3884         }
3885
3886         if(start_i){
3887             const int level= block[0];
3888             int change, old_coeff;
3889
3890             assert(s->mb_intra);
3891
3892             old_coeff= q*level;
3893
3894             for(change=-1; change<=1; change+=2){
3895                 int new_level= level + change;
3896                 int score, new_coeff;
3897
3898                 new_coeff= q*new_level;
3899                 if(new_coeff >= 2048 || new_coeff < 0)
3900                     continue;
3901
3902                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3903                 if(score<best_score){
3904                     best_score= score;
3905                     best_coeff= 0;
3906                     best_change= change;
3907                     best_unquant_change= new_coeff - old_coeff;
3908                 }
3909             }
3910         }
3911
3912         run=0;
3913         rle_index=0;
3914         run2= run_tab[rle_index++];
3915         prev_level=0;
3916         prev_run=0;
3917
3918         for(i=start_i; i<64; i++){
3919             int j= perm_scantable[i];
3920             const int level= block[j];
3921             int change, old_coeff;
3922
3923             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3924                 break;
3925
3926             if(level){
3927                 if(level<0) old_coeff= qmul*level - qadd;
3928                 else        old_coeff= qmul*level + qadd;
3929                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3930             }else{
3931                 old_coeff=0;
3932                 run2--;
3933                 assert(run2>=0 || i >= last_non_zero );
3934             }
3935
3936             for(change=-1; change<=1; change+=2){
3937                 int new_level= level + change;
3938                 int score, new_coeff, unquant_change;
3939
3940                 score=0;
3941                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3942                    continue;
3943
3944                 if(new_level){
3945                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3946                     else            new_coeff= qmul*new_level + qadd;
3947                     if(new_coeff >= 2048 || new_coeff <= -2048)
3948                         continue;
3949                     //FIXME check for overflow
3950
3951                     if(level){
3952                         if(level < 63 && level > -63){
3953                             if(i < last_non_zero)
3954                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3955                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3956                             else
3957                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3958                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3959                         }
3960                     }else{
3961                         assert(FFABS(new_level)==1);
3962
3963                         if(analyze_gradient){
3964                             int g= d1[ scantable[i] ];
3965                             if(g && (g^new_level) >= 0)
3966                                 continue;
3967                         }
3968
3969                         if(i < last_non_zero){
3970                             int next_i= i + run2 + 1;
3971                             int next_level= block[ perm_scantable[next_i] ] + 64;
3972
3973                             if(next_level&(~127))
3974                                 next_level= 0;
3975
3976                             if(next_i < last_non_zero)
3977                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3978                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3979                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3980                             else
3981                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3982                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3983                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3984                         }else{
3985                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3986                             if(prev_level){
3987                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3988                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3989                             }
3990                         }
3991                     }
3992                 }else{
3993                     new_coeff=0;
3994                     assert(FFABS(level)==1);
3995
3996                     if(i < last_non_zero){
3997                         int next_i= i + run2 + 1;
3998                         int next_level= block[ perm_scantable[next_i] ] + 64;
3999
4000                         if(next_level&(~127))
4001                             next_level= 0;
4002
4003                         if(next_i < last_non_zero)
4004                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4005                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4006                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4007                         else
4008                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4009                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4010                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4011                     }else{
4012                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4013                         if(prev_level){
4014                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4015                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4016                         }
4017                     }
4018                 }
4019
4020                 score *= lambda;
4021
4022                 unquant_change= new_coeff - old_coeff;
4023                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
4024
4025                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
4026                 if(score<best_score){
4027                     best_score= score;
4028                     best_coeff= i;
4029                     best_change= change;
4030                     best_unquant_change= unquant_change;
4031                 }
4032             }
4033             if(level){
4034                 prev_level= level + 64;
4035                 if(prev_level&(~127))
4036                     prev_level= 0;
4037                 prev_run= run;
4038                 run=0;
4039             }else{
4040                 run++;
4041             }
4042         }
4043 #ifdef REFINE_STATS
4044 STOP_TIMER("iterative step")}
4045 #endif
4046
4047         if(best_change){
4048             int j= perm_scantable[ best_coeff ];
4049
4050             block[j] += best_change;
4051
4052             if(best_coeff > last_non_zero){
4053                 last_non_zero= best_coeff;
4054                 assert(block[j]);
4055 #ifdef REFINE_STATS
4056 after_last++;
4057 #endif
4058             }else{
4059 #ifdef REFINE_STATS
4060 if(block[j]){
4061     if(block[j] - best_change){
4062         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4063             raise++;
4064         }else{
4065             lower++;
4066         }
4067     }else{
4068         from_zero++;
4069     }
4070 }else{
4071     to_zero++;
4072 }
4073 #endif
4074                 for(; last_non_zero>=start_i; last_non_zero--){
4075                     if(block[perm_scantable[last_non_zero]])
4076                         break;
4077                 }
4078             }
4079 #ifdef REFINE_STATS
4080 count++;
4081 if(256*256*256*64 % count == 0){
4082     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4083 }
4084 #endif
4085             run=0;
4086             rle_index=0;
4087             for(i=start_i; i<=last_non_zero; i++){
4088                 int j= perm_scantable[i];
4089                 const int level= block[j];
4090
4091                  if(level){
4092                      run_tab[rle_index++]=run;
4093                      run=0;
4094                  }else{
4095                      run++;
4096                  }
4097             }
4098
4099             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4100         }else{
4101             break;
4102         }
4103     }
4104 #ifdef REFINE_STATS
4105 if(last_non_zero>0){
4106 STOP_TIMER("iterative search")
4107 }
4108 }
4109 #endif
4110
4111     return last_non_zero;
4112 }
4113
4114 int ff_dct_quantize_c(MpegEncContext *s,
4115                         int16_t *block, int n,
4116                         int qscale, int *overflow)
4117 {
4118     int i, j, level, last_non_zero, q, start_i;
4119     const int *qmat;
4120     const uint8_t *scantable= s->intra_scantable.scantable;
4121     int bias;
4122     int max=0;
4123     unsigned int threshold1, threshold2;
4124
4125     s->dsp.fdct (block);
4126
4127     if(s->dct_error_sum)
4128         s->denoise_dct(s, block);
4129
4130     if (s->mb_intra) {
4131         if (!s->h263_aic) {
4132             if (n < 4)
4133                 q = s->y_dc_scale;
4134             else
4135                 q = s->c_dc_scale;
4136             q = q << 3;
4137         } else
4138             /* For AIC we skip quant/dequant of INTRADC */
4139             q = 1 << 3;
4140
4141         /* note: block[0] is assumed to be positive */
4142         block[0] = (block[0] + (q >> 1)) / q;
4143         start_i = 1;
4144         last_non_zero = 0;
4145         qmat = s->q_intra_matrix[qscale];
4146         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4147     } else {
4148         start_i = 0;
4149         last_non_zero = -1;
4150         qmat = s->q_inter_matrix[qscale];
4151         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4152     }
4153     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4154     threshold2= (threshold1<<1);
4155     for(i=63;i>=start_i;i--) {
4156         j = scantable[i];
4157         level = block[j] * qmat[j];
4158
4159         if(((unsigned)(level+threshold1))>threshold2){
4160             last_non_zero = i;
4161             break;
4162         }else{
4163             block[j]=0;
4164         }
4165     }
4166     for(i=start_i; i<=last_non_zero; i++) {
4167         j = scantable[i];
4168         level = block[j] * qmat[j];
4169
4170 //        if(   bias+level >= (1<<QMAT_SHIFT)
4171 //           || bias-level >= (1<<QMAT_SHIFT)){
4172         if(((unsigned)(level+threshold1))>threshold2){
4173             if(level>0){
4174                 level= (bias + level)>>QMAT_SHIFT;
4175                 block[j]= level;
4176             }else{
4177                 level= (bias - level)>>QMAT_SHIFT;
4178                 block[j]= -level;
4179             }
4180             max |=level;
4181         }else{
4182             block[j]=0;
4183         }
4184     }
4185     *overflow= s->max_qcoeff < max; //overflow might have happened
4186
4187     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4188     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4189         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4190
4191     return last_non_zero;
4192 }
4193
/* Byte offset of member x inside MpegEncContext, used by the option tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Flag set attached to the option entries below. The replacement list is
 * parenthesized so that it stays a single operand wherever it is expanded. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4196 static const AVOption h263_options[] = {
4197     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4198     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4199     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4200     FF_MPV_COMMON_OPTS
4201     { NULL },
4202 };
4203
4204 static const AVClass h263_class = {
4205     .class_name = "H.263 encoder",
4206     .item_name  = av_default_item_name,
4207     .option     = h263_options,
4208     .version    = LIBAVUTIL_VERSION_INT,
4209 };
4210
4211 AVCodec ff_h263_encoder = {
4212     .name           = "h263",
4213     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4214     .type           = AVMEDIA_TYPE_VIDEO,
4215     .id             = AV_CODEC_ID_H263,
4216     .priv_data_size = sizeof(MpegEncContext),
4217     .init           = ff_MPV_encode_init,
4218     .encode2        = ff_MPV_encode_picture,
4219     .close          = ff_MPV_encode_end,
4220     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4221     .priv_class     = &h263_class,
4222 };
4223
4224 static const AVOption h263p_options[] = {
4225     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4226     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4227     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4228     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4229     FF_MPV_COMMON_OPTS
4230     { NULL },
4231 };
4232 static const AVClass h263p_class = {
4233     .class_name = "H.263p encoder",
4234     .item_name  = av_default_item_name,
4235     .option     = h263p_options,
4236     .version    = LIBAVUTIL_VERSION_INT,
4237 };
4238
4239 AVCodec ff_h263p_encoder = {
4240     .name           = "h263p",
4241     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4242     .type           = AVMEDIA_TYPE_VIDEO,
4243     .id             = AV_CODEC_ID_H263P,
4244     .priv_data_size = sizeof(MpegEncContext),
4245     .init           = ff_MPV_encode_init,
4246     .encode2        = ff_MPV_encode_picture,
4247     .close          = ff_MPV_encode_end,
4248     .capabilities   = CODEC_CAP_SLICE_THREADS,
4249     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4250     .priv_class     = &h263p_class,
4251 };
4252
4253 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4254
4255 AVCodec ff_msmpeg4v2_encoder = {
4256     .name           = "msmpeg4v2",
4257     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4258     .type           = AVMEDIA_TYPE_VIDEO,
4259     .id             = AV_CODEC_ID_MSMPEG4V2,
4260     .priv_data_size = sizeof(MpegEncContext),
4261     .init           = ff_MPV_encode_init,
4262     .encode2        = ff_MPV_encode_picture,
4263     .close          = ff_MPV_encode_end,
4264     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4265     .priv_class     = &msmpeg4v2_class,
4266 };
4267
4268 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4269
4270 AVCodec ff_msmpeg4v3_encoder = {
4271     .name           = "msmpeg4",
4272     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4273     .type           = AVMEDIA_TYPE_VIDEO,
4274     .id             = AV_CODEC_ID_MSMPEG4V3,
4275     .priv_data_size = sizeof(MpegEncContext),
4276     .init           = ff_MPV_encode_init,
4277     .encode2        = ff_MPV_encode_picture,
4278     .close          = ff_MPV_encode_end,
4279     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4280     .priv_class     = &msmpeg4v3_class,
4281 };
4282
4283 FF_MPV_GENERIC_CLASS(wmv1)
4284
4285 AVCodec ff_wmv1_encoder = {
4286     .name           = "wmv1",
4287     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4288     .type           = AVMEDIA_TYPE_VIDEO,
4289     .id             = AV_CODEC_ID_WMV1,
4290     .priv_data_size = sizeof(MpegEncContext),
4291     .init           = ff_MPV_encode_init,
4292     .encode2        = ff_MPV_encode_picture,
4293     .close          = ff_MPV_encode_end,
4294     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4295     .priv_class     = &wmv1_class,
4296 };