]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
dsputil: Split off quarterpel bits into their own context
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "dsputil.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "h261.h"
44 #include "h263.h"
45 #include "mathops.h"
46 #include "mpegutils.h"
47 #include "mjpegenc.h"
48 #include "msmpeg4.h"
49 #include "qpeldsp.h"
50 #include "faandct.h"
51 #include "thread.h"
52 #include "aandcttab.h"
53 #include "flv.h"
54 #include "mpeg4video.h"
55 #include "internal.h"
56 #include "bytestream.h"
57 #include <limits.h>
58
59 static int encode_picture(MpegEncContext *s, int picture_number);
60 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
61 static int sse_mb(MpegEncContext *s);
62 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
63 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
64
65 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
66 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
67
68 const AVOption ff_mpv_generic_options[] = {
69     FF_MPV_COMMON_OPTS
70     { NULL },
71 };
72
73 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
74                        uint16_t (*qmat16)[2][64],
75                        const uint16_t *quant_matrix,
76                        int bias, int qmin, int qmax, int intra)
77 {
78     int qscale;
79     int shift = 0;
80
81     for (qscale = qmin; qscale <= qmax; qscale++) {
82         int i;
83         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
84             dsp->fdct == ff_jpeg_fdct_islow_10 ||
85             dsp->fdct == ff_faandct) {
86             for (i = 0; i < 64; i++) {
87                 const int j = dsp->idct_permutation[i];
88                 /* 16 <= qscale * quant_matrix[i] <= 7905
89                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
90                  *             19952 <=              x  <= 249205026
91                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
92                  *           3444240 >= (1 << 36) / (x) >= 275 */
93
94                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
95                                         (qscale * quant_matrix[j]));
96             }
97         } else if (dsp->fdct == ff_fdct_ifast) {
98             for (i = 0; i < 64; i++) {
99                 const int j = dsp->idct_permutation[i];
100                 /* 16 <= qscale * quant_matrix[i] <= 7905
101                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
102                  *             19952 <=              x  <= 249205026
103                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
104                  *           3444240 >= (1 << 36) / (x) >= 275 */
105
106                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
107                                         (ff_aanscales[i] * qscale *
108                                          quant_matrix[j]));
109             }
110         } else {
111             for (i = 0; i < 64; i++) {
112                 const int j = dsp->idct_permutation[i];
113                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
114                  * Assume x = qscale * quant_matrix[i]
115                  * So             16 <=              x  <= 7905
116                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
117                  * so          32768 >= (1 << 19) / (x) >= 67 */
118                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
119                                         (qscale * quant_matrix[j]));
120                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
121                 //                    (qscale * quant_matrix[i]);
122                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
123                                        (qscale * quant_matrix[j]);
124
125                 if (qmat16[qscale][0][i] == 0 ||
126                     qmat16[qscale][0][i] == 128 * 256)
127                     qmat16[qscale][0][i] = 128 * 256 - 1;
128                 qmat16[qscale][1][i] =
129                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
130                                 qmat16[qscale][0][i]);
131             }
132         }
133
134         for (i = intra; i < 64; i++) {
135             int64_t max = 8191;
136             if (dsp->fdct == ff_fdct_ifast) {
137                 max = (8191LL * ff_aanscales[i]) >> 14;
138             }
139             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
140                 shift++;
141             }
142         }
143     }
144     if (shift) {
145         av_log(NULL, AV_LOG_INFO,
146                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
147                QMAT_SHIFT - shift);
148     }
149 }
150
151 static inline void update_qscale(MpegEncContext *s)
152 {
153     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
154                 (FF_LAMBDA_SHIFT + 7);
155     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
156
157     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
158                  FF_LAMBDA_SHIFT;
159 }
160
161 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
162 {
163     int i;
164
165     if (matrix) {
166         put_bits(pb, 1, 1);
167         for (i = 0; i < 64; i++) {
168             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
169         }
170     } else
171         put_bits(pb, 1, 0);
172 }
173
174 /**
175  * init s->current_picture.qscale_table from s->lambda_table
176  */
177 void ff_init_qscale_tab(MpegEncContext *s)
178 {
179     int8_t * const qscale_table = s->current_picture.qscale_table;
180     int i;
181
182     for (i = 0; i < s->mb_num; i++) {
183         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
184         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
185         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
186                                                   s->avctx->qmax);
187     }
188 }
189
190 static void update_duplicate_context_after_me(MpegEncContext *dst,
191                                               MpegEncContext *src)
192 {
193 #define COPY(a) dst->a= src->a
194     COPY(pict_type);
195     COPY(current_picture);
196     COPY(f_code);
197     COPY(b_code);
198     COPY(qscale);
199     COPY(lambda);
200     COPY(lambda2);
201     COPY(picture_in_gop_number);
202     COPY(gop_picture_number);
203     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
204     COPY(progressive_frame);    // FIXME don't set in encode_header
205     COPY(partitioned_frame);    // FIXME don't set in encode_header
206 #undef COPY
207 }
208
209 /**
210  * Set the given MpegEncContext to defaults for encoding.
211  * the changed fields will not depend upon the prior state of the MpegEncContext.
212  */
213 static void MPV_encode_defaults(MpegEncContext *s)
214 {
215     int i;
216     ff_MPV_common_defaults(s);
217
218     for (i = -16; i < 16; i++) {
219         default_fcode_tab[i + MAX_MV] = 1;
220     }
221     s->me.mv_penalty = default_mv_penalty;
222     s->fcode_tab     = default_fcode_tab;
223
224     s->input_picture_number  = 0;
225     s->picture_in_gop_number = 0;
226 }
227
228 /* init video encoder */
229 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
230 {
231     MpegEncContext *s = avctx->priv_data;
232     int i, ret;
233
234     MPV_encode_defaults(s);
235
236     switch (avctx->codec_id) {
237     case AV_CODEC_ID_MPEG2VIDEO:
238         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
239             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
240             av_log(avctx, AV_LOG_ERROR,
241                    "only YUV420 and YUV422 are supported\n");
242             return -1;
243         }
244         break;
245     case AV_CODEC_ID_MJPEG:
246         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
247             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
248             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
249               avctx->pix_fmt != AV_PIX_FMT_YUV422P) ||
250              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
251             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
252             return -1;
253         }
254         break;
255     default:
256         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
257             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
258             return -1;
259         }
260     }
261
262     switch (avctx->pix_fmt) {
263     case AV_PIX_FMT_YUVJ422P:
264     case AV_PIX_FMT_YUV422P:
265         s->chroma_format = CHROMA_422;
266         break;
267     case AV_PIX_FMT_YUVJ420P:
268     case AV_PIX_FMT_YUV420P:
269     default:
270         s->chroma_format = CHROMA_420;
271         break;
272     }
273
274     s->bit_rate = avctx->bit_rate;
275     s->width    = avctx->width;
276     s->height   = avctx->height;
277     if (avctx->gop_size > 600 &&
278         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
279         av_log(avctx, AV_LOG_ERROR,
280                "Warning keyframe interval too large! reducing it ...\n");
281         avctx->gop_size = 600;
282     }
283     s->gop_size     = avctx->gop_size;
284     s->avctx        = avctx;
285     s->flags        = avctx->flags;
286     s->flags2       = avctx->flags2;
287     if (avctx->max_b_frames > MAX_B_FRAMES) {
288         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
289                "is %d.\n", MAX_B_FRAMES);
290     }
291     s->max_b_frames = avctx->max_b_frames;
292     s->codec_id     = avctx->codec->id;
293     s->strict_std_compliance = avctx->strict_std_compliance;
294     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
295     s->mpeg_quant         = avctx->mpeg_quant;
296     s->rtp_mode           = !!avctx->rtp_payload_size;
297     s->intra_dc_precision = avctx->intra_dc_precision;
298     s->user_specified_pts = AV_NOPTS_VALUE;
299
300     if (s->gop_size <= 1) {
301         s->intra_only = 1;
302         s->gop_size   = 12;
303     } else {
304         s->intra_only = 0;
305     }
306
307     s->me_method = avctx->me_method;
308
309     /* Fixed QSCALE */
310     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
311
312     s->adaptive_quant = (s->avctx->lumi_masking ||
313                          s->avctx->dark_masking ||
314                          s->avctx->temporal_cplx_masking ||
315                          s->avctx->spatial_cplx_masking  ||
316                          s->avctx->p_masking      ||
317                          s->avctx->border_masking ||
318                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
319                         !s->fixed_qscale;
320
321     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
322
323     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
324         av_log(avctx, AV_LOG_ERROR,
325                "a vbv buffer size is needed, "
326                "for encoding with a maximum bitrate\n");
327         return -1;
328     }
329
330     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
331         av_log(avctx, AV_LOG_INFO,
332                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
333     }
334
335     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
336         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
337         return -1;
338     }
339
340     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
341         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
342         return -1;
343     }
344
345     if (avctx->rc_max_rate &&
346         avctx->rc_max_rate == avctx->bit_rate &&
347         avctx->rc_max_rate != avctx->rc_min_rate) {
348         av_log(avctx, AV_LOG_INFO,
349                "impossible bitrate constraints, this will fail\n");
350     }
351
352     if (avctx->rc_buffer_size &&
353         avctx->bit_rate * (int64_t)avctx->time_base.num >
354             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
355         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
356         return -1;
357     }
358
359     if (!s->fixed_qscale &&
360         avctx->bit_rate * av_q2d(avctx->time_base) >
361             avctx->bit_rate_tolerance) {
362         av_log(avctx, AV_LOG_ERROR,
363                "bitrate tolerance too small for bitrate\n");
364         return -1;
365     }
366
367     if (s->avctx->rc_max_rate &&
368         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
369         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
370          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
371         90000LL * (avctx->rc_buffer_size - 1) >
372             s->avctx->rc_max_rate * 0xFFFFLL) {
373         av_log(avctx, AV_LOG_INFO,
374                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
375                "specified vbv buffer is too large for the given bitrate!\n");
376     }
377
378     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
379         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
380         s->codec_id != AV_CODEC_ID_FLV1) {
381         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
382         return -1;
383     }
384
385     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
386         av_log(avctx, AV_LOG_ERROR,
387                "OBMC is only supported with simple mb decision\n");
388         return -1;
389     }
390
391     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
392         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
393         return -1;
394     }
395
396     if (s->max_b_frames                    &&
397         s->codec_id != AV_CODEC_ID_MPEG4      &&
398         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
399         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
400         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
401         return -1;
402     }
403
404     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
405          s->codec_id == AV_CODEC_ID_H263  ||
406          s->codec_id == AV_CODEC_ID_H263P) &&
407         (avctx->sample_aspect_ratio.num > 255 ||
408          avctx->sample_aspect_ratio.den > 255)) {
409         av_log(avctx, AV_LOG_ERROR,
410                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
411                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
412         return -1;
413     }
414
415     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
416         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
417         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
418         return -1;
419     }
420
421     // FIXME mpeg2 uses that too
422     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
423         av_log(avctx, AV_LOG_ERROR,
424                "mpeg2 style quantization not supported by codec\n");
425         return -1;
426     }
427
428     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
429         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
430         return -1;
431     }
432
433     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
434         s->avctx->mb_decision != FF_MB_DECISION_RD) {
435         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
436         return -1;
437     }
438
439     if (s->avctx->scenechange_threshold < 1000000000 &&
440         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
441         av_log(avctx, AV_LOG_ERROR,
442                "closed gop with scene change detection are not supported yet, "
443                "set threshold to 1000000000\n");
444         return -1;
445     }
446
447     if (s->flags & CODEC_FLAG_LOW_DELAY) {
448         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
449             av_log(avctx, AV_LOG_ERROR,
450                   "low delay forcing is only available for mpeg2\n");
451             return -1;
452         }
453         if (s->max_b_frames != 0) {
454             av_log(avctx, AV_LOG_ERROR,
455                    "b frames cannot be used with low delay\n");
456             return -1;
457         }
458     }
459
460     if (s->q_scale_type == 1) {
461         if (avctx->qmax > 12) {
462             av_log(avctx, AV_LOG_ERROR,
463                    "non linear quant only supports qmax <= 12 currently\n");
464             return -1;
465         }
466     }
467
468     if (s->avctx->thread_count > 1         &&
469         s->codec_id != AV_CODEC_ID_MPEG4      &&
470         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
471         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
472         (s->codec_id != AV_CODEC_ID_H263P)) {
473         av_log(avctx, AV_LOG_ERROR,
474                "multi threaded encoding not supported by codec\n");
475         return -1;
476     }
477
478     if (s->avctx->thread_count < 1) {
479         av_log(avctx, AV_LOG_ERROR,
480                "automatic thread number detection not supported by codec,"
481                "patch welcome\n");
482         return -1;
483     }
484
485     if (s->avctx->thread_count > 1)
486         s->rtp_mode = 1;
487
488     if (!avctx->time_base.den || !avctx->time_base.num) {
489         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
490         return -1;
491     }
492
493     i = (INT_MAX / 2 + 128) >> 8;
494     if (avctx->mb_threshold >= i) {
495         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
496                i - 1);
497         return -1;
498     }
499
500     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
501         av_log(avctx, AV_LOG_INFO,
502                "notice: b_frame_strategy only affects the first pass\n");
503         avctx->b_frame_strategy = 0;
504     }
505
506     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
507     if (i > 1) {
508         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
509         avctx->time_base.den /= i;
510         avctx->time_base.num /= i;
511         //return -1;
512     }
513
514     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
515         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
516         // (a + x * 3 / 8) / x
517         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
518         s->inter_quant_bias = 0;
519     } else {
520         s->intra_quant_bias = 0;
521         // (a - x / 4) / x
522         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
523     }
524
525     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
526         s->intra_quant_bias = avctx->intra_quant_bias;
527     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
528         s->inter_quant_bias = avctx->inter_quant_bias;
529
530     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
531         s->avctx->time_base.den > (1 << 16) - 1) {
532         av_log(avctx, AV_LOG_ERROR,
533                "timebase %d/%d not supported by MPEG 4 standard, "
534                "the maximum admitted value for the timebase denominator "
535                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
536                (1 << 16) - 1);
537         return -1;
538     }
539     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
540
541     switch (avctx->codec->id) {
542     case AV_CODEC_ID_MPEG1VIDEO:
543         s->out_format = FMT_MPEG1;
544         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
545         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
546         break;
547     case AV_CODEC_ID_MPEG2VIDEO:
548         s->out_format = FMT_MPEG1;
549         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
550         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
551         s->rtp_mode   = 1;
552         break;
553     case AV_CODEC_ID_MJPEG:
554         s->out_format = FMT_MJPEG;
555         s->intra_only = 1; /* force intra only for jpeg */
556         if (!CONFIG_MJPEG_ENCODER ||
557             ff_mjpeg_encode_init(s) < 0)
558             return -1;
559         avctx->delay = 0;
560         s->low_delay = 1;
561         break;
562     case AV_CODEC_ID_H261:
563         if (!CONFIG_H261_ENCODER)
564             return -1;
565         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
566             av_log(avctx, AV_LOG_ERROR,
567                    "The specified picture size of %dx%d is not valid for the "
568                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
569                     s->width, s->height);
570             return -1;
571         }
572         s->out_format = FMT_H261;
573         avctx->delay  = 0;
574         s->low_delay  = 1;
575         break;
576     case AV_CODEC_ID_H263:
577         if (!CONFIG_H263_ENCODER)
578         return -1;
579         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
580                              s->width, s->height) == 8) {
581             av_log(avctx, AV_LOG_INFO,
582                    "The specified picture size of %dx%d is not valid for "
583                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
584                    "352x288, 704x576, and 1408x1152."
585                    "Try H.263+.\n", s->width, s->height);
586             return -1;
587         }
588         s->out_format = FMT_H263;
589         avctx->delay  = 0;
590         s->low_delay  = 1;
591         break;
592     case AV_CODEC_ID_H263P:
593         s->out_format = FMT_H263;
594         s->h263_plus  = 1;
595         /* Fx */
596         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
597         s->modified_quant  = s->h263_aic;
598         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
599         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
600
601         /* /Fx */
602         /* These are just to be sure */
603         avctx->delay = 0;
604         s->low_delay = 1;
605         break;
606     case AV_CODEC_ID_FLV1:
607         s->out_format      = FMT_H263;
608         s->h263_flv        = 2; /* format = 1; 11-bit codes */
609         s->unrestricted_mv = 1;
610         s->rtp_mode  = 0; /* don't allow GOB */
611         avctx->delay = 0;
612         s->low_delay = 1;
613         break;
614     case AV_CODEC_ID_RV10:
615         s->out_format = FMT_H263;
616         avctx->delay  = 0;
617         s->low_delay  = 1;
618         break;
619     case AV_CODEC_ID_RV20:
620         s->out_format      = FMT_H263;
621         avctx->delay       = 0;
622         s->low_delay       = 1;
623         s->modified_quant  = 1;
624         s->h263_aic        = 1;
625         s->h263_plus       = 1;
626         s->loop_filter     = 1;
627         s->unrestricted_mv = 0;
628         break;
629     case AV_CODEC_ID_MPEG4:
630         s->out_format      = FMT_H263;
631         s->h263_pred       = 1;
632         s->unrestricted_mv = 1;
633         s->low_delay       = s->max_b_frames ? 0 : 1;
634         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
635         break;
636     case AV_CODEC_ID_MSMPEG4V2:
637         s->out_format      = FMT_H263;
638         s->h263_pred       = 1;
639         s->unrestricted_mv = 1;
640         s->msmpeg4_version = 2;
641         avctx->delay       = 0;
642         s->low_delay       = 1;
643         break;
644     case AV_CODEC_ID_MSMPEG4V3:
645         s->out_format        = FMT_H263;
646         s->h263_pred         = 1;
647         s->unrestricted_mv   = 1;
648         s->msmpeg4_version   = 3;
649         s->flipflop_rounding = 1;
650         avctx->delay         = 0;
651         s->low_delay         = 1;
652         break;
653     case AV_CODEC_ID_WMV1:
654         s->out_format        = FMT_H263;
655         s->h263_pred         = 1;
656         s->unrestricted_mv   = 1;
657         s->msmpeg4_version   = 4;
658         s->flipflop_rounding = 1;
659         avctx->delay         = 0;
660         s->low_delay         = 1;
661         break;
662     case AV_CODEC_ID_WMV2:
663         s->out_format        = FMT_H263;
664         s->h263_pred         = 1;
665         s->unrestricted_mv   = 1;
666         s->msmpeg4_version   = 5;
667         s->flipflop_rounding = 1;
668         avctx->delay         = 0;
669         s->low_delay         = 1;
670         break;
671     default:
672         return -1;
673     }
674
675     avctx->has_b_frames = !s->low_delay;
676
677     s->encoding = 1;
678
679     s->progressive_frame    =
680     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
681                                                 CODEC_FLAG_INTERLACED_ME) ||
682                                 s->alternate_scan);
683
684     /* init */
685     if (ff_MPV_common_init(s) < 0)
686         return -1;
687
688     if (ARCH_X86)
689         ff_MPV_encode_init_x86(s);
690
691     ff_qpeldsp_init(&s->qdsp);
692
693     s->avctx->coded_frame = s->current_picture.f;
694
695     if (s->msmpeg4_version) {
696         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
697                           2 * 2 * (MAX_LEVEL + 1) *
698                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
699     }
700     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
701
702     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
703     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
704     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
705     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
706     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
707                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
708     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
709                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
710
711     if (s->avctx->noise_reduction) {
712         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
713                           2 * 64 * sizeof(uint16_t), fail);
714     }
715
716     if (CONFIG_H263_ENCODER)
717         ff_h263dsp_init(&s->h263dsp);
718     if (!s->dct_quantize)
719         s->dct_quantize = ff_dct_quantize_c;
720     if (!s->denoise_dct)
721         s->denoise_dct  = denoise_dct_c;
722     s->fast_dct_quantize = s->dct_quantize;
723     if (avctx->trellis)
724         s->dct_quantize  = dct_quantize_trellis_c;
725
726     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
727         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
728
729     s->quant_precision = 5;
730
731     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
732     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
733
734     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
735         ff_h261_encode_init(s);
736     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
737         ff_h263_encode_init(s);
738     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
739         ff_msmpeg4_encode_init(s);
740     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
741         && s->out_format == FMT_MPEG1)
742         ff_mpeg1_encode_init(s);
743
744     /* init q matrix */
745     for (i = 0; i < 64; i++) {
746         int j = s->dsp.idct_permutation[i];
747         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
748             s->mpeg_quant) {
749             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
750             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
751         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
752             s->intra_matrix[j] =
753             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
754         } else {
755             /* mpeg1/2 */
756             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
757             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
758         }
759         if (s->avctx->intra_matrix)
760             s->intra_matrix[j] = s->avctx->intra_matrix[i];
761         if (s->avctx->inter_matrix)
762             s->inter_matrix[j] = s->avctx->inter_matrix[i];
763     }
764
765     /* precompute matrix */
766     /* for mjpeg, we do include qscale in the matrix */
767     if (s->out_format != FMT_MJPEG) {
768         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
769                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
770                           31, 1);
771         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
772                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
773                           31, 0);
774     }
775
776     if (ff_rate_control_init(s) < 0)
777         return -1;
778
779 #if FF_API_ERROR_RATE
780     FF_DISABLE_DEPRECATION_WARNINGS
781     if (avctx->error_rate)
782         s->error_rate = avctx->error_rate;
783     FF_ENABLE_DEPRECATION_WARNINGS;
784 #endif
785
786 #if FF_API_NORMALIZE_AQP
787     FF_DISABLE_DEPRECATION_WARNINGS
788     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
789         s->mpv_flags |= FF_MPV_FLAG_NAQ;
790     FF_ENABLE_DEPRECATION_WARNINGS;
791 #endif
792
793 #if FF_API_MV0
794     FF_DISABLE_DEPRECATION_WARNINGS
795     if (avctx->flags & CODEC_FLAG_MV0)
796         s->mpv_flags |= FF_MPV_FLAG_MV0;
797     FF_ENABLE_DEPRECATION_WARNINGS
798 #endif
799
800     if (avctx->b_frame_strategy == 2) {
801         for (i = 0; i < s->max_b_frames + 2; i++) {
802             s->tmp_frames[i] = av_frame_alloc();
803             if (!s->tmp_frames[i])
804                 return AVERROR(ENOMEM);
805
806             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
807             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
808             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
809
810             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
811             if (ret < 0)
812                 return ret;
813         }
814     }
815
816     return 0;
817 fail:
818     ff_MPV_encode_end(avctx);
819     return AVERROR_UNKNOWN;
820 }
821
822 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
823 {
824     MpegEncContext *s = avctx->priv_data;
825     int i;
826
827     ff_rate_control_uninit(s);
828
829     ff_MPV_common_end(s);
830     if (CONFIG_MJPEG_ENCODER &&
831         s->out_format == FMT_MJPEG)
832         ff_mjpeg_encode_close(s);
833
834     av_freep(&avctx->extradata);
835
836     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
837         av_frame_free(&s->tmp_frames[i]);
838
839     ff_free_picture_tables(&s->new_picture);
840     ff_mpeg_unref_picture(s, &s->new_picture);
841
842     av_freep(&s->avctx->stats_out);
843     av_freep(&s->ac_stats);
844
845     av_freep(&s->q_intra_matrix);
846     av_freep(&s->q_inter_matrix);
847     av_freep(&s->q_intra_matrix16);
848     av_freep(&s->q_inter_matrix16);
849     av_freep(&s->input_picture);
850     av_freep(&s->reordered_input_picture);
851     av_freep(&s->dct_offset);
852
853     return 0;
854 }
855
/**
 * Sum of absolute differences between a 16x16 block and a constant value.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared against
 * @param stride distance in bytes between two consecutive rows of src
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int row, col;
    int total = 0;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
869
870 static int get_intra_count(MpegEncContext *s, uint8_t *src,
871                            uint8_t *ref, int stride)
872 {
873     int x, y, w, h;
874     int acc = 0;
875
876     w = s->width  & ~15;
877     h = s->height & ~15;
878
879     for (y = 0; y < h; y += 16) {
880         for (x = 0; x < w; x += 16) {
881             int offset = x + y * stride;
882             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
883                                      16);
884             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
885             int sae  = get_sae(src + offset, mean, stride);
886
887             acc += sae + 500 < sad;
888         }
889     }
890     return acc;
891 }
892
893
894 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
895 {
896     Picture *pic = NULL;
897     int64_t pts;
898     int i, display_picture_number = 0, ret;
899     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
900                                                  (s->low_delay ? 0 : 1);
901     int direct = 1;
902
903     if (pic_arg) {
904         pts = pic_arg->pts;
905         display_picture_number = s->input_picture_number++;
906
907         if (pts != AV_NOPTS_VALUE) {
908             if (s->user_specified_pts != AV_NOPTS_VALUE) {
909                 int64_t time = pts;
910                 int64_t last = s->user_specified_pts;
911
912                 if (time <= last) {
913                     av_log(s->avctx, AV_LOG_ERROR,
914                            "Error, Invalid timestamp=%"PRId64", "
915                            "last=%"PRId64"\n", pts, s->user_specified_pts);
916                     return -1;
917                 }
918
919                 if (!s->low_delay && display_picture_number == 1)
920                     s->dts_delta = time - last;
921             }
922             s->user_specified_pts = pts;
923         } else {
924             if (s->user_specified_pts != AV_NOPTS_VALUE) {
925                 s->user_specified_pts =
926                 pts = s->user_specified_pts + 1;
927                 av_log(s->avctx, AV_LOG_INFO,
928                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
929                        pts);
930             } else {
931                 pts = display_picture_number;
932             }
933         }
934     }
935
936     if (pic_arg) {
937         if (!pic_arg->buf[0]);
938             direct = 0;
939         if (pic_arg->linesize[0] != s->linesize)
940             direct = 0;
941         if (pic_arg->linesize[1] != s->uvlinesize)
942             direct = 0;
943         if (pic_arg->linesize[2] != s->uvlinesize)
944             direct = 0;
945
946         av_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
947                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
948
949         if (direct) {
950             i = ff_find_unused_picture(s, 1);
951             if (i < 0)
952                 return i;
953
954             pic = &s->picture[i];
955             pic->reference = 3;
956
957             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
958                 return ret;
959             if (ff_alloc_picture(s, pic, 1) < 0) {
960                 return -1;
961             }
962         } else {
963             i = ff_find_unused_picture(s, 0);
964             if (i < 0)
965                 return i;
966
967             pic = &s->picture[i];
968             pic->reference = 3;
969
970             if (ff_alloc_picture(s, pic, 0) < 0) {
971                 return -1;
972             }
973
974             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
975                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
976                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
977                 // empty
978             } else {
979                 int h_chroma_shift, v_chroma_shift;
980                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
981                                                  &h_chroma_shift,
982                                                  &v_chroma_shift);
983
984                 for (i = 0; i < 3; i++) {
985                     int src_stride = pic_arg->linesize[i];
986                     int dst_stride = i ? s->uvlinesize : s->linesize;
987                     int h_shift = i ? h_chroma_shift : 0;
988                     int v_shift = i ? v_chroma_shift : 0;
989                     int w = s->width  >> h_shift;
990                     int h = s->height >> v_shift;
991                     uint8_t *src = pic_arg->data[i];
992                     uint8_t *dst = pic->f->data[i];
993
994                     if (!s->avctx->rc_buffer_size)
995                         dst += INPLACE_OFFSET;
996
997                     if (src_stride == dst_stride)
998                         memcpy(dst, src, src_stride * h);
999                     else {
1000                         while (h--) {
1001                             memcpy(dst, src, w);
1002                             dst += dst_stride;
1003                             src += src_stride;
1004                         }
1005                     }
1006                 }
1007             }
1008         }
1009         ret = av_frame_copy_props(pic->f, pic_arg);
1010         if (ret < 0)
1011             return ret;
1012
1013         pic->f->display_picture_number = display_picture_number;
1014         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1015     }
1016
1017     /* shift buffer entries */
1018     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1019         s->input_picture[i - 1] = s->input_picture[i];
1020
1021     s->input_picture[encoding_delay] = (Picture*) pic;
1022
1023     return 0;
1024 }
1025
1026 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1027 {
1028     int x, y, plane;
1029     int score = 0;
1030     int64_t score64 = 0;
1031
1032     for (plane = 0; plane < 3; plane++) {
1033         const int stride = p->f->linesize[plane];
1034         const int bw = plane ? 1 : 2;
1035         for (y = 0; y < s->mb_height * bw; y++) {
1036             for (x = 0; x < s->mb_width * bw; x++) {
1037                 int off = p->shared ? 0 : 16;
1038                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1039                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1040                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1041
1042                 switch (s->avctx->frame_skip_exp) {
1043                 case 0: score    =  FFMAX(score, v);          break;
1044                 case 1: score   += FFABS(v);                  break;
1045                 case 2: score   += v * v;                     break;
1046                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1047                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1048                 }
1049             }
1050         }
1051     }
1052
1053     if (score)
1054         score64 = score;
1055
1056     if (score64 < s->avctx->frame_skip_threshold)
1057         return 1;
1058     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1059         return 1;
1060     return 0;
1061 }
1062
1063 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1064 {
1065     AVPacket pkt = { 0 };
1066     int ret, got_output;
1067
1068     av_init_packet(&pkt);
1069     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1070     if (ret < 0)
1071         return ret;
1072
1073     ret = pkt.size;
1074     av_free_packet(&pkt);
1075     return ret;
1076 }
1077
1078 static int estimate_best_b_count(MpegEncContext *s)
1079 {
1080     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1081     AVCodecContext *c = avcodec_alloc_context3(NULL);
1082     const int scale = s->avctx->brd_scale;
1083     int i, j, out_size, p_lambda, b_lambda, lambda2;
1084     int64_t best_rd  = INT64_MAX;
1085     int best_b_count = -1;
1086
1087     assert(scale >= 0 && scale <= 3);
1088
1089     //emms_c();
1090     //s->next_picture_ptr->quality;
1091     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1092     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1093     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1094     if (!b_lambda) // FIXME we should do this somewhere else
1095         b_lambda = p_lambda;
1096     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1097                FF_LAMBDA_SHIFT;
1098
1099     c->width        = s->width  >> scale;
1100     c->height       = s->height >> scale;
1101     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1102     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1103     c->mb_decision  = s->avctx->mb_decision;
1104     c->me_cmp       = s->avctx->me_cmp;
1105     c->mb_cmp       = s->avctx->mb_cmp;
1106     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1107     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1108     c->time_base    = s->avctx->time_base;
1109     c->max_b_frames = s->max_b_frames;
1110
1111     if (avcodec_open2(c, codec, NULL) < 0)
1112         return -1;
1113
1114     for (i = 0; i < s->max_b_frames + 2; i++) {
1115         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1116                                                 s->next_picture_ptr;
1117
1118         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1119             pre_input = *pre_input_ptr;
1120
1121             if (!pre_input.shared && i) {
1122                 pre_input.f->data[0] += INPLACE_OFFSET;
1123                 pre_input.f->data[1] += INPLACE_OFFSET;
1124                 pre_input.f->data[2] += INPLACE_OFFSET;
1125             }
1126
1127             s->dsp.shrink[scale](s->tmp_frames[i]->data[0], s->tmp_frames[i]->linesize[0],
1128                                  pre_input.f->data[0], pre_input.f->linesize[0],
1129                                  c->width,      c->height);
1130             s->dsp.shrink[scale](s->tmp_frames[i]->data[1], s->tmp_frames[i]->linesize[1],
1131                                  pre_input.f->data[1], pre_input.f->linesize[1],
1132                                  c->width >> 1, c->height >> 1);
1133             s->dsp.shrink[scale](s->tmp_frames[i]->data[2], s->tmp_frames[i]->linesize[2],
1134                                  pre_input.f->data[2], pre_input.f->linesize[2],
1135                                  c->width >> 1, c->height >> 1);
1136         }
1137     }
1138
1139     for (j = 0; j < s->max_b_frames + 1; j++) {
1140         int64_t rd = 0;
1141
1142         if (!s->input_picture[j])
1143             break;
1144
1145         c->error[0] = c->error[1] = c->error[2] = 0;
1146
1147         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1148         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1149
1150         out_size = encode_frame(c, s->tmp_frames[0]);
1151
1152         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1153
1154         for (i = 0; i < s->max_b_frames + 1; i++) {
1155             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1156
1157             s->tmp_frames[i + 1]->pict_type = is_p ?
1158                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1159             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1160
1161             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1162
1163             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1164         }
1165
1166         /* get the delayed frames */
1167         while (out_size) {
1168             out_size = encode_frame(c, NULL);
1169             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1170         }
1171
1172         rd += c->error[0] + c->error[1] + c->error[2];
1173
1174         if (rd < best_rd) {
1175             best_rd = rd;
1176             best_b_count = j;
1177         }
1178     }
1179
1180     avcodec_close(c);
1181     av_freep(&c);
1182
1183     return best_b_count;
1184 }
1185
1186 static int select_input_picture(MpegEncContext *s)
1187 {
1188     int i, ret;
1189
1190     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1191         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1192     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1193
1194     /* set next picture type & ordering */
1195     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1196         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1197             s->next_picture_ptr == NULL || s->intra_only) {
1198             s->reordered_input_picture[0] = s->input_picture[0];
1199             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1200             s->reordered_input_picture[0]->f->coded_picture_number =
1201                 s->coded_picture_number++;
1202         } else {
1203             int b_frames;
1204
1205             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1206                 if (s->picture_in_gop_number < s->gop_size &&
1207                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1208                     // FIXME check that te gop check above is +-1 correct
1209                     av_frame_unref(s->input_picture[0]->f);
1210
1211                     emms_c();
1212                     ff_vbv_update(s, 0);
1213
1214                     goto no_output_pic;
1215                 }
1216             }
1217
1218             if (s->flags & CODEC_FLAG_PASS2) {
1219                 for (i = 0; i < s->max_b_frames + 1; i++) {
1220                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1221
1222                     if (pict_num >= s->rc_context.num_entries)
1223                         break;
1224                     if (!s->input_picture[i]) {
1225                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1226                         break;
1227                     }
1228
1229                     s->input_picture[i]->f->pict_type =
1230                         s->rc_context.entry[pict_num].new_pict_type;
1231                 }
1232             }
1233
1234             if (s->avctx->b_frame_strategy == 0) {
1235                 b_frames = s->max_b_frames;
1236                 while (b_frames && !s->input_picture[b_frames])
1237                     b_frames--;
1238             } else if (s->avctx->b_frame_strategy == 1) {
1239                 for (i = 1; i < s->max_b_frames + 1; i++) {
1240                     if (s->input_picture[i] &&
1241                         s->input_picture[i]->b_frame_score == 0) {
1242                         s->input_picture[i]->b_frame_score =
1243                             get_intra_count(s,
1244                                             s->input_picture[i    ]->f->data[0],
1245                                             s->input_picture[i - 1]->f->data[0],
1246                                             s->linesize) + 1;
1247                     }
1248                 }
1249                 for (i = 0; i < s->max_b_frames + 1; i++) {
1250                     if (s->input_picture[i] == NULL ||
1251                         s->input_picture[i]->b_frame_score - 1 >
1252                             s->mb_num / s->avctx->b_sensitivity)
1253                         break;
1254                 }
1255
1256                 b_frames = FFMAX(0, i - 1);
1257
1258                 /* reset scores */
1259                 for (i = 0; i < b_frames + 1; i++) {
1260                     s->input_picture[i]->b_frame_score = 0;
1261                 }
1262             } else if (s->avctx->b_frame_strategy == 2) {
1263                 b_frames = estimate_best_b_count(s);
1264             } else {
1265                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1266                 b_frames = 0;
1267             }
1268
1269             emms_c();
1270
1271             for (i = b_frames - 1; i >= 0; i--) {
1272                 int type = s->input_picture[i]->f->pict_type;
1273                 if (type && type != AV_PICTURE_TYPE_B)
1274                     b_frames = i;
1275             }
1276             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1277                 b_frames == s->max_b_frames) {
1278                 av_log(s->avctx, AV_LOG_ERROR,
1279                        "warning, too many b frames in a row\n");
1280             }
1281
1282             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1283                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1284                     s->gop_size > s->picture_in_gop_number) {
1285                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1286                 } else {
1287                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1288                         b_frames = 0;
1289                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1290                 }
1291             }
1292
1293             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1294                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1295                 b_frames--;
1296
1297             s->reordered_input_picture[0] = s->input_picture[b_frames];
1298             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1299                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1300             s->reordered_input_picture[0]->f->coded_picture_number =
1301                 s->coded_picture_number++;
1302             for (i = 0; i < b_frames; i++) {
1303                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1304                 s->reordered_input_picture[i + 1]->f->pict_type =
1305                     AV_PICTURE_TYPE_B;
1306                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1307                     s->coded_picture_number++;
1308             }
1309         }
1310     }
1311 no_output_pic:
1312     if (s->reordered_input_picture[0]) {
1313         s->reordered_input_picture[0]->reference =
1314            s->reordered_input_picture[0]->f->pict_type !=
1315                AV_PICTURE_TYPE_B ? 3 : 0;
1316
1317         ff_mpeg_unref_picture(s, &s->new_picture);
1318         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1319             return ret;
1320
1321         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1322             // input is a shared pix, so we can't modifiy it -> alloc a new
1323             // one & ensure that the shared one is reuseable
1324
1325             Picture *pic;
1326             int i = ff_find_unused_picture(s, 0);
1327             if (i < 0)
1328                 return i;
1329             pic = &s->picture[i];
1330
1331             pic->reference = s->reordered_input_picture[0]->reference;
1332             if (ff_alloc_picture(s, pic, 0) < 0) {
1333                 return -1;
1334             }
1335
1336             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1337             if (ret < 0)
1338                 return ret;
1339
1340             /* mark us unused / free shared pic */
1341             av_frame_unref(s->reordered_input_picture[0]->f);
1342             s->reordered_input_picture[0]->shared = 0;
1343
1344             s->current_picture_ptr = pic;
1345         } else {
1346             // input is not a shared pix -> reuse buffer for current_pix
1347             s->current_picture_ptr = s->reordered_input_picture[0];
1348             for (i = 0; i < 4; i++) {
1349                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1350             }
1351         }
1352         ff_mpeg_unref_picture(s, &s->current_picture);
1353         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1354                                        s->current_picture_ptr)) < 0)
1355             return ret;
1356
1357         s->picture_number = s->new_picture.f->display_picture_number;
1358     } else {
1359         ff_mpeg_unref_picture(s, &s->new_picture);
1360     }
1361     return 0;
1362 }
1363
1364 static void frame_end(MpegEncContext *s)
1365 {
1366     int i;
1367
1368     if (s->unrestricted_mv &&
1369         s->current_picture.reference &&
1370         !s->intra_only) {
1371         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1372         int hshift = desc->log2_chroma_w;
1373         int vshift = desc->log2_chroma_h;
1374         s->dsp.draw_edges(s->current_picture.f->data[0], s->linesize,
1375                           s->h_edge_pos, s->v_edge_pos,
1376                           EDGE_WIDTH, EDGE_WIDTH,
1377                           EDGE_TOP | EDGE_BOTTOM);
1378         s->dsp.draw_edges(s->current_picture.f->data[1], s->uvlinesize,
1379                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1380                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1381                           EDGE_TOP | EDGE_BOTTOM);
1382         s->dsp.draw_edges(s->current_picture.f->data[2], s->uvlinesize,
1383                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1384                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1385                           EDGE_TOP | EDGE_BOTTOM);
1386     }
1387
1388     emms_c();
1389
1390     s->last_pict_type                 = s->pict_type;
1391     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1392     if (s->pict_type!= AV_PICTURE_TYPE_B)
1393         s->last_non_b_pict_type = s->pict_type;
1394
1395     if (s->encoding) {
1396         /* release non-reference frames */
1397         for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1398             if (!s->picture[i].reference)
1399                 ff_mpeg_unref_picture(s, &s->picture[i]);
1400         }
1401     }
1402
1403     s->avctx->coded_frame = s->current_picture_ptr->f;
1404
1405 }
1406
1407 static void update_noise_reduction(MpegEncContext *s)
1408 {
1409     int intra, i;
1410
1411     for (intra = 0; intra < 2; intra++) {
1412         if (s->dct_count[intra] > (1 << 16)) {
1413             for (i = 0; i < 64; i++) {
1414                 s->dct_error_sum[intra][i] >>= 1;
1415             }
1416             s->dct_count[intra] >>= 1;
1417         }
1418
1419         for (i = 0; i < 64; i++) {
1420             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1421                                        s->dct_count[intra] +
1422                                        s->dct_error_sum[intra][i] / 2) /
1423                                       (s->dct_error_sum[intra][i] + 1);
1424         }
1425     }
1426 }
1427
1428 static int frame_start(MpegEncContext *s)
1429 {
1430     int ret;
1431
1432     /* mark & release old frames */
1433     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1434         s->last_picture_ptr != s->next_picture_ptr &&
1435         s->last_picture_ptr->f->buf[0]) {
1436         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1437     }
1438
1439     s->current_picture_ptr->f->pict_type = s->pict_type;
1440     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1441
1442     ff_mpeg_unref_picture(s, &s->current_picture);
1443     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1444                                    s->current_picture_ptr)) < 0)
1445         return ret;
1446
1447     if (s->pict_type != AV_PICTURE_TYPE_B) {
1448         s->last_picture_ptr = s->next_picture_ptr;
1449         if (!s->droppable)
1450             s->next_picture_ptr = s->current_picture_ptr;
1451     }
1452
1453     if (s->last_picture_ptr) {
1454         ff_mpeg_unref_picture(s, &s->last_picture);
1455         if (s->last_picture_ptr->f->buf[0] &&
1456             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1457                                        s->last_picture_ptr)) < 0)
1458             return ret;
1459     }
1460     if (s->next_picture_ptr) {
1461         ff_mpeg_unref_picture(s, &s->next_picture);
1462         if (s->next_picture_ptr->f->buf[0] &&
1463             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1464                                        s->next_picture_ptr)) < 0)
1465             return ret;
1466     }
1467
1468     if (s->picture_structure!= PICT_FRAME) {
1469         int i;
1470         for (i = 0; i < 4; i++) {
1471             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1472                 s->current_picture.f->data[i] +=
1473                     s->current_picture.f->linesize[i];
1474             }
1475             s->current_picture.f->linesize[i] *= 2;
1476             s->last_picture.f->linesize[i]    *= 2;
1477             s->next_picture.f->linesize[i]    *= 2;
1478         }
1479     }
1480
1481     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1482         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1483         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1484     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1485         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1486         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1487     } else {
1488         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1489         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1490     }
1491
1492     if (s->dct_error_sum) {
1493         assert(s->avctx->noise_reduction && s->encoding);
1494         update_noise_reduction(s);
1495     }
1496
1497     return 0;
1498 }
1499
1500 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1501                           const AVFrame *pic_arg, int *got_packet)
1502 {
1503     MpegEncContext *s = avctx->priv_data;
1504     int i, stuffing_count, ret;
1505     int context_count = s->slice_context_count;
1506
1507     s->picture_in_gop_number++;
1508
1509     if (load_input_picture(s, pic_arg) < 0)
1510         return -1;
1511
1512     if (select_input_picture(s) < 0) {
1513         return -1;
1514     }
1515
1516     /* output? */
1517     if (s->new_picture.f->data[0]) {
1518         if (!pkt->data &&
1519             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1520             return ret;
1521         if (s->mb_info) {
1522             s->mb_info_ptr = av_packet_new_side_data(pkt,
1523                                  AV_PKT_DATA_H263_MB_INFO,
1524                                  s->mb_width*s->mb_height*12);
1525             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1526         }
1527
1528         for (i = 0; i < context_count; i++) {
1529             int start_y = s->thread_context[i]->start_mb_y;
1530             int   end_y = s->thread_context[i]->  end_mb_y;
1531             int h       = s->mb_height;
1532             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1533             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1534
1535             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1536         }
1537
1538         s->pict_type = s->new_picture.f->pict_type;
1539         //emms_c();
1540         ret = frame_start(s);
1541         if (ret < 0)
1542             return ret;
1543 vbv_retry:
1544         if (encode_picture(s, s->picture_number) < 0)
1545             return -1;
1546
1547         avctx->header_bits = s->header_bits;
1548         avctx->mv_bits     = s->mv_bits;
1549         avctx->misc_bits   = s->misc_bits;
1550         avctx->i_tex_bits  = s->i_tex_bits;
1551         avctx->p_tex_bits  = s->p_tex_bits;
1552         avctx->i_count     = s->i_count;
1553         // FIXME f/b_count in avctx
1554         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1555         avctx->skip_count  = s->skip_count;
1556
1557         frame_end(s);
1558
1559         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1560             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1561
1562         if (avctx->rc_buffer_size) {
1563             RateControlContext *rcc = &s->rc_context;
1564             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1565
1566             if (put_bits_count(&s->pb) > max_size &&
1567                 s->lambda < s->avctx->lmax) {
1568                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1569                                        (s->qscale + 1) / s->qscale);
1570                 if (s->adaptive_quant) {
1571                     int i;
1572                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1573                         s->lambda_table[i] =
1574                             FFMAX(s->lambda_table[i] + 1,
1575                                   s->lambda_table[i] * (s->qscale + 1) /
1576                                   s->qscale);
1577                 }
1578                 s->mb_skipped = 0;        // done in frame_start()
1579                 // done in encode_picture() so we must undo it
1580                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1581                     if (s->flipflop_rounding          ||
1582                         s->codec_id == AV_CODEC_ID_H263P ||
1583                         s->codec_id == AV_CODEC_ID_MPEG4)
1584                         s->no_rounding ^= 1;
1585                 }
1586                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1587                     s->time_base       = s->last_time_base;
1588                     s->last_non_b_time = s->time - s->pp_time;
1589                 }
1590                 for (i = 0; i < context_count; i++) {
1591                     PutBitContext *pb = &s->thread_context[i]->pb;
1592                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1593                 }
1594                 goto vbv_retry;
1595             }
1596
1597             assert(s->avctx->rc_max_rate);
1598         }
1599
1600         if (s->flags & CODEC_FLAG_PASS1)
1601             ff_write_pass1_stats(s);
1602
1603         for (i = 0; i < 4; i++) {
1604             s->current_picture_ptr->f->error[i] = s->current_picture.f->error[i];
1605             avctx->error[i] += s->current_picture_ptr->f->error[i];
1606         }
1607
1608         if (s->flags & CODEC_FLAG_PASS1)
1609             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1610                    avctx->i_tex_bits + avctx->p_tex_bits ==
1611                        put_bits_count(&s->pb));
1612         flush_put_bits(&s->pb);
1613         s->frame_bits  = put_bits_count(&s->pb);
1614
1615         stuffing_count = ff_vbv_update(s, s->frame_bits);
1616         if (stuffing_count) {
1617             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1618                     stuffing_count + 50) {
1619                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1620                 return -1;
1621             }
1622
1623             switch (s->codec_id) {
1624             case AV_CODEC_ID_MPEG1VIDEO:
1625             case AV_CODEC_ID_MPEG2VIDEO:
1626                 while (stuffing_count--) {
1627                     put_bits(&s->pb, 8, 0);
1628                 }
1629             break;
1630             case AV_CODEC_ID_MPEG4:
1631                 put_bits(&s->pb, 16, 0);
1632                 put_bits(&s->pb, 16, 0x1C3);
1633                 stuffing_count -= 4;
1634                 while (stuffing_count--) {
1635                     put_bits(&s->pb, 8, 0xFF);
1636                 }
1637             break;
1638             default:
1639                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1640             }
1641             flush_put_bits(&s->pb);
1642             s->frame_bits  = put_bits_count(&s->pb);
1643         }
1644
1645         /* update mpeg1/2 vbv_delay for CBR */
1646         if (s->avctx->rc_max_rate                          &&
1647             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1648             s->out_format == FMT_MPEG1                     &&
1649             90000LL * (avctx->rc_buffer_size - 1) <=
1650                 s->avctx->rc_max_rate * 0xFFFFLL) {
1651             int vbv_delay, min_delay;
1652             double inbits  = s->avctx->rc_max_rate *
1653                              av_q2d(s->avctx->time_base);
1654             int    minbits = s->frame_bits - 8 *
1655                              (s->vbv_delay_ptr - s->pb.buf - 1);
1656             double bits    = s->rc_context.buffer_index + minbits - inbits;
1657
1658             if (bits < 0)
1659                 av_log(s->avctx, AV_LOG_ERROR,
1660                        "Internal error, negative bits\n");
1661
1662             assert(s->repeat_first_field == 0);
1663
1664             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1665             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1666                         s->avctx->rc_max_rate;
1667
1668             vbv_delay = FFMAX(vbv_delay, min_delay);
1669
1670             assert(vbv_delay < 0xFFFF);
1671
1672             s->vbv_delay_ptr[0] &= 0xF8;
1673             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1674             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1675             s->vbv_delay_ptr[2] &= 0x07;
1676             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1677             avctx->vbv_delay     = vbv_delay * 300;
1678         }
1679         s->total_bits     += s->frame_bits;
1680         avctx->frame_bits  = s->frame_bits;
1681
1682         pkt->pts = s->current_picture.f->pts;
1683         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1684             if (!s->current_picture.f->coded_picture_number)
1685                 pkt->dts = pkt->pts - s->dts_delta;
1686             else
1687                 pkt->dts = s->reordered_pts;
1688             s->reordered_pts = pkt->pts;
1689         } else
1690             pkt->dts = pkt->pts;
1691         if (s->current_picture.f->key_frame)
1692             pkt->flags |= AV_PKT_FLAG_KEY;
1693         if (s->mb_info)
1694             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1695     } else {
1696         s->frame_bits = 0;
1697     }
1698     assert((s->frame_bits & 7) == 0);
1699
1700     pkt->size = s->frame_bits / 8;
1701     *got_packet = !!pkt->size;
1702     return 0;
1703 }
1704
1705 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1706                                                 int n, int threshold)
1707 {
1708     static const char tab[64] = {
1709         3, 2, 2, 1, 1, 1, 1, 1,
1710         1, 1, 1, 1, 1, 1, 1, 1,
1711         1, 1, 1, 1, 1, 1, 1, 1,
1712         0, 0, 0, 0, 0, 0, 0, 0,
1713         0, 0, 0, 0, 0, 0, 0, 0,
1714         0, 0, 0, 0, 0, 0, 0, 0,
1715         0, 0, 0, 0, 0, 0, 0, 0,
1716         0, 0, 0, 0, 0, 0, 0, 0
1717     };
1718     int score = 0;
1719     int run = 0;
1720     int i;
1721     int16_t *block = s->block[n];
1722     const int last_index = s->block_last_index[n];
1723     int skip_dc;
1724
1725     if (threshold < 0) {
1726         skip_dc = 0;
1727         threshold = -threshold;
1728     } else
1729         skip_dc = 1;
1730
1731     /* Are all we could set to zero already zero? */
1732     if (last_index <= skip_dc - 1)
1733         return;
1734
1735     for (i = 0; i <= last_index; i++) {
1736         const int j = s->intra_scantable.permutated[i];
1737         const int level = FFABS(block[j]);
1738         if (level == 1) {
1739             if (skip_dc && i == 0)
1740                 continue;
1741             score += tab[run];
1742             run = 0;
1743         } else if (level > 1) {
1744             return;
1745         } else {
1746             run++;
1747         }
1748     }
1749     if (score >= threshold)
1750         return;
1751     for (i = skip_dc; i <= last_index; i++) {
1752         const int j = s->intra_scantable.permutated[i];
1753         block[j] = 0;
1754     }
1755     if (block[0])
1756         s->block_last_index[n] = 0;
1757     else
1758         s->block_last_index[n] = -1;
1759 }
1760
1761 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1762                                int last_index)
1763 {
1764     int i;
1765     const int maxlevel = s->max_qcoeff;
1766     const int minlevel = s->min_qcoeff;
1767     int overflow = 0;
1768
1769     if (s->mb_intra) {
1770         i = 1; // skip clipping of intra dc
1771     } else
1772         i = 0;
1773
1774     for (; i <= last_index; i++) {
1775         const int j = s->intra_scantable.permutated[i];
1776         int level = block[j];
1777
1778         if (level > maxlevel) {
1779             level = maxlevel;
1780             overflow++;
1781         } else if (level < minlevel) {
1782             level = minlevel;
1783             overflow++;
1784         }
1785
1786         block[j] = level;
1787     }
1788
1789     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1790         av_log(s->avctx, AV_LOG_INFO,
1791                "warning, clipping %d dct coefficients to %d..%d\n",
1792                overflow, minlevel, maxlevel);
1793 }
1794
/**
 * Fill an 8x8 weight table from the local activity of an 8x8 pixel block.
 *
 * For every pixel the sum and the sum of squares over its neighbourhood
 * (up to 3x3, cut off at the borders of the 8x8 block) are gathered and
 * turned into 36 * sqrt(count * sqr - sum * sum) / count.
 *
 * @param weight output table of 64 entries, stored row by row
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance in bytes between two pixel rows
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int col, row;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = row > 0     ? row - 1 : 0;
        const int bottom = row + 2 < 8 ? row + 2 : 8;

        for (col = 0; col < 8; col++) {
            const int left  = col > 0     ? col - 1 : 0;
            const int right = col + 2 < 8 ? col + 2 : 8;
            int sum   = 0;
            int sqr   = 0;
            int count = 0;
            int nx, ny;

            for (ny = top; ny < bottom; ny++) {
                for (nx = left; nx < right; nx++) {
                    const int v = ptr[nx + ny * stride];

                    sum   += v;
                    sqr   += v * v;
                    count += 1;
                }
            }
            weight[8 * row + col] =
                (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
1818
1819 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1820                                                 int motion_x, int motion_y,
1821                                                 int mb_block_height,
1822                                                 int mb_block_count)
1823 {
1824     int16_t weight[8][64];
1825     int16_t orig[8][64];
1826     const int mb_x = s->mb_x;
1827     const int mb_y = s->mb_y;
1828     int i;
1829     int skip_dct[8];
1830     int dct_offset = s->linesize * 8; // default for progressive frames
1831     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1832     ptrdiff_t wrap_y, wrap_c;
1833
1834     for (i = 0; i < mb_block_count; i++)
1835         skip_dct[i] = s->skipdct;
1836
1837     if (s->adaptive_quant) {
1838         const int last_qp = s->qscale;
1839         const int mb_xy = mb_x + mb_y * s->mb_stride;
1840
1841         s->lambda = s->lambda_table[mb_xy];
1842         update_qscale(s);
1843
1844         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1845             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1846             s->dquant = s->qscale - last_qp;
1847
1848             if (s->out_format == FMT_H263) {
1849                 s->dquant = av_clip(s->dquant, -2, 2);
1850
1851                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1852                     if (!s->mb_intra) {
1853                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1854                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1855                                 s->dquant = 0;
1856                         }
1857                         if (s->mv_type == MV_TYPE_8X8)
1858                             s->dquant = 0;
1859                     }
1860                 }
1861             }
1862         }
1863         ff_set_qscale(s, last_qp + s->dquant);
1864     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1865         ff_set_qscale(s, s->qscale + s->dquant);
1866
1867     wrap_y = s->linesize;
1868     wrap_c = s->uvlinesize;
1869     ptr_y  = s->new_picture.f->data[0] +
1870              (mb_y * 16 * wrap_y)              + mb_x * 16;
1871     ptr_cb = s->new_picture.f->data[1] +
1872              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1873     ptr_cr = s->new_picture.f->data[2] +
1874              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1875
1876     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1877         uint8_t *ebuf = s->edge_emu_buffer + 32;
1878         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
1879                                  wrap_y, wrap_y,
1880                                  16, 16, mb_x * 16, mb_y * 16,
1881                                  s->width, s->height);
1882         ptr_y = ebuf;
1883         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
1884                                  wrap_c, wrap_c,
1885                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1886                                  s->width >> 1, s->height >> 1);
1887         ptr_cb = ebuf + 18 * wrap_y;
1888         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
1889                                  wrap_c, wrap_c,
1890                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1891                                  s->width >> 1, s->height >> 1);
1892         ptr_cr = ebuf + 18 * wrap_y + 8;
1893     }
1894
1895     if (s->mb_intra) {
1896         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1897             int progressive_score, interlaced_score;
1898
1899             s->interlaced_dct = 0;
1900             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1901                                                     NULL, wrap_y, 8) +
1902                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1903                                                     NULL, wrap_y, 8) - 400;
1904
1905             if (progressive_score > 0) {
1906                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1907                                                        NULL, wrap_y * 2, 8) +
1908                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1909                                                        NULL, wrap_y * 2, 8);
1910                 if (progressive_score > interlaced_score) {
1911                     s->interlaced_dct = 1;
1912
1913                     dct_offset = wrap_y;
1914                     wrap_y <<= 1;
1915                     if (s->chroma_format == CHROMA_422)
1916                         wrap_c <<= 1;
1917                 }
1918             }
1919         }
1920
1921         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1922         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1923         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1924         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1925
1926         if (s->flags & CODEC_FLAG_GRAY) {
1927             skip_dct[4] = 1;
1928             skip_dct[5] = 1;
1929         } else {
1930             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1931             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1932             if (!s->chroma_y_shift) { /* 422 */
1933                 s->dsp.get_pixels(s->block[6],
1934                                   ptr_cb + (dct_offset >> 1), wrap_c);
1935                 s->dsp.get_pixels(s->block[7],
1936                                   ptr_cr + (dct_offset >> 1), wrap_c);
1937             }
1938         }
1939     } else {
1940         op_pixels_func (*op_pix)[4];
1941         qpel_mc_func (*op_qpix)[16];
1942         uint8_t *dest_y, *dest_cb, *dest_cr;
1943
1944         dest_y  = s->dest[0];
1945         dest_cb = s->dest[1];
1946         dest_cr = s->dest[2];
1947
1948         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1949             op_pix  = s->hdsp.put_pixels_tab;
1950             op_qpix = s->qdsp.put_qpel_pixels_tab;
1951         } else {
1952             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
1953             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
1954         }
1955
1956         if (s->mv_dir & MV_DIR_FORWARD) {
1957             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
1958                           s->last_picture.f->data,
1959                           op_pix, op_qpix);
1960             op_pix  = s->hdsp.avg_pixels_tab;
1961             op_qpix = s->qdsp.avg_qpel_pixels_tab;
1962         }
1963         if (s->mv_dir & MV_DIR_BACKWARD) {
1964             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
1965                           s->next_picture.f->data,
1966                           op_pix, op_qpix);
1967         }
1968
1969         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1970             int progressive_score, interlaced_score;
1971
1972             s->interlaced_dct = 0;
1973             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1974                                                     ptr_y,              wrap_y,
1975                                                     8) +
1976                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1977                                                     ptr_y + wrap_y * 8, wrap_y,
1978                                                     8) - 400;
1979
1980             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1981                 progressive_score -= 400;
1982
1983             if (progressive_score > 0) {
1984                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1985                                                        ptr_y,
1986                                                        wrap_y * 2, 8) +
1987                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1988                                                        ptr_y + wrap_y,
1989                                                        wrap_y * 2, 8);
1990
1991                 if (progressive_score > interlaced_score) {
1992                     s->interlaced_dct = 1;
1993
1994                     dct_offset = wrap_y;
1995                     wrap_y <<= 1;
1996                     if (s->chroma_format == CHROMA_422)
1997                         wrap_c <<= 1;
1998                 }
1999             }
2000         }
2001
2002         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2003         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2004         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2005                            dest_y + dct_offset, wrap_y);
2006         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2007                            dest_y + dct_offset + 8, wrap_y);
2008
2009         if (s->flags & CODEC_FLAG_GRAY) {
2010             skip_dct[4] = 1;
2011             skip_dct[5] = 1;
2012         } else {
2013             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2014             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2015             if (!s->chroma_y_shift) { /* 422 */
2016                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
2017                                    dest_cb + (dct_offset >> 1), wrap_c);
2018                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
2019                                    dest_cr + (dct_offset >> 1), wrap_c);
2020             }
2021         }
2022         /* pre quantization */
2023         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2024                 2 * s->qscale * s->qscale) {
2025             // FIXME optimize
2026             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
2027                               wrap_y, 8) < 20 * s->qscale)
2028                 skip_dct[0] = 1;
2029             if (s->dsp.sad[1](NULL, ptr_y + 8,
2030                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2031                 skip_dct[1] = 1;
2032             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
2033                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
2034                 skip_dct[2] = 1;
2035             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
2036                               dest_y + dct_offset + 8,
2037                               wrap_y, 8) < 20 * s->qscale)
2038                 skip_dct[3] = 1;
2039             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
2040                               wrap_c, 8) < 20 * s->qscale)
2041                 skip_dct[4] = 1;
2042             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
2043                               wrap_c, 8) < 20 * s->qscale)
2044                 skip_dct[5] = 1;
2045             if (!s->chroma_y_shift) { /* 422 */
2046                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2047                                   dest_cb + (dct_offset >> 1),
2048                                   wrap_c, 8) < 20 * s->qscale)
2049                     skip_dct[6] = 1;
2050                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2051                                   dest_cr + (dct_offset >> 1),
2052                                   wrap_c, 8) < 20 * s->qscale)
2053                     skip_dct[7] = 1;
2054             }
2055         }
2056     }
2057
2058     if (s->quantizer_noise_shaping) {
2059         if (!skip_dct[0])
2060             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2061         if (!skip_dct[1])
2062             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2063         if (!skip_dct[2])
2064             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2065         if (!skip_dct[3])
2066             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2067         if (!skip_dct[4])
2068             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2069         if (!skip_dct[5])
2070             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2071         if (!s->chroma_y_shift) { /* 422 */
2072             if (!skip_dct[6])
2073                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2074                                   wrap_c);
2075             if (!skip_dct[7])
2076                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2077                                   wrap_c);
2078         }
2079         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2080     }
2081
2082     /* DCT & quantize */
2083     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
2084     {
2085         for (i = 0; i < mb_block_count; i++) {
2086             if (!skip_dct[i]) {
2087                 int overflow;
2088                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2089                 // FIXME we could decide to change to quantizer instead of
2090                 // clipping
2091                 // JS: I don't think that would be a good idea it could lower
2092                 //     quality instead of improve it. Just INTRADC clipping
2093                 //     deserves changes in quantizer
2094                 if (overflow)
2095                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2096             } else
2097                 s->block_last_index[i] = -1;
2098         }
2099         if (s->quantizer_noise_shaping) {
2100             for (i = 0; i < mb_block_count; i++) {
2101                 if (!skip_dct[i]) {
2102                     s->block_last_index[i] =
2103                         dct_quantize_refine(s, s->block[i], weight[i],
2104                                             orig[i], i, s->qscale);
2105                 }
2106             }
2107         }
2108
2109         if (s->luma_elim_threshold && !s->mb_intra)
2110             for (i = 0; i < 4; i++)
2111                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2112         if (s->chroma_elim_threshold && !s->mb_intra)
2113             for (i = 4; i < mb_block_count; i++)
2114                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2115
2116         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2117             for (i = 0; i < mb_block_count; i++) {
2118                 if (s->block_last_index[i] == -1)
2119                     s->coded_score[i] = INT_MAX / 256;
2120             }
2121         }
2122     }
2123
2124     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2125         s->block_last_index[4] =
2126         s->block_last_index[5] = 0;
2127         s->block[4][0] =
2128         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2129     }
2130
2131     // non c quantize code returns incorrect block_last_index FIXME
2132     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2133         for (i = 0; i < mb_block_count; i++) {
2134             int j;
2135             if (s->block_last_index[i] > 0) {
2136                 for (j = 63; j > 0; j--) {
2137                     if (s->block[i][s->intra_scantable.permutated[j]])
2138                         break;
2139                 }
2140                 s->block_last_index[i] = j;
2141             }
2142         }
2143     }
2144
2145     /* huffman encode */
2146     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2147     case AV_CODEC_ID_MPEG1VIDEO:
2148     case AV_CODEC_ID_MPEG2VIDEO:
2149         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2150             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2151         break;
2152     case AV_CODEC_ID_MPEG4:
2153         if (CONFIG_MPEG4_ENCODER)
2154             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2155         break;
2156     case AV_CODEC_ID_MSMPEG4V2:
2157     case AV_CODEC_ID_MSMPEG4V3:
2158     case AV_CODEC_ID_WMV1:
2159         if (CONFIG_MSMPEG4_ENCODER)
2160             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2161         break;
2162     case AV_CODEC_ID_WMV2:
2163         if (CONFIG_WMV2_ENCODER)
2164             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2165         break;
2166     case AV_CODEC_ID_H261:
2167         if (CONFIG_H261_ENCODER)
2168             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2169         break;
2170     case AV_CODEC_ID_H263:
2171     case AV_CODEC_ID_H263P:
2172     case AV_CODEC_ID_FLV1:
2173     case AV_CODEC_ID_RV10:
2174     case AV_CODEC_ID_RV20:
2175         if (CONFIG_H263_ENCODER)
2176             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2177         break;
2178     case AV_CODEC_ID_MJPEG:
2179         if (CONFIG_MJPEG_ENCODER)
2180             ff_mjpeg_encode_mb(s, s->block);
2181         break;
2182     default:
2183         assert(0);
2184     }
2185 }
2186
2187 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2188 {
2189     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2190     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2191 }
2192
2193 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2194     int i;
2195
2196     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2197
2198     /* mpeg1 */
2199     d->mb_skip_run= s->mb_skip_run;
2200     for(i=0; i<3; i++)
2201         d->last_dc[i] = s->last_dc[i];
2202
2203     /* statistics */
2204     d->mv_bits= s->mv_bits;
2205     d->i_tex_bits= s->i_tex_bits;
2206     d->p_tex_bits= s->p_tex_bits;
2207     d->i_count= s->i_count;
2208     d->f_count= s->f_count;
2209     d->b_count= s->b_count;
2210     d->skip_count= s->skip_count;
2211     d->misc_bits= s->misc_bits;
2212     d->last_bits= 0;
2213
2214     d->mb_skipped= 0;
2215     d->qscale= s->qscale;
2216     d->dquant= s->dquant;
2217
2218     d->esc3_level_length= s->esc3_level_length;
2219 }
2220
2221 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2222     int i;
2223
2224     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2225     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2226
2227     /* mpeg1 */
2228     d->mb_skip_run= s->mb_skip_run;
2229     for(i=0; i<3; i++)
2230         d->last_dc[i] = s->last_dc[i];
2231
2232     /* statistics */
2233     d->mv_bits= s->mv_bits;
2234     d->i_tex_bits= s->i_tex_bits;
2235     d->p_tex_bits= s->p_tex_bits;
2236     d->i_count= s->i_count;
2237     d->f_count= s->f_count;
2238     d->b_count= s->b_count;
2239     d->skip_count= s->skip_count;
2240     d->misc_bits= s->misc_bits;
2241
2242     d->mb_intra= s->mb_intra;
2243     d->mb_skipped= s->mb_skipped;
2244     d->mv_type= s->mv_type;
2245     d->mv_dir= s->mv_dir;
2246     d->pb= s->pb;
2247     if(s->data_partitioning){
2248         d->pb2= s->pb2;
2249         d->tex_pb= s->tex_pb;
2250     }
2251     d->block= s->block;
2252     for(i=0; i<8; i++)
2253         d->block_last_index[i]= s->block_last_index[i];
2254     d->interlaced_dct= s->interlaced_dct;
2255     d->qscale= s->qscale;
2256
2257     d->esc3_level_length= s->esc3_level_length;
2258 }
2259
2260 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2261                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2262                            int *dmin, int *next_block, int motion_x, int motion_y)
2263 {
2264     int score;
2265     uint8_t *dest_backup[3];
2266
2267     copy_context_before_encode(s, backup, type);
2268
2269     s->block= s->blocks[*next_block];
2270     s->pb= pb[*next_block];
2271     if(s->data_partitioning){
2272         s->pb2   = pb2   [*next_block];
2273         s->tex_pb= tex_pb[*next_block];
2274     }
2275
2276     if(*next_block){
2277         memcpy(dest_backup, s->dest, sizeof(s->dest));
2278         s->dest[0] = s->rd_scratchpad;
2279         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2280         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2281         assert(s->linesize >= 32); //FIXME
2282     }
2283
2284     encode_mb(s, motion_x, motion_y);
2285
2286     score= put_bits_count(&s->pb);
2287     if(s->data_partitioning){
2288         score+= put_bits_count(&s->pb2);
2289         score+= put_bits_count(&s->tex_pb);
2290     }
2291
2292     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2293         ff_MPV_decode_mb(s, s->block);
2294
2295         score *= s->lambda2;
2296         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2297     }
2298
2299     if(*next_block){
2300         memcpy(s->dest, dest_backup, sizeof(s->dest));
2301     }
2302
2303     if(score<*dmin){
2304         *dmin= score;
2305         *next_block^=1;
2306
2307         copy_context_after_encode(best, s, type);
2308     }
2309 }
2310
2311 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2312     uint32_t *sq = ff_square_tab + 256;
2313     int acc=0;
2314     int x,y;
2315
2316     if(w==16 && h==16)
2317         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2318     else if(w==8 && h==8)
2319         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2320
2321     for(y=0; y<h; y++){
2322         for(x=0; x<w; x++){
2323             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2324         }
2325     }
2326
2327     assert(acc>=0);
2328
2329     return acc;
2330 }
2331
2332 static int sse_mb(MpegEncContext *s){
2333     int w= 16;
2334     int h= 16;
2335
2336     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2337     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2338
2339     if(w==16 && h==16)
2340       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2341         return  s->dsp.nsse[0](s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2342                +s->dsp.nsse[1](s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2343                +s->dsp.nsse[1](s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2344       }else{
2345         return  s->dsp.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2346                +s->dsp.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2347                +s->dsp.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2348       }
2349     else
2350         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2351                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2352                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2353 }
2354
2355 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2356     MpegEncContext *s= *(void**)arg;
2357
2358
2359     s->me.pre_pass=1;
2360     s->me.dia_size= s->avctx->pre_dia_size;
2361     s->first_slice_line=1;
2362     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2363         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2364             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2365         }
2366         s->first_slice_line=0;
2367     }
2368
2369     s->me.pre_pass=0;
2370
2371     return 0;
2372 }
2373
2374 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2375     MpegEncContext *s= *(void**)arg;
2376
2377     s->me.dia_size= s->avctx->dia_size;
2378     s->first_slice_line=1;
2379     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2380         s->mb_x=0; //for block init below
2381         ff_init_block_index(s);
2382         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2383             s->block_index[0]+=2;
2384             s->block_index[1]+=2;
2385             s->block_index[2]+=2;
2386             s->block_index[3]+=2;
2387
2388             /* compute motion vector & mb_type and store in context */
2389             if(s->pict_type==AV_PICTURE_TYPE_B)
2390                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2391             else
2392                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2393         }
2394         s->first_slice_line=0;
2395     }
2396     return 0;
2397 }
2398
2399 static int mb_var_thread(AVCodecContext *c, void *arg){
2400     MpegEncContext *s= *(void**)arg;
2401     int mb_x, mb_y;
2402
2403     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2404         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2405             int xx = mb_x * 16;
2406             int yy = mb_y * 16;
2407             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2408             int varc;
2409             int sum = s->dsp.pix_sum(pix, s->linesize);
2410
2411             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2412
2413             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2414             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2415             s->me.mb_var_sum_temp    += varc;
2416         }
2417     }
2418     return 0;
2419 }
2420
2421 static void write_slice_end(MpegEncContext *s){
2422     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2423         if(s->partitioned_frame){
2424             ff_mpeg4_merge_partitions(s);
2425         }
2426
2427         ff_mpeg4_stuffing(&s->pb);
2428     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2429         ff_mjpeg_encode_stuffing(&s->pb);
2430     }
2431
2432     avpriv_align_put_bits(&s->pb);
2433     flush_put_bits(&s->pb);
2434
2435     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2436         s->misc_bits+= get_bits_diff(s);
2437 }
2438
2439 static void write_mb_info(MpegEncContext *s)
2440 {
2441     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2442     int offset = put_bits_count(&s->pb);
2443     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2444     int gobn = s->mb_y / s->gob_index;
2445     int pred_x, pred_y;
2446     if (CONFIG_H263_ENCODER)
2447         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2448     bytestream_put_le32(&ptr, offset);
2449     bytestream_put_byte(&ptr, s->qscale);
2450     bytestream_put_byte(&ptr, gobn);
2451     bytestream_put_le16(&ptr, mba);
2452     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2453     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2454     /* 4MV not implemented */
2455     bytestream_put_byte(&ptr, 0); /* hmv2 */
2456     bytestream_put_byte(&ptr, 0); /* vmv2 */
2457 }
2458
2459 static void update_mb_info(MpegEncContext *s, int startcode)
2460 {
2461     if (!s->mb_info)
2462         return;
2463     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2464         s->mb_info_size += 12;
2465         s->prev_mb_info = s->last_mb_info;
2466     }
2467     if (startcode) {
2468         s->prev_mb_info = put_bits_count(&s->pb)/8;
2469         /* This might have incremented mb_info_size above, and we return without
2470          * actually writing any info into that slot yet. But in that case,
2471          * this will be called again at the start of the after writing the
2472          * start code, actually writing the mb info. */
2473         return;
2474     }
2475
2476     s->last_mb_info = put_bits_count(&s->pb)/8;
2477     if (!s->mb_info_size)
2478         s->mb_info_size += 12;
2479     write_mb_info(s);
2480 }
2481
2482 static int encode_thread(AVCodecContext *c, void *arg){
2483     MpegEncContext *s= *(void**)arg;
2484     int mb_x, mb_y, pdif = 0;
2485     int chr_h= 16>>s->chroma_y_shift;
2486     int i, j;
2487     MpegEncContext best_s, backup_s;
2488     uint8_t bit_buf[2][MAX_MB_BYTES];
2489     uint8_t bit_buf2[2][MAX_MB_BYTES];
2490     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2491     PutBitContext pb[2], pb2[2], tex_pb[2];
2492
2493     for(i=0; i<2; i++){
2494         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2495         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2496         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2497     }
2498
2499     s->last_bits= put_bits_count(&s->pb);
2500     s->mv_bits=0;
2501     s->misc_bits=0;
2502     s->i_tex_bits=0;
2503     s->p_tex_bits=0;
2504     s->i_count=0;
2505     s->f_count=0;
2506     s->b_count=0;
2507     s->skip_count=0;
2508
2509     for(i=0; i<3; i++){
2510         /* init last dc values */
2511         /* note: quant matrix value (8) is implied here */
2512         s->last_dc[i] = 128 << s->intra_dc_precision;
2513
2514         s->current_picture.f->error[i] = 0;
2515     }
2516     s->mb_skip_run = 0;
2517     memset(s->last_mv, 0, sizeof(s->last_mv));
2518
2519     s->last_mv_dir = 0;
2520
2521     switch(s->codec_id){
2522     case AV_CODEC_ID_H263:
2523     case AV_CODEC_ID_H263P:
2524     case AV_CODEC_ID_FLV1:
2525         if (CONFIG_H263_ENCODER)
2526             s->gob_index = ff_h263_get_gob_height(s);
2527         break;
2528     case AV_CODEC_ID_MPEG4:
2529         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2530             ff_mpeg4_init_partitions(s);
2531         break;
2532     }
2533
2534     s->resync_mb_x=0;
2535     s->resync_mb_y=0;
2536     s->first_slice_line = 1;
2537     s->ptr_lastgob = s->pb.buf;
2538     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2539         s->mb_x=0;
2540         s->mb_y= mb_y;
2541
2542         ff_set_qscale(s, s->qscale);
2543         ff_init_block_index(s);
2544
2545         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2546             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2547             int mb_type= s->mb_type[xy];
2548 //            int d;
2549             int dmin= INT_MAX;
2550             int dir;
2551
2552             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2553                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2554                 return -1;
2555             }
2556             if(s->data_partitioning){
2557                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2558                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2559                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2560                     return -1;
2561                 }
2562             }
2563
2564             s->mb_x = mb_x;
2565             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2566             ff_update_block_index(s);
2567
2568             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2569                 ff_h261_reorder_mb_index(s);
2570                 xy= s->mb_y*s->mb_stride + s->mb_x;
2571                 mb_type= s->mb_type[xy];
2572             }
2573
2574             /* write gob / video packet header  */
2575             if(s->rtp_mode){
2576                 int current_packet_size, is_gob_start;
2577
2578                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2579
2580                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2581
2582                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2583
2584                 switch(s->codec_id){
2585                 case AV_CODEC_ID_H263:
2586                 case AV_CODEC_ID_H263P:
2587                     if(!s->h263_slice_structured)
2588                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2589                     break;
2590                 case AV_CODEC_ID_MPEG2VIDEO:
2591                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2592                 case AV_CODEC_ID_MPEG1VIDEO:
2593                     if(s->mb_skip_run) is_gob_start=0;
2594                     break;
2595                 }
2596
2597                 if(is_gob_start){
2598                     if(s->start_mb_y != mb_y || mb_x!=0){
2599                         write_slice_end(s);
2600
2601                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2602                             ff_mpeg4_init_partitions(s);
2603                         }
2604                     }
2605
2606                     assert((put_bits_count(&s->pb)&7) == 0);
2607                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2608
2609                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2610                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2611                         int d = 100 / s->error_rate;
2612                         if(r % d == 0){
2613                             current_packet_size=0;
2614                             s->pb.buf_ptr= s->ptr_lastgob;
2615                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2616                         }
2617                     }
2618
2619                     if (s->avctx->rtp_callback){
2620                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2621                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2622                     }
2623                     update_mb_info(s, 1);
2624
2625                     switch(s->codec_id){
2626                     case AV_CODEC_ID_MPEG4:
2627                         if (CONFIG_MPEG4_ENCODER) {
2628                             ff_mpeg4_encode_video_packet_header(s);
2629                             ff_mpeg4_clean_buffers(s);
2630                         }
2631                     break;
2632                     case AV_CODEC_ID_MPEG1VIDEO:
2633                     case AV_CODEC_ID_MPEG2VIDEO:
2634                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2635                             ff_mpeg1_encode_slice_header(s);
2636                             ff_mpeg1_clean_buffers(s);
2637                         }
2638                     break;
2639                     case AV_CODEC_ID_H263:
2640                     case AV_CODEC_ID_H263P:
2641                         if (CONFIG_H263_ENCODER)
2642                             ff_h263_encode_gob_header(s, mb_y);
2643                     break;
2644                     }
2645
2646                     if(s->flags&CODEC_FLAG_PASS1){
2647                         int bits= put_bits_count(&s->pb);
2648                         s->misc_bits+= bits - s->last_bits;
2649                         s->last_bits= bits;
2650                     }
2651
2652                     s->ptr_lastgob += current_packet_size;
2653                     s->first_slice_line=1;
2654                     s->resync_mb_x=mb_x;
2655                     s->resync_mb_y=mb_y;
2656                 }
2657             }
2658
2659             if(  (s->resync_mb_x   == s->mb_x)
2660                && s->resync_mb_y+1 == s->mb_y){
2661                 s->first_slice_line=0;
2662             }
2663
2664             s->mb_skipped=0;
2665             s->dquant=0; //only for QP_RD
2666
2667             update_mb_info(s, 0);
2668
2669             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2670                 int next_block=0;
2671                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2672
2673                 copy_context_before_encode(&backup_s, s, -1);
2674                 backup_s.pb= s->pb;
2675                 best_s.data_partitioning= s->data_partitioning;
2676                 best_s.partitioned_frame= s->partitioned_frame;
2677                 if(s->data_partitioning){
2678                     backup_s.pb2= s->pb2;
2679                     backup_s.tex_pb= s->tex_pb;
2680                 }
2681
2682                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2683                     s->mv_dir = MV_DIR_FORWARD;
2684                     s->mv_type = MV_TYPE_16X16;
2685                     s->mb_intra= 0;
2686                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2687                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2688                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2689                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2690                 }
2691                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2692                     s->mv_dir = MV_DIR_FORWARD;
2693                     s->mv_type = MV_TYPE_FIELD;
2694                     s->mb_intra= 0;
2695                     for(i=0; i<2; i++){
2696                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2697                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2698                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2699                     }
2700                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2701                                  &dmin, &next_block, 0, 0);
2702                 }
2703                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2704                     s->mv_dir = MV_DIR_FORWARD;
2705                     s->mv_type = MV_TYPE_16X16;
2706                     s->mb_intra= 0;
2707                     s->mv[0][0][0] = 0;
2708                     s->mv[0][0][1] = 0;
2709                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2710                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2711                 }
2712                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2713                     s->mv_dir = MV_DIR_FORWARD;
2714                     s->mv_type = MV_TYPE_8X8;
2715                     s->mb_intra= 0;
2716                     for(i=0; i<4; i++){
2717                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2718                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2719                     }
2720                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2721                                  &dmin, &next_block, 0, 0);
2722                 }
2723                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2724                     s->mv_dir = MV_DIR_FORWARD;
2725                     s->mv_type = MV_TYPE_16X16;
2726                     s->mb_intra= 0;
2727                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2728                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2729                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2730                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2731                 }
2732                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2733                     s->mv_dir = MV_DIR_BACKWARD;
2734                     s->mv_type = MV_TYPE_16X16;
2735                     s->mb_intra= 0;
2736                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2737                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2738                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2739                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2740                 }
2741                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2742                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2743                     s->mv_type = MV_TYPE_16X16;
2744                     s->mb_intra= 0;
2745                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2746                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2747                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2748                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2749                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2750                                  &dmin, &next_block, 0, 0);
2751                 }
2752                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2753                     s->mv_dir = MV_DIR_FORWARD;
2754                     s->mv_type = MV_TYPE_FIELD;
2755                     s->mb_intra= 0;
2756                     for(i=0; i<2; i++){
2757                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2758                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2759                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2760                     }
2761                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2762                                  &dmin, &next_block, 0, 0);
2763                 }
2764                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2765                     s->mv_dir = MV_DIR_BACKWARD;
2766                     s->mv_type = MV_TYPE_FIELD;
2767                     s->mb_intra= 0;
2768                     for(i=0; i<2; i++){
2769                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2770                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2771                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2772                     }
2773                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2774                                  &dmin, &next_block, 0, 0);
2775                 }
2776                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2777                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2778                     s->mv_type = MV_TYPE_FIELD;
2779                     s->mb_intra= 0;
2780                     for(dir=0; dir<2; dir++){
2781                         for(i=0; i<2; i++){
2782                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2783                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2784                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2785                         }
2786                     }
2787                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2788                                  &dmin, &next_block, 0, 0);
2789                 }
2790                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2791                     s->mv_dir = 0;
2792                     s->mv_type = MV_TYPE_16X16;
2793                     s->mb_intra= 1;
2794                     s->mv[0][0][0] = 0;
2795                     s->mv[0][0][1] = 0;
2796                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2797                                  &dmin, &next_block, 0, 0);
2798                     if(s->h263_pred || s->h263_aic){
2799                         if(best_s.mb_intra)
2800                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2801                         else
2802                             ff_clean_intra_table_entries(s); //old mode?
2803                     }
2804                 }
2805
2806                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2807                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2808                         const int last_qp= backup_s.qscale;
2809                         int qpi, qp, dc[6];
2810                         int16_t ac[6][16];
2811                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2812                         static const int dquant_tab[4]={-1,1,-2,2};
2813
2814                         assert(backup_s.dquant == 0);
2815
2816                         //FIXME intra
2817                         s->mv_dir= best_s.mv_dir;
2818                         s->mv_type = MV_TYPE_16X16;
2819                         s->mb_intra= best_s.mb_intra;
2820                         s->mv[0][0][0] = best_s.mv[0][0][0];
2821                         s->mv[0][0][1] = best_s.mv[0][0][1];
2822                         s->mv[1][0][0] = best_s.mv[1][0][0];
2823                         s->mv[1][0][1] = best_s.mv[1][0][1];
2824
2825                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2826                         for(; qpi<4; qpi++){
2827                             int dquant= dquant_tab[qpi];
2828                             qp= last_qp + dquant;
2829                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2830                                 continue;
2831                             backup_s.dquant= dquant;
2832                             if(s->mb_intra && s->dc_val[0]){
2833                                 for(i=0; i<6; i++){
2834                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2835                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2836                                 }
2837                             }
2838
2839                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2840                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2841                             if(best_s.qscale != qp){
2842                                 if(s->mb_intra && s->dc_val[0]){
2843                                     for(i=0; i<6; i++){
2844                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2845                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2846                                     }
2847                                 }
2848                             }
2849                         }
2850                     }
2851                 }
2852                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2853                     int mx= s->b_direct_mv_table[xy][0];
2854                     int my= s->b_direct_mv_table[xy][1];
2855
2856                     backup_s.dquant = 0;
2857                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2858                     s->mb_intra= 0;
2859                     ff_mpeg4_set_direct_mv(s, mx, my);
2860                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2861                                  &dmin, &next_block, mx, my);
2862                 }
2863                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2864                     backup_s.dquant = 0;
2865                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2866                     s->mb_intra= 0;
2867                     ff_mpeg4_set_direct_mv(s, 0, 0);
2868                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2869                                  &dmin, &next_block, 0, 0);
2870                 }
2871                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2872                     int coded=0;
2873                     for(i=0; i<6; i++)
2874                         coded |= s->block_last_index[i];
2875                     if(coded){
2876                         int mx,my;
2877                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2878                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2879                             mx=my=0; //FIXME find the one we actually used
2880                             ff_mpeg4_set_direct_mv(s, mx, my);
2881                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2882                             mx= s->mv[1][0][0];
2883                             my= s->mv[1][0][1];
2884                         }else{
2885                             mx= s->mv[0][0][0];
2886                             my= s->mv[0][0][1];
2887                         }
2888
2889                         s->mv_dir= best_s.mv_dir;
2890                         s->mv_type = best_s.mv_type;
2891                         s->mb_intra= 0;
2892 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2893                         s->mv[0][0][1] = best_s.mv[0][0][1];
2894                         s->mv[1][0][0] = best_s.mv[1][0][0];
2895                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2896                         backup_s.dquant= 0;
2897                         s->skipdct=1;
2898                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2899                                         &dmin, &next_block, mx, my);
2900                         s->skipdct=0;
2901                     }
2902                 }
2903
2904                 s->current_picture.qscale_table[xy] = best_s.qscale;
2905
2906                 copy_context_after_encode(s, &best_s, -1);
2907
2908                 pb_bits_count= put_bits_count(&s->pb);
2909                 flush_put_bits(&s->pb);
2910                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2911                 s->pb= backup_s.pb;
2912
2913                 if(s->data_partitioning){
2914                     pb2_bits_count= put_bits_count(&s->pb2);
2915                     flush_put_bits(&s->pb2);
2916                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2917                     s->pb2= backup_s.pb2;
2918
2919                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2920                     flush_put_bits(&s->tex_pb);
2921                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2922                     s->tex_pb= backup_s.tex_pb;
2923                 }
2924                 s->last_bits= put_bits_count(&s->pb);
2925
2926                 if (CONFIG_H263_ENCODER &&
2927                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2928                     ff_h263_update_motion_val(s);
2929
2930                 if(next_block==0){ //FIXME 16 vs linesize16
2931                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2932                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2933                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2934                 }
2935
2936                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2937                     ff_MPV_decode_mb(s, s->block);
2938             } else {
2939                 int motion_x = 0, motion_y = 0;
2940                 s->mv_type=MV_TYPE_16X16;
2941                 // only one MB-Type possible
2942
2943                 switch(mb_type){
2944                 case CANDIDATE_MB_TYPE_INTRA:
2945                     s->mv_dir = 0;
2946                     s->mb_intra= 1;
2947                     motion_x= s->mv[0][0][0] = 0;
2948                     motion_y= s->mv[0][0][1] = 0;
2949                     break;
2950                 case CANDIDATE_MB_TYPE_INTER:
2951                     s->mv_dir = MV_DIR_FORWARD;
2952                     s->mb_intra= 0;
2953                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2954                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2955                     break;
2956                 case CANDIDATE_MB_TYPE_INTER_I:
2957                     s->mv_dir = MV_DIR_FORWARD;
2958                     s->mv_type = MV_TYPE_FIELD;
2959                     s->mb_intra= 0;
2960                     for(i=0; i<2; i++){
2961                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2962                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2963                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2964                     }
2965                     break;
2966                 case CANDIDATE_MB_TYPE_INTER4V:
2967                     s->mv_dir = MV_DIR_FORWARD;
2968                     s->mv_type = MV_TYPE_8X8;
2969                     s->mb_intra= 0;
2970                     for(i=0; i<4; i++){
2971                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2972                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2973                     }
2974                     break;
2975                 case CANDIDATE_MB_TYPE_DIRECT:
2976                     if (CONFIG_MPEG4_ENCODER) {
2977                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2978                         s->mb_intra= 0;
2979                         motion_x=s->b_direct_mv_table[xy][0];
2980                         motion_y=s->b_direct_mv_table[xy][1];
2981                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2982                     }
2983                     break;
2984                 case CANDIDATE_MB_TYPE_DIRECT0:
2985                     if (CONFIG_MPEG4_ENCODER) {
2986                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2987                         s->mb_intra= 0;
2988                         ff_mpeg4_set_direct_mv(s, 0, 0);
2989                     }
2990                     break;
2991                 case CANDIDATE_MB_TYPE_BIDIR:
2992                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2993                     s->mb_intra= 0;
2994                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2995                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2996                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2997                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2998                     break;
2999                 case CANDIDATE_MB_TYPE_BACKWARD:
3000                     s->mv_dir = MV_DIR_BACKWARD;
3001                     s->mb_intra= 0;
3002                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3003                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3004                     break;
3005                 case CANDIDATE_MB_TYPE_FORWARD:
3006                     s->mv_dir = MV_DIR_FORWARD;
3007                     s->mb_intra= 0;
3008                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3009                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3010                     break;
3011                 case CANDIDATE_MB_TYPE_FORWARD_I:
3012                     s->mv_dir = MV_DIR_FORWARD;
3013                     s->mv_type = MV_TYPE_FIELD;
3014                     s->mb_intra= 0;
3015                     for(i=0; i<2; i++){
3016                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3017                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3018                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3019                     }
3020                     break;
3021                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3022                     s->mv_dir = MV_DIR_BACKWARD;
3023                     s->mv_type = MV_TYPE_FIELD;
3024                     s->mb_intra= 0;
3025                     for(i=0; i<2; i++){
3026                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3027                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3028                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3029                     }
3030                     break;
3031                 case CANDIDATE_MB_TYPE_BIDIR_I:
3032                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3033                     s->mv_type = MV_TYPE_FIELD;
3034                     s->mb_intra= 0;
3035                     for(dir=0; dir<2; dir++){
3036                         for(i=0; i<2; i++){
3037                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3038                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3039                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3040                         }
3041                     }
3042                     break;
3043                 default:
3044                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3045                 }
3046
3047                 encode_mb(s, motion_x, motion_y);
3048
3049                 // RAL: Update last macroblock type
3050                 s->last_mv_dir = s->mv_dir;
3051
3052                 if (CONFIG_H263_ENCODER &&
3053                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3054                     ff_h263_update_motion_val(s);
3055
3056                 ff_MPV_decode_mb(s, s->block);
3057             }
3058
3059             /* clean the MV table in IPS frames for direct mode in B frames */
3060             if(s->mb_intra /* && I,P,S_TYPE */){
3061                 s->p_mv_table[xy][0]=0;
3062                 s->p_mv_table[xy][1]=0;
3063             }
3064
3065             if(s->flags&CODEC_FLAG_PSNR){
3066                 int w= 16;
3067                 int h= 16;
3068
3069                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3070                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3071
3072                 s->current_picture.f->error[0] += sse(
3073                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3074                     s->dest[0], w, h, s->linesize);
3075                 s->current_picture.f->error[1] += sse(
3076                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3077                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3078                 s->current_picture.f->error[2] += sse(
3079                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3080                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3081             }
3082             if(s->loop_filter){
3083                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3084                     ff_h263_loop_filter(s);
3085             }
3086             av_dlog(s->avctx, "MB %d %d bits\n",
3087                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3088         }
3089     }
3090
3091     //not beautiful here but we must write it before flushing so it has to be here
3092     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3093         ff_msmpeg4_encode_ext_header(s);
3094
3095     write_slice_end(s);
3096
3097     /* Send the last GOB if RTP */
3098     if (s->avctx->rtp_callback) {
3099         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3100         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3101         /* Call the RTP callback to send the last GOB */
3102         emms_c();
3103         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3104     }
3105
3106     return 0;
3107 }
3108
3109 #define MERGE(field) dst->field += src->field; src->field=0
3110 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3111     MERGE(me.scene_change_score);
3112     MERGE(me.mc_mb_var_sum_temp);
3113     MERGE(me.mb_var_sum_temp);
3114 }
3115
3116 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3117     int i;
3118
3119     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3120     MERGE(dct_count[1]);
3121     MERGE(mv_bits);
3122     MERGE(i_tex_bits);
3123     MERGE(p_tex_bits);
3124     MERGE(i_count);
3125     MERGE(f_count);
3126     MERGE(b_count);
3127     MERGE(skip_count);
3128     MERGE(misc_bits);
3129     MERGE(er.error_count);
3130     MERGE(padding_bug_score);
3131     MERGE(current_picture.f->error[0]);
3132     MERGE(current_picture.f->error[1]);
3133     MERGE(current_picture.f->error[2]);
3134
3135     if(dst->avctx->noise_reduction){
3136         for(i=0; i<64; i++){
3137             MERGE(dct_error_sum[0][i]);
3138             MERGE(dct_error_sum[1][i]);
3139         }
3140     }
3141
3142     assert(put_bits_count(&src->pb) % 8 ==0);
3143     assert(put_bits_count(&dst->pb) % 8 ==0);
3144     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3145     flush_put_bits(&dst->pb);
3146 }
3147
3148 static int estimate_qp(MpegEncContext *s, int dry_run){
3149     if (s->next_lambda){
3150         s->current_picture_ptr->f->quality =
3151         s->current_picture.f->quality = s->next_lambda;
3152         if(!dry_run) s->next_lambda= 0;
3153     } else if (!s->fixed_qscale) {
3154         s->current_picture_ptr->f->quality =
3155         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3156         if (s->current_picture.f->quality < 0)
3157             return -1;
3158     }
3159
3160     if(s->adaptive_quant){
3161         switch(s->codec_id){
3162         case AV_CODEC_ID_MPEG4:
3163             if (CONFIG_MPEG4_ENCODER)
3164                 ff_clean_mpeg4_qscales(s);
3165             break;
3166         case AV_CODEC_ID_H263:
3167         case AV_CODEC_ID_H263P:
3168         case AV_CODEC_ID_FLV1:
3169             if (CONFIG_H263_ENCODER)
3170                 ff_clean_h263_qscales(s);
3171             break;
3172         default:
3173             ff_init_qscale_tab(s);
3174         }
3175
3176         s->lambda= s->lambda_table[0];
3177         //FIXME broken
3178     }else
3179         s->lambda = s->current_picture.f->quality;
3180     update_qscale(s);
3181     return 0;
3182 }
3183
3184 /* must be called before writing the header */
3185 static void set_frame_distances(MpegEncContext * s){
3186     assert(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3187     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3188
3189     if(s->pict_type==AV_PICTURE_TYPE_B){
3190         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3191         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3192     }else{
3193         s->pp_time= s->time - s->last_non_b_time;
3194         s->last_non_b_time= s->time;
3195         assert(s->picture_number==0 || s->pp_time > 0);
3196     }
3197 }
3198
/**
 * Encode one picture: run motion estimation (or, for I-pictures, the
 * mb_var_thread analysis), choose f_code/b_code and the quantiser, write the
 * format-specific picture header, then run encode_thread over all slice
 * contexts and merge their results into s.
 *
 * @param s              encoder context; many of its fields are updated
 * @param picture_number stored in s->picture_number and forwarded to most
 *                       picture-header writers
 * @return 0 on success; -1 if estimate_qp() or ff_init_me() fails, or the
 *         negative value returned by ff_update_duplicate_context()
 */
3199 static int encode_picture(MpegEncContext *s, int picture_number)
3200 {
3201     int i, ret;
3202     int bits;
3203     int context_count = s->slice_context_count;
3204
3205     s->picture_number = picture_number;
3206
3207     /* Reset the average MB variance */
3208     s->me.mb_var_sum_temp    =
3209     s->me.mc_mb_var_sum_temp = 0;
3210
3211     /* we need to initialize some time vars before we can encode b-frames */
3212     // RAL: Condition added for MPEG1VIDEO
3213     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3214         set_frame_distances(s);
3215     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3216         ff_set_mpeg4_time(s);
3217
3218     s->me.scene_change_score=0;
3219
3220 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3221
     /* I-pictures set no_rounding from the MSMPEG4 version; other non-B
      * pictures toggle it when flipflop_rounding is set or the codec is
      * H.263+ or MPEG-4 */
3222     if(s->pict_type==AV_PICTURE_TYPE_I){
3223         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3224         else                        s->no_rounding=0;
3225     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3226         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3227             s->no_rounding ^= 1;
3228     }
3229
     /* Initial lambda before motion estimation: a dry-run estimate_qp() in
      * two-pass mode, otherwise (unless CODEC_FLAG_QSCALE is set) the last
      * lambda recorded for this kind of picture */
3230     if(s->flags & CODEC_FLAG_PASS2){
3231         if (estimate_qp(s,1) < 0)
3232             return -1;
3233         ff_get_2pass_fcode(s);
3234     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3235         if(s->pict_type==AV_PICTURE_TYPE_B)
3236             s->lambda= s->last_lambda_for[s->pict_type];
3237         else
3238             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3239         update_qscale(s);
3240     }
3241
3242     s->mb_intra=0; //for the rate distortion & bit compare functions
     /* update the additional slice contexts from s (context 0 is skipped) */
3243     for(i=1; i<context_count; i++){
3244         ret = ff_update_duplicate_context(s->thread_context[i], s);
3245         if (ret < 0)
3246             return ret;
3247     }
3248
3249     if(ff_init_me(s)<0)
3250         return -1;
3251
3252     /* Estimate motion for every MB */
3253     if(s->pict_type != AV_PICTURE_TYPE_I){
         /* scale lambda/lambda2 by me_penalty_compensation/256, rounded */
3254         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3255         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3256         if (s->pict_type != AV_PICTURE_TYPE_B) {
3257             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3258                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3259             }
3260         }
3261
3262         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3263     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3264         /* I-Frame */
3265         for(i=0; i<s->mb_stride*s->mb_height; i++)
3266             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3267
3268         if(!s->fixed_qscale){
3269             /* finding spatial complexity for I-frame rate control */
3270             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3271         }
3272     }
     /* collect the per-context motion-estimation statistics in s */
3273     for(i=1; i<context_count; i++){
3274         merge_context_after_me(s, s->thread_context[i]);
3275     }
3276     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3277     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3278     emms_c();
3279
     /* a P-picture whose scene-change score exceeds the threshold is re-typed
      * as an I-picture with every macroblock marked intra */
3280     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3281         s->pict_type= AV_PICTURE_TYPE_I;
3282         for(i=0; i<s->mb_stride*s->mb_height; i++)
3283             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3284         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3285                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3286     }
3287
     /* unless umvplus is set, choose f_code/b_code with ff_get_best_fcode()
      * and run ff_fix_long_mvs() on each motion vector table */
3288     if(!s->umvplus){
3289         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3290             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3291
3292             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3293                 int a,b;
3294                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3295                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3296                 s->f_code= FFMAX3(s->f_code, a, b);
3297             }
3298
3299             ff_fix_long_p_mvs(s);
3300             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3301             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3302                 int j;
3303                 for(i=0; i<2; i++){
3304                     for(j=0; j<2; j++)
3305                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3306                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3307                 }
3308             }
3309         }
3310
3311         if(s->pict_type==AV_PICTURE_TYPE_B){
3312             int a, b;
3313
             /* f_code covers the forward tables, b_code the backward tables */
3314             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3315             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3316             s->f_code = FFMAX(a, b);
3317
3318             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3319             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3320             s->b_code = FFMAX(a, b);
3321
3322             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3323             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3324             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3325             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3326             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3327                 int dir, j;
3328                 for(dir=0; dir<2; dir++){
3329                     for(i=0; i<2; i++){
3330                         for(j=0; j<2; j++){
3331                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3332                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3333                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3334                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3335                         }
3336                     }
3337                 }
3338             }
3339         }
3340     }
3341
     /* final (non dry-run) quantiser decision for this picture */
3342     if (estimate_qp(s, 0) < 0)
3343         return -1;
3344
3345     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3346         s->qscale= 3; //reduce clipping problems
3347
3348     if (s->out_format == FMT_MJPEG) {
3349         /* for mjpeg, we do include qscale in the matrix */
3350         for(i=1;i<64;i++){
3351             int j= s->dsp.idct_permutation[i];
3352
3353             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3354         }
3355         s->y_dc_scale_table=
3356         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3357         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3358         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3359                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
         /* qscale is already folded into intra_matrix, so it is fixed to 8 */
3360         s->qscale= 8;
3361     }
3362
3363     //FIXME var duplication
3364     s->current_picture_ptr->f->key_frame =
3365     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3366     s->current_picture_ptr->f->pict_type =
3367     s->current_picture.f->pict_type = s->pict_type;
3368
3369     if (s->current_picture.f->key_frame)
3370         s->picture_in_gop_number=0;
3371
     /* write the picture header for the output format and record its size */
3372     s->last_bits= put_bits_count(&s->pb);
3373     switch(s->out_format) {
3374     case FMT_MJPEG:
3375         if (CONFIG_MJPEG_ENCODER)
3376             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3377                                            s->intra_matrix);
3378         break;
3379     case FMT_H261:
3380         if (CONFIG_H261_ENCODER)
3381             ff_h261_encode_picture_header(s, picture_number);
3382         break;
3383     case FMT_H263:
3384         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3385             ff_wmv2_encode_picture_header(s, picture_number);
3386         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3387             ff_msmpeg4_encode_picture_header(s, picture_number);
3388         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3389             ff_mpeg4_encode_picture_header(s, picture_number);
3390         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3391             ff_rv10_encode_picture_header(s, picture_number);
3392         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3393             ff_rv20_encode_picture_header(s, picture_number);
3394         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3395             ff_flv_encode_picture_header(s, picture_number);
3396         else if (CONFIG_H263_ENCODER)
3397             ff_h263_encode_picture_header(s, picture_number);
3398         break;
3399     case FMT_MPEG1:
3400         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3401             ff_mpeg1_encode_picture_header(s, picture_number);
3402         break;
3403     default:
3404         assert(0);
3405     }
3406     bits= put_bits_count(&s->pb);
3407     s->header_bits= bits - s->last_bits;
3408
     /* encode all slice contexts, then merge the statistics and bitstreams
      * of the additional contexts into s */
3409     for(i=1; i<context_count; i++){
3410         update_duplicate_context_after_me(s->thread_context[i], s);
3411     }
3412     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3413     for(i=1; i<context_count; i++){
3414         merge_context_after_encode(s, s->thread_context[i]);
3415     }
3416     emms_c();
3417     return 0;
3418 }
3419
3420 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3421     const int intra= s->mb_intra;
3422     int i;
3423
3424     s->dct_count[intra]++;
3425
3426     for(i=0; i<64; i++){
3427         int level= block[i];
3428
3429         if(level){
3430             if(level>0){
3431                 s->dct_error_sum[intra][i] += level;
3432                 level -= s->dct_offset[intra][i];
3433                 if(level<0) level=0;
3434             }else{
3435                 s->dct_error_sum[intra][i] -= level;
3436                 level += s->dct_offset[intra][i];
3437                 if(level>0) level=0;
3438             }
3439             block[i]= level;
3440         }
3441     }
3442 }
3443
/**
 * Forward-transform and quantise one block, choosing the quantised levels by
 * a rate-distortion search instead of plain rounding.
 *
 * For each scan position up to the last coefficient that passes the
 * threshold test, up to two candidate levels are kept (the rounded level and
 * the next smaller magnitude). A dynamic-programming pass over the scan
 * positions then selects the (run, level) choices that minimise
 * distortion + lambda * bits, with bits taken from the AC VLC length tables
 * or from s->ac_esc_length when the level is outside the table range.
 *
 * @param s        encoder context; s->coded_score[n] is written
 * @param block    64 values handed to s->dsp.fdct(); on return holds the
 *                 chosen quantised levels, stored through the permuted
 *                 scantable
 * @param n        block index; for intra blocks without h263_aic, n < 4
 *                 selects y_dc_scale, otherwise c_dc_scale
 * @param qscale   quantiser; selects the q_intra_matrix/q_inter_matrix row
 * @param overflow set to non-zero when the OR of the rounded magnitudes
 *                 exceeds s->max_qcoeff
 * @return scan index of the last non-zero coefficient; a value below the
 *         first coded position (0 for intra, -1 for inter) when no AC
 *         coefficient is kept
 */
3444 static int dct_quantize_trellis_c(MpegEncContext *s,
3445                                   int16_t *block, int n,
3446                                   int qscale, int *overflow){
3447     const int *qmat;
3448     const uint8_t *scantable= s->intra_scantable.scantable;
3449     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3450     int max=0;
3451     unsigned int threshold1, threshold2;
3452     int bias=0;
3453     int run_tab[65];
3454     int level_tab[65];
3455     int score_tab[65];
3456     int survivor[65];
3457     int survivor_count;
3458     int last_run=0;
3459     int last_level=0;
3460     int last_score= 0;
3461     int last_i;
3462     int coeff[2][64];
3463     int coeff_count[64];
3464     int qmul, qadd, start_i, last_non_zero, i, dc;
3465     const int esc_length= s->ac_esc_length;
3466     uint8_t * length;
3467     uint8_t * last_length;
3468     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3469
3470     s->dsp.fdct (block);
3471
3472     if(s->dct_error_sum)
3473         s->denoise_dct(s, block);
3474     qmul= qscale*16;
3475     qadd= ((qscale-1)|1)*8;
3476
3477     if (s->mb_intra) {
3478         int q;
3479         if (!s->h263_aic) {
3480             if (n < 4)
3481                 q = s->y_dc_scale;
3482             else
3483                 q = s->c_dc_scale;
3484             q = q << 3;
3485         } else{
3486             /* For AIC we skip quant/dequant of INTRADC */
3487             q = 1 << 3;
3488             qadd=0;
3489         }
3490
3491         /* note: block[0] is assumed to be positive */
3492         block[0] = (block[0] + (q >> 1)) / q;
3493         start_i = 1;
3494         last_non_zero = 0;
3495         qmat = s->q_intra_matrix[qscale];
3496         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3497             bias= 1<<(QMAT_SHIFT-1);
3498         length     = s->intra_ac_vlc_length;
3499         last_length= s->intra_ac_vlc_last_length;
3500     } else {
3501         start_i = 0;
3502         last_non_zero = -1;
3503         qmat = s->q_inter_matrix[qscale];
3504         length     = s->inter_ac_vlc_length;
3505         last_length= s->inter_ac_vlc_last_length;
3506     }
3507     last_i= start_i;
3508
     /* a scaled coefficient passes the test below when its magnitude is
      * larger than threshold1 */
3509     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3510     threshold2= (threshold1<<1);
3511
     /* scan backwards for the last coefficient that passes the threshold */
3512     for(i=63; i>=start_i; i--) {
3513         const int j = scantable[i];
3514         int level = block[j] * qmat[j];
3515
3516         if(((unsigned)(level+threshold1))>threshold2){
3517             last_non_zero = i;
3518             break;
3519         }
3520     }
3521
     /* build the candidate levels for every position up to last_non_zero */
3522     for(i=start_i; i<=last_non_zero; i++) {
3523         const int j = scantable[i];
3524         int level = block[j] * qmat[j];
3525
3526 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3527 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3528         if(((unsigned)(level+threshold1))>threshold2){
3529             if(level>0){
3530                 level= (bias + level)>>QMAT_SHIFT;
3531                 coeff[0][i]= level;
3532                 coeff[1][i]= level-1;
3533 //                coeff[2][k]= level-2;
3534             }else{
3535                 level= (bias - level)>>QMAT_SHIFT;
3536                 coeff[0][i]= -level;
3537                 coeff[1][i]= -level+1;
3538 //                coeff[2][k]= -level+2;
3539             }
             /* a magnitude of 1 has a single candidate, since level-1 is 0 */
3540             coeff_count[i]= FFMIN(level, 2);
3541             assert(coeff_count[i]);
3542             max |=level;
3543         }else{
             /* below the threshold: the only candidate is +1 or -1,
              * following the sign of level */
3544             coeff[0][i]= (level>>31)|1;
3545             coeff_count[i]= 1;
3546         }
3547     }
3548
3549     *overflow= s->max_qcoeff < max; //overflow might have happened
3550
3551     if(last_non_zero < start_i){
3552         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3553         return last_non_zero;
3554     }
3555
3556     score_tab[start_i]= 0;
3557     survivor[0]= start_i;
3558     survivor_count= 1;
3559
     /* main search: score_tab[i+1] receives the best score found for coding
      * a non-zero level at position i; survivor[] lists the earlier
      * positions still considered as the start of the run */
3560     for(i=start_i; i<=last_non_zero; i++){
3561         int level_index, j, zero_distortion;
3562         int dct_coeff= FFABS(block[ scantable[i] ]);
3563         int best_score=256*256*256*120;
3564
3565         if (s->dsp.fdct == ff_fdct_ifast)
3566             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3567         zero_distortion= dct_coeff*dct_coeff;
3568
3569         for(level_index=0; level_index < coeff_count[i]; level_index++){
3570             int distortion;
3571             int level= coeff[level_index][i];
3572             const int alevel= FFABS(level);
3573             int unquant_coeff;
3574
3575             assert(level);
3576
3577             if(s->out_format == FMT_H263){
3578                 unquant_coeff= alevel*qmul + qadd;
3579             }else{ //MPEG1
3580                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3581                 if(s->mb_intra){
3582                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3583                         unquant_coeff =   (unquant_coeff - 1) | 1;
3584                 }else{
3585                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3586                         unquant_coeff =   (unquant_coeff - 1) | 1;
3587                 }
3588                 unquant_coeff<<= 3;
3589             }
3590
             /* distortion relative to coding this coefficient as zero */
3591             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3592             level+=64;
             /* levels in -64..63 index the VLC length tables; anything else
              * is costed with esc_length */
3593             if((level&(~127)) == 0){
3594                 for(j=survivor_count-1; j>=0; j--){
3595                     int run= i - survivor[j];
3596                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3597                     score += score_tab[i-run];
3598
3599                     if(score < best_score){
3600                         best_score= score;
3601                         run_tab[i+1]= run;
3602                         level_tab[i+1]= level-64;
3603                     }
3604                 }
3605
3606                 if(s->out_format == FMT_H263){
3607                     for(j=survivor_count-1; j>=0; j--){
3608                         int run= i - survivor[j];
3609                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3610                         score += score_tab[i-run];
3611                         if(score < last_score){
3612                             last_score= score;
3613                             last_run= run;
3614                             last_level= level-64;
3615                             last_i= i+1;
3616                         }
3617                     }
3618                 }
3619             }else{
3620                 distortion += esc_length*lambda;
3621                 for(j=survivor_count-1; j>=0; j--){
3622                     int run= i - survivor[j];
3623                     int score= distortion + score_tab[i-run];
3624
3625                     if(score < best_score){
3626                         best_score= score;
3627                         run_tab[i+1]= run;
3628                         level_tab[i+1]= level-64;
3629                     }
3630                 }
3631
3632                 if(s->out_format == FMT_H263){
3633                   for(j=survivor_count-1; j>=0; j--){
3634                         int run= i - survivor[j];
3635                         int score= distortion + score_tab[i-run];
3636                         if(score < last_score){
3637                             last_score= score;
3638                             last_run= run;
3639                             last_level= level-64;
3640                             last_i= i+1;
3641                         }
3642                     }
3643                 }
3644             }
3645         }
3646
3647         score_tab[i+1]= best_score;
3648
3649         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
         /* drop survivors whose score is worse than the new best; with more
          * than 28 coded positions a slack of lambda is allowed */
3650         if(last_non_zero <= 27){
3651             for(; survivor_count; survivor_count--){
3652                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3653                     break;
3654             }
3655         }else{
3656             for(; survivor_count; survivor_count--){
3657                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3658                     break;
3659             }
3660         }
3661
3662         survivor[ survivor_count++ ]= i+1;
3663     }
3664
     /* for formats other than H.263 the end position is chosen here, as the
      * position with the lowest accumulated score */
3665     if(s->out_format != FMT_H263){
3666         last_score= 256*256*256*120;
3667         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3668             int score= score_tab[i];
3669             if(i) score += lambda*2; //FIXME exacter?
3670
3671             if(score < last_score){
3672                 last_score= score;
3673                 last_i= i;
3674                 last_level= level_tab[i];
3675                 last_run= run_tab[i];
3676             }
3677         }
3678     }
3679
3680     s->coded_score[n] = last_score;
3681
3682     dc= FFABS(block[0]);
3683     last_non_zero= last_i - 1;
3684     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3685
3686     if(last_non_zero < start_i)
3687         return last_non_zero;
3688
     /* inter block whose only kept coefficient is at position 0: decide
      * again between its candidate levels and zero */
3689     if(last_non_zero == 0 && start_i == 0){
3690         int best_level= 0;
3691         int best_score= dc * dc;
3692
3693         for(i=0; i<coeff_count[0]; i++){
3694             int level= coeff[i][0];
3695             int alevel= FFABS(level);
3696             int unquant_coeff, score, distortion;
3697
3698             if(s->out_format == FMT_H263){
3699                     unquant_coeff= (alevel*qmul + qadd)>>3;
3700             }else{ //MPEG1
3701                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3702                     unquant_coeff =   (unquant_coeff - 1) | 1;
3703             }
3704             unquant_coeff = (unquant_coeff + 4) >> 3;
3705             unquant_coeff<<= 3 + 3;
3706
3707             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3708             level+=64;
3709             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3710             else                    score= distortion + esc_length*lambda;
3711
3712             if(score < best_score){
3713                 best_score= score;
3714                 best_level= level - 64;
3715             }
3716         }
3717         block[0]= best_level;
3718         s->coded_score[n] = best_score - dc*dc;
3719         if(best_level == 0) return -1;
3720         else                return last_non_zero;
3721     }
3722
     /* write the last level, then walk back through run_tab/level_tab to
      * store the remaining chosen levels */
3723     i= last_i;
3724     assert(last_level);
3725
3726     block[ perm_scantable[last_non_zero] ]= last_level;
3727     i -= last_run + 1;
3728
3729     for(; i>start_i; i -= run_tab[i] + 1){
3730         block[ perm_scantable[i-1] ]= level_tab[i];
3731     }
3732
3733     return last_non_zero;
3734 }
3735
3736 //#define REFINE_STATS 1
3737 static int16_t basis[64][64];
3738
3739 static void build_basis(uint8_t *perm){
3740     int i, j, x, y;
3741     emms_c();
3742     for(i=0; i<8; i++){
3743         for(j=0; j<8; j++){
3744             for(y=0; y<8; y++){
3745                 for(x=0; x<8; x++){
3746                     double s= 0.25*(1<<BASIS_SHIFT);
3747                     int index= 8*i + j;
3748                     int perm_index= perm[index];
3749                     if(i==0) s*= sqrt(0.5);
3750                     if(j==0) s*= sqrt(0.5);
3751                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3752                 }
3753             }
3754         }
3755     }
3756 }
3757
3758 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3759                         int16_t *block, int16_t *weight, int16_t *orig,
3760                         int n, int qscale){
3761     int16_t rem[64];
3762     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3763     const uint8_t *scantable= s->intra_scantable.scantable;
3764     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3765 //    unsigned int threshold1, threshold2;
3766 //    int bias=0;
3767     int run_tab[65];
3768     int prev_run=0;
3769     int prev_level=0;
3770     int qmul, qadd, start_i, last_non_zero, i, dc;
3771     uint8_t * length;
3772     uint8_t * last_length;
3773     int lambda;
3774     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3775 #ifdef REFINE_STATS
3776 static int count=0;
3777 static int after_last=0;
3778 static int to_zero=0;
3779 static int from_zero=0;
3780 static int raise=0;
3781 static int lower=0;
3782 static int messed_sign=0;
3783 #endif
3784
3785     if(basis[0][0] == 0)
3786         build_basis(s->dsp.idct_permutation);
3787
3788     qmul= qscale*2;
3789     qadd= (qscale-1)|1;
3790     if (s->mb_intra) {
3791         if (!s->h263_aic) {
3792             if (n < 4)
3793                 q = s->y_dc_scale;
3794             else
3795                 q = s->c_dc_scale;
3796         } else{
3797             /* For AIC we skip quant/dequant of INTRADC */
3798             q = 1;
3799             qadd=0;
3800         }
3801         q <<= RECON_SHIFT-3;
3802         /* note: block[0] is assumed to be positive */
3803         dc= block[0]*q;
3804 //        block[0] = (block[0] + (q >> 1)) / q;
3805         start_i = 1;
3806 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3807 //            bias= 1<<(QMAT_SHIFT-1);
3808         length     = s->intra_ac_vlc_length;
3809         last_length= s->intra_ac_vlc_last_length;
3810     } else {
3811         dc= 0;
3812         start_i = 0;
3813         length     = s->inter_ac_vlc_length;
3814         last_length= s->inter_ac_vlc_last_length;
3815     }
3816     last_non_zero = s->block_last_index[n];
3817
3818 #ifdef REFINE_STATS
3819 {START_TIMER
3820 #endif
3821     dc += (1<<(RECON_SHIFT-1));
3822     for(i=0; i<64; i++){
3823         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3824     }
3825 #ifdef REFINE_STATS
3826 STOP_TIMER("memset rem[]")}
3827 #endif
3828     sum=0;
3829     for(i=0; i<64; i++){
3830         int one= 36;
3831         int qns=4;
3832         int w;
3833
3834         w= FFABS(weight[i]) + qns*one;
3835         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3836
3837         weight[i] = w;
3838 //        w=weight[i] = (63*qns + (w/2)) / w;
3839
3840         assert(w>0);
3841         assert(w<(1<<6));
3842         sum += w*w;
3843     }
3844     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3845 #ifdef REFINE_STATS
3846 {START_TIMER
3847 #endif
3848     run=0;
3849     rle_index=0;
3850     for(i=start_i; i<=last_non_zero; i++){
3851         int j= perm_scantable[i];
3852         const int level= block[j];
3853         int coeff;
3854
3855         if(level){
3856             if(level<0) coeff= qmul*level - qadd;
3857             else        coeff= qmul*level + qadd;
3858             run_tab[rle_index++]=run;
3859             run=0;
3860
3861             s->dsp.add_8x8basis(rem, basis[j], coeff);
3862         }else{
3863             run++;
3864         }
3865     }
3866 #ifdef REFINE_STATS
3867 if(last_non_zero>0){
3868 STOP_TIMER("init rem[]")
3869 }
3870 }
3871
3872 {START_TIMER
3873 #endif
3874     for(;;){
3875         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3876         int best_coeff=0;
3877         int best_change=0;
3878         int run2, best_unquant_change=0, analyze_gradient;
3879 #ifdef REFINE_STATS
3880 {START_TIMER
3881 #endif
3882         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3883
3884         if(analyze_gradient){
3885 #ifdef REFINE_STATS
3886 {START_TIMER
3887 #endif
3888             for(i=0; i<64; i++){
3889                 int w= weight[i];
3890
3891                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3892             }
3893 #ifdef REFINE_STATS
3894 STOP_TIMER("rem*w*w")}
3895 {START_TIMER
3896 #endif
3897             s->dsp.fdct(d1);
3898 #ifdef REFINE_STATS
3899 STOP_TIMER("dct")}
3900 #endif
3901         }
3902
3903         if(start_i){
3904             const int level= block[0];
3905             int change, old_coeff;
3906
3907             assert(s->mb_intra);
3908
3909             old_coeff= q*level;
3910
3911             for(change=-1; change<=1; change+=2){
3912                 int new_level= level + change;
3913                 int score, new_coeff;
3914
3915                 new_coeff= q*new_level;
3916                 if(new_coeff >= 2048 || new_coeff < 0)
3917                     continue;
3918
3919                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3920                 if(score<best_score){
3921                     best_score= score;
3922                     best_coeff= 0;
3923                     best_change= change;
3924                     best_unquant_change= new_coeff - old_coeff;
3925                 }
3926             }
3927         }
3928
3929         run=0;
3930         rle_index=0;
3931         run2= run_tab[rle_index++];
3932         prev_level=0;
3933         prev_run=0;
3934
3935         for(i=start_i; i<64; i++){
3936             int j= perm_scantable[i];
3937             const int level= block[j];
3938             int change, old_coeff;
3939
3940             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3941                 break;
3942
3943             if(level){
3944                 if(level<0) old_coeff= qmul*level - qadd;
3945                 else        old_coeff= qmul*level + qadd;
3946                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3947             }else{
3948                 old_coeff=0;
3949                 run2--;
3950                 assert(run2>=0 || i >= last_non_zero );
3951             }
3952
3953             for(change=-1; change<=1; change+=2){
3954                 int new_level= level + change;
3955                 int score, new_coeff, unquant_change;
3956
3957                 score=0;
3958                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3959                    continue;
3960
3961                 if(new_level){
3962                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3963                     else            new_coeff= qmul*new_level + qadd;
3964                     if(new_coeff >= 2048 || new_coeff <= -2048)
3965                         continue;
3966                     //FIXME check for overflow
3967
3968                     if(level){
3969                         if(level < 63 && level > -63){
3970                             if(i < last_non_zero)
3971                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3972                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3973                             else
3974                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3975                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3976                         }
3977                     }else{
3978                         assert(FFABS(new_level)==1);
3979
3980                         if(analyze_gradient){
3981                             int g= d1[ scantable[i] ];
3982                             if(g && (g^new_level) >= 0)
3983                                 continue;
3984                         }
3985
3986                         if(i < last_non_zero){
3987                             int next_i= i + run2 + 1;
3988                             int next_level= block[ perm_scantable[next_i] ] + 64;
3989
3990                             if(next_level&(~127))
3991                                 next_level= 0;
3992
3993                             if(next_i < last_non_zero)
3994                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3995                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3996                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3997                             else
3998                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3999                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4000                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4001                         }else{
4002                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4003                             if(prev_level){
4004                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4005                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4006                             }
4007                         }
4008                     }
4009                 }else{
4010                     new_coeff=0;
4011                     assert(FFABS(level)==1);
4012
4013                     if(i < last_non_zero){
4014                         int next_i= i + run2 + 1;
4015                         int next_level= block[ perm_scantable[next_i] ] + 64;
4016
4017                         if(next_level&(~127))
4018                             next_level= 0;
4019
4020                         if(next_i < last_non_zero)
4021                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4022                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4023                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4024                         else
4025                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4026                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4027                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4028                     }else{
4029                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4030                         if(prev_level){
4031                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4032                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4033                         }
4034                     }
4035                 }
4036
4037                 score *= lambda;
4038
4039                 unquant_change= new_coeff - old_coeff;
4040                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
4041
4042                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
4043                 if(score<best_score){
4044                     best_score= score;
4045                     best_coeff= i;
4046                     best_change= change;
4047                     best_unquant_change= unquant_change;
4048                 }
4049             }
4050             if(level){
4051                 prev_level= level + 64;
4052                 if(prev_level&(~127))
4053                     prev_level= 0;
4054                 prev_run= run;
4055                 run=0;
4056             }else{
4057                 run++;
4058             }
4059         }
4060 #ifdef REFINE_STATS
4061 STOP_TIMER("iterative step")}
4062 #endif
4063
4064         if(best_change){
4065             int j= perm_scantable[ best_coeff ];
4066
4067             block[j] += best_change;
4068
4069             if(best_coeff > last_non_zero){
4070                 last_non_zero= best_coeff;
4071                 assert(block[j]);
4072 #ifdef REFINE_STATS
4073 after_last++;
4074 #endif
4075             }else{
4076 #ifdef REFINE_STATS
4077 if(block[j]){
4078     if(block[j] - best_change){
4079         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4080             raise++;
4081         }else{
4082             lower++;
4083         }
4084     }else{
4085         from_zero++;
4086     }
4087 }else{
4088     to_zero++;
4089 }
4090 #endif
4091                 for(; last_non_zero>=start_i; last_non_zero--){
4092                     if(block[perm_scantable[last_non_zero]])
4093                         break;
4094                 }
4095             }
4096 #ifdef REFINE_STATS
4097 count++;
4098 if(256*256*256*64 % count == 0){
4099     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4100 }
4101 #endif
4102             run=0;
4103             rle_index=0;
4104             for(i=start_i; i<=last_non_zero; i++){
4105                 int j= perm_scantable[i];
4106                 const int level= block[j];
4107
4108                  if(level){
4109                      run_tab[rle_index++]=run;
4110                      run=0;
4111                  }else{
4112                      run++;
4113                  }
4114             }
4115
4116             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4117         }else{
4118             break;
4119         }
4120     }
4121 #ifdef REFINE_STATS
4122 if(last_non_zero>0){
4123 STOP_TIMER("iterative search")
4124 }
4125 }
4126 #endif
4127
4128     return last_non_zero;
4129 }
4130
4131 int ff_dct_quantize_c(MpegEncContext *s,
4132                         int16_t *block, int n,
4133                         int qscale, int *overflow)
4134 {
4135     int i, j, level, last_non_zero, q, start_i;
4136     const int *qmat;
4137     const uint8_t *scantable= s->intra_scantable.scantable;
4138     int bias;
4139     int max=0;
4140     unsigned int threshold1, threshold2;
4141
4142     s->dsp.fdct (block);
4143
4144     if(s->dct_error_sum)
4145         s->denoise_dct(s, block);
4146
4147     if (s->mb_intra) {
4148         if (!s->h263_aic) {
4149             if (n < 4)
4150                 q = s->y_dc_scale;
4151             else
4152                 q = s->c_dc_scale;
4153             q = q << 3;
4154         } else
4155             /* For AIC we skip quant/dequant of INTRADC */
4156             q = 1 << 3;
4157
4158         /* note: block[0] is assumed to be positive */
4159         block[0] = (block[0] + (q >> 1)) / q;
4160         start_i = 1;
4161         last_non_zero = 0;
4162         qmat = s->q_intra_matrix[qscale];
4163         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4164     } else {
4165         start_i = 0;
4166         last_non_zero = -1;
4167         qmat = s->q_inter_matrix[qscale];
4168         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4169     }
4170     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4171     threshold2= (threshold1<<1);
4172     for(i=63;i>=start_i;i--) {
4173         j = scantable[i];
4174         level = block[j] * qmat[j];
4175
4176         if(((unsigned)(level+threshold1))>threshold2){
4177             last_non_zero = i;
4178             break;
4179         }else{
4180             block[j]=0;
4181         }
4182     }
4183     for(i=start_i; i<=last_non_zero; i++) {
4184         j = scantable[i];
4185         level = block[j] * qmat[j];
4186
4187 //        if(   bias+level >= (1<<QMAT_SHIFT)
4188 //           || bias-level >= (1<<QMAT_SHIFT)){
4189         if(((unsigned)(level+threshold1))>threshold2){
4190             if(level>0){
4191                 level= (bias + level)>>QMAT_SHIFT;
4192                 block[j]= level;
4193             }else{
4194                 level= (bias - level)>>QMAT_SHIFT;
4195                 block[j]= -level;
4196             }
4197             max |=level;
4198         }else{
4199             block[j]=0;
4200         }
4201     }
4202     *overflow= s->max_qcoeff < max; //overflow might have happened
4203
4204     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4205     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4206         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4207
4208     return last_non_zero;
4209 }
4210
4211 #define OFFSET(x) offsetof(MpegEncContext, x)
4212 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4213 static const AVOption h263_options[] = {
4214     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4215     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4216     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4217     FF_MPV_COMMON_OPTS
4218     { NULL },
4219 };
4220
4221 static const AVClass h263_class = {
4222     .class_name = "H.263 encoder",
4223     .item_name  = av_default_item_name,
4224     .option     = h263_options,
4225     .version    = LIBAVUTIL_VERSION_INT,
4226 };
4227
4228 AVCodec ff_h263_encoder = {
4229     .name           = "h263",
4230     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4231     .type           = AVMEDIA_TYPE_VIDEO,
4232     .id             = AV_CODEC_ID_H263,
4233     .priv_data_size = sizeof(MpegEncContext),
4234     .init           = ff_MPV_encode_init,
4235     .encode2        = ff_MPV_encode_picture,
4236     .close          = ff_MPV_encode_end,
4237     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4238     .priv_class     = &h263_class,
4239 };
4240
4241 static const AVOption h263p_options[] = {
4242     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4243     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4244     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4245     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4246     FF_MPV_COMMON_OPTS
4247     { NULL },
4248 };
4249 static const AVClass h263p_class = {
4250     .class_name = "H.263p encoder",
4251     .item_name  = av_default_item_name,
4252     .option     = h263p_options,
4253     .version    = LIBAVUTIL_VERSION_INT,
4254 };
4255
4256 AVCodec ff_h263p_encoder = {
4257     .name           = "h263p",
4258     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4259     .type           = AVMEDIA_TYPE_VIDEO,
4260     .id             = AV_CODEC_ID_H263P,
4261     .priv_data_size = sizeof(MpegEncContext),
4262     .init           = ff_MPV_encode_init,
4263     .encode2        = ff_MPV_encode_picture,
4264     .close          = ff_MPV_encode_end,
4265     .capabilities   = CODEC_CAP_SLICE_THREADS,
4266     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4267     .priv_class     = &h263p_class,
4268 };
4269
4270 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4271
4272 AVCodec ff_msmpeg4v2_encoder = {
4273     .name           = "msmpeg4v2",
4274     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4275     .type           = AVMEDIA_TYPE_VIDEO,
4276     .id             = AV_CODEC_ID_MSMPEG4V2,
4277     .priv_data_size = sizeof(MpegEncContext),
4278     .init           = ff_MPV_encode_init,
4279     .encode2        = ff_MPV_encode_picture,
4280     .close          = ff_MPV_encode_end,
4281     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4282     .priv_class     = &msmpeg4v2_class,
4283 };
4284
4285 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4286
4287 AVCodec ff_msmpeg4v3_encoder = {
4288     .name           = "msmpeg4",
4289     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4290     .type           = AVMEDIA_TYPE_VIDEO,
4291     .id             = AV_CODEC_ID_MSMPEG4V3,
4292     .priv_data_size = sizeof(MpegEncContext),
4293     .init           = ff_MPV_encode_init,
4294     .encode2        = ff_MPV_encode_picture,
4295     .close          = ff_MPV_encode_end,
4296     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4297     .priv_class     = &msmpeg4v3_class,
4298 };
4299
4300 FF_MPV_GENERIC_CLASS(wmv1)
4301
4302 AVCodec ff_wmv1_encoder = {
4303     .name           = "wmv1",
4304     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4305     .type           = AVMEDIA_TYPE_VIDEO,
4306     .id             = AV_CODEC_ID_WMV1,
4307     .priv_data_size = sizeof(MpegEncContext),
4308     .init           = ff_MPV_encode_init,
4309     .encode2        = ff_MPV_encode_picture,
4310     .close          = ff_MPV_encode_end,
4311     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4312     .priv_class     = &wmv1_class,
4313 };