1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "avcodec.h"
38 #include "dct.h"
39 #include "dsputil.h"
40 #include "mpeg12.h"
41 #include "mpegvideo.h"
42 #include "h261.h"
43 #include "h263.h"
44 #include "mathops.h"
45 #include "mjpegenc.h"
46 #include "msmpeg4.h"
47 #include "faandct.h"
48 #include "thread.h"
49 #include "aandcttab.h"
50 #include "flv.h"
51 #include "mpeg4video.h"
52 #include "internal.h"
53 #include "bytestream.h"
54 #include <limits.h>
55
56 static int encode_picture(MpegEncContext *s, int picture_number);
57 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
58 static int sse_mb(MpegEncContext *s);
59 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
60 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
61
62 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
63 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
64
65 const AVOption ff_mpv_generic_options[] = {
66     FF_MPV_COMMON_OPTS
67     { NULL },
68 };
69
70 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
71                        uint16_t (*qmat16)[2][64],
72                        const uint16_t *quant_matrix,
73                        int bias, int qmin, int qmax, int intra)
74 {
75     int qscale;
76     int shift = 0;
77
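    /* Build per-qscale multiplier tables: instead of dividing by
     * qscale * quant_matrix[] at quantization time, the quantizers
     * multiply by these fixed-point reciprocals and shift.  The 16-bit
     * tables (qmat16) additionally carry a rounding bias for the SIMD
     * quantizer; the check at the end only detects a possible 32-bit
     * overflow and warns about it. */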
78     for (qscale = qmin; qscale <= qmax; qscale++) {
79         int i;
80         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
81             dsp->fdct == ff_jpeg_fdct_islow_10 ||
82             dsp->fdct == ff_faandct) {
83             for (i = 0; i < 64; i++) {
84                 const int j = dsp->idct_permutation[i];
85                 /* 16 <= qscale * quant_matrix[i] <= 7905
86                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
87                  *             19952 <=              x  <= 249205026
88                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
89                  *           3444240 >= (1 << 36) / (x) >= 275 */
90
91                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
92                                         (qscale * quant_matrix[j]));
93             }
94         } else if (dsp->fdct == ff_fdct_ifast) {
95             for (i = 0; i < 64; i++) {
96                 const int j = dsp->idct_permutation[i];
97                 /* 16 <= qscale * quant_matrix[i] <= 7905
98                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
99                  *             19952 <=              x  <= 249205026
100                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
101                  *           3444240 >= (1 << 36) / (x) >= 275 */
102
103                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
104                                         (ff_aanscales[i] * qscale *
105                                          quant_matrix[j]));
106             }
107         } else {
108             for (i = 0; i < 64; i++) {
109                 const int j = dsp->idct_permutation[i];
110                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
111                  * Assume x = qscale * quant_matrix[i]
112                  * So             16 <=              x  <= 7905
113                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
114                  * so          32768 >= (1 << 19) / (x) >= 67 */
115                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
116                                         (qscale * quant_matrix[j]));
117                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
118                 //                    (qscale * quant_matrix[i]);
119                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
120                                        (qscale * quant_matrix[j]);
121
122                 if (qmat16[qscale][0][i] == 0 ||
123                     qmat16[qscale][0][i] == 128 * 256)
124                     qmat16[qscale][0][i] = 128 * 256 - 1;
125                 qmat16[qscale][1][i] =
126                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
127                                 qmat16[qscale][0][i]);
128             }
129         }
130
131         for (i = intra; i < 64; i++) {
132             int64_t max = 8191;
133             if (dsp->fdct == ff_fdct_ifast) {
134                 max = (8191LL * ff_aanscales[i]) >> 14;
135             }
136             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
137                 shift++;
138             }
139         }
140     }
141     if (shift) {
142         av_log(NULL, AV_LOG_INFO,
143                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
144                QMAT_SHIFT - shift);
145     }
146 }
147
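/* Derive the current qscale from the active Lagrange multiplier
 * (roughly qscale = lambda / FF_QP2LAMBDA, with rounding), clip it to
 * the user-supplied [qmin, qmax] range and keep lambda2 in sync. */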
148 static inline void update_qscale(MpegEncContext *s)
149 {
150     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
151                 (FF_LAMBDA_SHIFT + 7);
152     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
153
154     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
155                  FF_LAMBDA_SHIFT;
156 }
157
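/* Write an optional custom quantization matrix: a 1-bit presence flag,
 * followed by the 64 entries in zigzag order, 8 bits each. */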
158 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
159 {
160     int i;
161
162     if (matrix) {
163         put_bits(pb, 1, 1);
164         for (i = 0; i < 64; i++) {
165             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
166         }
167     } else
168         put_bits(pb, 1, 0);
169 }
170
171 /**
172  * init s->current_picture.qscale_table from s->lambda_table
173  */
174 void ff_init_qscale_tab(MpegEncContext *s)
175 {
176     int8_t * const qscale_table = s->current_picture.qscale_table;
177     int i;
178
179     for (i = 0; i < s->mb_num; i++) {
180         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
181         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
182         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
183                                                   s->avctx->qmax);
184     }
185 }
186
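/* Copy the per-frame encoding decisions (picture type, f/b codes,
 * qscale, lambda, ...) from one context into another; used to keep the
 * slice-thread contexts in sync with the main context once motion
 * estimation has run. */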
187 static void update_duplicate_context_after_me(MpegEncContext *dst,
188                                               MpegEncContext *src)
189 {
190 #define COPY(a) dst->a= src->a
191     COPY(pict_type);
192     COPY(current_picture);
193     COPY(f_code);
194     COPY(b_code);
195     COPY(qscale);
196     COPY(lambda);
197     COPY(lambda2);
198     COPY(picture_in_gop_number);
199     COPY(gop_picture_number);
200     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
201     COPY(progressive_frame);    // FIXME don't set in encode_header
202     COPY(partitioned_frame);    // FIXME don't set in encode_header
203 #undef COPY
204 }
205
206 /**
207  * Set the given MpegEncContext to defaults for encoding.
208  * the changed fields will not depend upon the prior state of the MpegEncContext.
209  */
210 static void MPV_encode_defaults(MpegEncContext *s)
211 {
212     int i;
213     ff_MPV_common_defaults(s);
214
215     for (i = -16; i < 16; i++) {
216         default_fcode_tab[i + MAX_MV] = 1;
217     }
218     s->me.mv_penalty = default_mv_penalty;
219     s->fcode_tab     = default_fcode_tab;
220
221     s->input_picture_number  = 0;
222     s->picture_in_gop_number = 0;
223 }
224
225 /* init video encoder */
226 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
227 {
228     MpegEncContext *s = avctx->priv_data;
229     int i, ret;
230     int chroma_h_shift, chroma_v_shift;
231
232     MPV_encode_defaults(s);
233
234     switch (avctx->codec_id) {
235     case AV_CODEC_ID_MPEG2VIDEO:
236         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
237             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
238             av_log(avctx, AV_LOG_ERROR,
239                    "only YUV420 and YUV422 are supported\n");
240             return -1;
241         }
242         break;
243     case AV_CODEC_ID_LJPEG:
244         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
245             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
246             avctx->pix_fmt != AV_PIX_FMT_YUVJ444P &&
247             avctx->pix_fmt != AV_PIX_FMT_BGRA     &&
248             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
249               avctx->pix_fmt != AV_PIX_FMT_YUV422P &&
250               avctx->pix_fmt != AV_PIX_FMT_YUV444P) ||
251              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
252             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
253             return -1;
254         }
255         break;
256     case AV_CODEC_ID_MJPEG:
257         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
258             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
259             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
260               avctx->pix_fmt != AV_PIX_FMT_YUV422P) ||
261              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
262             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
263             return -1;
264         }
265         break;
266     default:
267         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
268             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
269             return -1;
270         }
271     }
272
273     switch (avctx->pix_fmt) {
274     case AV_PIX_FMT_YUVJ422P:
275     case AV_PIX_FMT_YUV422P:
276         s->chroma_format = CHROMA_422;
277         break;
278     case AV_PIX_FMT_YUVJ420P:
279     case AV_PIX_FMT_YUV420P:
280     default:
281         s->chroma_format = CHROMA_420;
282         break;
283     }
284
285     s->bit_rate = avctx->bit_rate;
286     s->width    = avctx->width;
287     s->height   = avctx->height;
288     if (avctx->gop_size > 600 &&
289         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
290         av_log(avctx, AV_LOG_ERROR,
291                "Warning keyframe interval too large! reducing it ...\n");
292         avctx->gop_size = 600;
293     }
294     s->gop_size     = avctx->gop_size;
295     s->avctx        = avctx;
296     s->flags        = avctx->flags;
297     s->flags2       = avctx->flags2;
298     if (avctx->max_b_frames > MAX_B_FRAMES) {
299         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
300                "is %d.\n", MAX_B_FRAMES);
301     }
302     s->max_b_frames = avctx->max_b_frames;
303     s->codec_id     = avctx->codec->id;
304     s->strict_std_compliance = avctx->strict_std_compliance;
305     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
306     s->mpeg_quant         = avctx->mpeg_quant;
307     s->rtp_mode           = !!avctx->rtp_payload_size;
308     s->intra_dc_precision = avctx->intra_dc_precision;
309     s->user_specified_pts = AV_NOPTS_VALUE;
310
311     if (s->gop_size <= 1) {
312         s->intra_only = 1;
313         s->gop_size   = 12;
314     } else {
315         s->intra_only = 0;
316     }
317
318     s->me_method = avctx->me_method;
319
320     /* Fixed QSCALE */
321     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
322
323     s->adaptive_quant = (s->avctx->lumi_masking ||
324                          s->avctx->dark_masking ||
325                          s->avctx->temporal_cplx_masking ||
326                          s->avctx->spatial_cplx_masking  ||
327                          s->avctx->p_masking      ||
328                          s->avctx->border_masking ||
329                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
330                         !s->fixed_qscale;
331
332     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
333
334     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
335         av_log(avctx, AV_LOG_ERROR,
336                "a vbv buffer size is needed "
337                "for encoding with a maximum bitrate\n");
338         return -1;
339     }
340
341     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
342         av_log(avctx, AV_LOG_INFO,
343                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
344     }
345
346     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
347         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
348         return -1;
349     }
350
351     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
352         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
353         return -1;
354     }
355
356     if (avctx->rc_max_rate &&
357         avctx->rc_max_rate == avctx->bit_rate &&
358         avctx->rc_max_rate != avctx->rc_min_rate) {
359         av_log(avctx, AV_LOG_INFO,
360                "impossible bitrate constraints, this will fail\n");
361     }
362
363     if (avctx->rc_buffer_size &&
364         avctx->bit_rate * (int64_t)avctx->time_base.num >
365             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
366         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
367         return -1;
368     }
369
370     if (!s->fixed_qscale &&
371         avctx->bit_rate * av_q2d(avctx->time_base) >
372             avctx->bit_rate_tolerance) {
373         av_log(avctx, AV_LOG_ERROR,
374                "bitrate tolerance too small for bitrate\n");
375         return -1;
376     }
377
378     if (s->avctx->rc_max_rate &&
379         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
380         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
381          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
382         90000LL * (avctx->rc_buffer_size - 1) >
383             s->avctx->rc_max_rate * 0xFFFFLL) {
384         av_log(avctx, AV_LOG_INFO,
385                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
386                "specified vbv buffer is too large for the given bitrate!\n");
387     }
388
389     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
390         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
391         s->codec_id != AV_CODEC_ID_FLV1) {
392         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
393         return -1;
394     }
395
396     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
397         av_log(avctx, AV_LOG_ERROR,
398                "OBMC is only supported with simple mb decision\n");
399         return -1;
400     }
401
402     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
403         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
404         return -1;
405     }
406
407     if (s->max_b_frames                    &&
408         s->codec_id != AV_CODEC_ID_MPEG4      &&
409         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
410         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
411         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
412         return -1;
413     }
414
415     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
416          s->codec_id == AV_CODEC_ID_H263  ||
417          s->codec_id == AV_CODEC_ID_H263P) &&
418         (avctx->sample_aspect_ratio.num > 255 ||
419          avctx->sample_aspect_ratio.den > 255)) {
420         av_log(avctx, AV_LOG_ERROR,
421                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
422                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
423         return -1;
424     }
425
426     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
427         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
428         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
429         return -1;
430     }
431
432     // FIXME mpeg2 uses that too
433     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
434         av_log(avctx, AV_LOG_ERROR,
435                "mpeg2 style quantization not supported by codec\n");
436         return -1;
437     }
438
439     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
440         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
441         return -1;
442     }
443
444     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
445         s->avctx->mb_decision != FF_MB_DECISION_RD) {
446         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
447         return -1;
448     }
449
450     if (s->avctx->scenechange_threshold < 1000000000 &&
451         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
452         av_log(avctx, AV_LOG_ERROR,
453                "closed gop with scene change detection is not supported yet, "
454                "set threshold to 1000000000\n");
455         return -1;
456     }
457
458     if (s->flags & CODEC_FLAG_LOW_DELAY) {
459         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
460             av_log(avctx, AV_LOG_ERROR,
461                   "low delay forcing is only available for mpeg2\n");
462             return -1;
463         }
464         if (s->max_b_frames != 0) {
465             av_log(avctx, AV_LOG_ERROR,
466                    "b frames cannot be used with low delay\n");
467             return -1;
468         }
469     }
470
471     if (s->q_scale_type == 1) {
472         if (avctx->qmax > 12) {
473             av_log(avctx, AV_LOG_ERROR,
474                    "non linear quant only supports qmax <= 12 currently\n");
475             return -1;
476         }
477     }
478
479     if (s->avctx->thread_count > 1         &&
480         s->codec_id != AV_CODEC_ID_MPEG4      &&
481         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
482         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
483         (s->codec_id != AV_CODEC_ID_H263P)) {
484         av_log(avctx, AV_LOG_ERROR,
485                "multi threaded encoding not supported by codec\n");
486         return -1;
487     }
488
489     if (s->avctx->thread_count < 1) {
490         av_log(avctx, AV_LOG_ERROR,
491                "automatic thread number detection not supported by codec, "
492                "patch welcome\n");
493         return -1;
494     }
495
496     if (s->avctx->thread_count > 1)
497         s->rtp_mode = 1;
498
499     if (!avctx->time_base.den || !avctx->time_base.num) {
500         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
501         return -1;
502     }
503
504     i = (INT_MAX / 2 + 128) >> 8;
505     if (avctx->mb_threshold >= i) {
506         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
507                i - 1);
508         return -1;
509     }
510
511     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
512         av_log(avctx, AV_LOG_INFO,
513                "notice: b_frame_strategy only affects the first pass\n");
514         avctx->b_frame_strategy = 0;
515     }
516
517     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
518     if (i > 1) {
519         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
520         avctx->time_base.den /= i;
521         avctx->time_base.num /= i;
522         //return -1;
523     }
524
525     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
526         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
527         // (a + x * 3 / 8) / x
528         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
529         s->inter_quant_bias = 0;
530     } else {
531         s->intra_quant_bias = 0;
532         // (a - x / 4) / x
533         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
534     }
535
536     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
537         s->intra_quant_bias = avctx->intra_quant_bias;
538     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
539         s->inter_quant_bias = avctx->inter_quant_bias;
540
541     av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
542                                      &chroma_v_shift);
543
544     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
545         s->avctx->time_base.den > (1 << 16) - 1) {
546         av_log(avctx, AV_LOG_ERROR,
547                "timebase %d/%d not supported by MPEG 4 standard, "
548                "the maximum admitted value for the timebase denominator "
549                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
550                (1 << 16) - 1);
551         return -1;
552     }
553     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
554
555     switch (avctx->codec->id) {
556     case AV_CODEC_ID_MPEG1VIDEO:
557         s->out_format = FMT_MPEG1;
558         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
559         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
560         break;
561     case AV_CODEC_ID_MPEG2VIDEO:
562         s->out_format = FMT_MPEG1;
563         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
564         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
565         s->rtp_mode   = 1;
566         break;
567     case AV_CODEC_ID_LJPEG:
568     case AV_CODEC_ID_MJPEG:
569         s->out_format = FMT_MJPEG;
570         s->intra_only = 1; /* force intra only for jpeg */
571         if (avctx->codec->id == AV_CODEC_ID_LJPEG &&
572             avctx->pix_fmt   == AV_PIX_FMT_BGRA) {
573             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
574             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
575             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
576         } else {
577             s->mjpeg_vsample[0] = 2;
578             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
579             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
580             s->mjpeg_hsample[0] = 2;
581             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
582             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
583         }
584         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
585             ff_mjpeg_encode_init(s) < 0)
586             return -1;
587         avctx->delay = 0;
588         s->low_delay = 1;
589         break;
590     case AV_CODEC_ID_H261:
591         if (!CONFIG_H261_ENCODER)
592             return -1;
593         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
594             av_log(avctx, AV_LOG_ERROR,
595                    "The specified picture size of %dx%d is not valid for the "
596                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
597                     s->width, s->height);
598             return -1;
599         }
600         s->out_format = FMT_H261;
601         avctx->delay  = 0;
602         s->low_delay  = 1;
603         break;
604     case AV_CODEC_ID_H263:
605         if (!CONFIG_H263_ENCODER)
606             return -1;
607         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
608                              s->width, s->height) == 8) {
609             av_log(avctx, AV_LOG_INFO,
610                    "The specified picture size of %dx%d is not valid for "
611                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
612                "352x288, 704x576, and 1408x1152. "
613                    "Try H.263+.\n", s->width, s->height);
614             return -1;
615         }
616         s->out_format = FMT_H263;
617         avctx->delay  = 0;
618         s->low_delay  = 1;
619         break;
620     case AV_CODEC_ID_H263P:
621         s->out_format = FMT_H263;
622         s->h263_plus  = 1;
623         /* Fx */
624         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
625         s->modified_quant  = s->h263_aic;
626         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
627         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
628
629         /* /Fx */
630         /* These are just to be sure */
631         avctx->delay = 0;
632         s->low_delay = 1;
633         break;
634     case AV_CODEC_ID_FLV1:
635         s->out_format      = FMT_H263;
636         s->h263_flv        = 2; /* format = 1; 11-bit codes */
637         s->unrestricted_mv = 1;
638         s->rtp_mode  = 0; /* don't allow GOB */
639         avctx->delay = 0;
640         s->low_delay = 1;
641         break;
642     case AV_CODEC_ID_RV10:
643         s->out_format = FMT_H263;
644         avctx->delay  = 0;
645         s->low_delay  = 1;
646         break;
647     case AV_CODEC_ID_RV20:
648         s->out_format      = FMT_H263;
649         avctx->delay       = 0;
650         s->low_delay       = 1;
651         s->modified_quant  = 1;
652         s->h263_aic        = 1;
653         s->h263_plus       = 1;
654         s->loop_filter     = 1;
655         s->unrestricted_mv = 0;
656         break;
657     case AV_CODEC_ID_MPEG4:
658         s->out_format      = FMT_H263;
659         s->h263_pred       = 1;
660         s->unrestricted_mv = 1;
661         s->low_delay       = s->max_b_frames ? 0 : 1;
662         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
663         break;
664     case AV_CODEC_ID_MSMPEG4V2:
665         s->out_format      = FMT_H263;
666         s->h263_pred       = 1;
667         s->unrestricted_mv = 1;
668         s->msmpeg4_version = 2;
669         avctx->delay       = 0;
670         s->low_delay       = 1;
671         break;
672     case AV_CODEC_ID_MSMPEG4V3:
673         s->out_format        = FMT_H263;
674         s->h263_pred         = 1;
675         s->unrestricted_mv   = 1;
676         s->msmpeg4_version   = 3;
677         s->flipflop_rounding = 1;
678         avctx->delay         = 0;
679         s->low_delay         = 1;
680         break;
681     case AV_CODEC_ID_WMV1:
682         s->out_format        = FMT_H263;
683         s->h263_pred         = 1;
684         s->unrestricted_mv   = 1;
685         s->msmpeg4_version   = 4;
686         s->flipflop_rounding = 1;
687         avctx->delay         = 0;
688         s->low_delay         = 1;
689         break;
690     case AV_CODEC_ID_WMV2:
691         s->out_format        = FMT_H263;
692         s->h263_pred         = 1;
693         s->unrestricted_mv   = 1;
694         s->msmpeg4_version   = 5;
695         s->flipflop_rounding = 1;
696         avctx->delay         = 0;
697         s->low_delay         = 1;
698         break;
699     default:
700         return -1;
701     }
702
703     avctx->has_b_frames = !s->low_delay;
704
705     s->encoding = 1;
706
707     s->progressive_frame    =
708     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
709                                                 CODEC_FLAG_INTERLACED_ME) ||
710                                 s->alternate_scan);
711
712     /* init */
713     if (ff_MPV_common_init(s) < 0)
714         return -1;
715
716     if (ARCH_X86)
717         ff_MPV_encode_init_x86(s);
718
719     s->avctx->coded_frame = &s->current_picture.f;
720
721     if (s->msmpeg4_version) {
722         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
723                           2 * 2 * (MAX_LEVEL + 1) *
724                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
725     }
726     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
727
728     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
729     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
730     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
731     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
732     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
733                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
734     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
735                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
736
737     if (s->avctx->noise_reduction) {
738         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
739                           2 * 64 * sizeof(uint16_t), fail);
740     }
741
742     ff_h263dsp_init(&s->h263dsp);
743     if (!s->dct_quantize)
744         s->dct_quantize = ff_dct_quantize_c;
745     if (!s->denoise_dct)
746         s->denoise_dct  = denoise_dct_c;
747     s->fast_dct_quantize = s->dct_quantize;
748     if (avctx->trellis)
749         s->dct_quantize  = dct_quantize_trellis_c;
750
751     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
752         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
753
754     s->quant_precision = 5;
755
756     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
757     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
758
759     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
760         ff_h261_encode_init(s);
761     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
762         ff_h263_encode_init(s);
763     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
764         ff_msmpeg4_encode_init(s);
765     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
766         && s->out_format == FMT_MPEG1)
767         ff_mpeg1_encode_init(s);
768
769     /* init q matrix */
770     for (i = 0; i < 64; i++) {
771         int j = s->dsp.idct_permutation[i];
772         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
773             s->mpeg_quant) {
774             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
775             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
776         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
777             s->intra_matrix[j] =
778             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
779         } else {
780             /* mpeg1/2 */
781             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
782             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
783         }
784         if (s->avctx->intra_matrix)
785             s->intra_matrix[j] = s->avctx->intra_matrix[i];
786         if (s->avctx->inter_matrix)
787             s->inter_matrix[j] = s->avctx->inter_matrix[i];
788     }
789
790     /* precompute matrix */
791     /* for mjpeg, we do include qscale in the matrix */
792     if (s->out_format != FMT_MJPEG) {
793         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
794                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
795                           31, 1);
796         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
797                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
798                           31, 0);
799     }
800
801     if (ff_rate_control_init(s) < 0)
802         return -1;
803
804 #if FF_API_ERROR_RATE
805     FF_DISABLE_DEPRECATION_WARNINGS
806     if (avctx->error_rate)
807         s->error_rate = avctx->error_rate;
808     FF_ENABLE_DEPRECATION_WARNINGS;
809 #endif
810
811     if (avctx->b_frame_strategy == 2) {
812         for (i = 0; i < s->max_b_frames + 2; i++) {
813             s->tmp_frames[i] = av_frame_alloc();
814             if (!s->tmp_frames[i])
815                 return AVERROR(ENOMEM);
816
817             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
818             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
819             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
820
821             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
822             if (ret < 0)
823                 return ret;
824         }
825     }
826
827     return 0;
828 fail:
829     ff_MPV_encode_end(avctx);
830     return AVERROR_UNKNOWN;
831 }
832
833 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
834 {
835     MpegEncContext *s = avctx->priv_data;
836     int i;
837
838     ff_rate_control_uninit(s);
839
840     ff_MPV_common_end(s);
841     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
842         s->out_format == FMT_MJPEG)
843         ff_mjpeg_encode_close(s);
844
845     av_freep(&avctx->extradata);
846
847     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
848         av_frame_free(&s->tmp_frames[i]);
849
850     ff_free_picture_tables(&s->new_picture);
851     ff_mpeg_unref_picture(s, &s->new_picture);
852
853     av_freep(&s->avctx->stats_out);
854     av_freep(&s->ac_stats);
855
856     av_freep(&s->q_intra_matrix);
857     av_freep(&s->q_inter_matrix);
858     av_freep(&s->q_intra_matrix16);
859     av_freep(&s->q_inter_matrix16);
860     av_freep(&s->input_picture);
861     av_freep(&s->reordered_input_picture);
862     av_freep(&s->dct_offset);
863
864     return 0;
865 }
866
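/* Sum of absolute differences of a 16x16 block against a constant
 * reference value (the caller passes the block mean). */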
867 static int get_sae(uint8_t *src, int ref, int stride)
868 {
869     int x,y;
870     int acc = 0;
871
872     for (y = 0; y < 16; y++) {
873         for (x = 0; x < 16; x++) {
874             acc += FFABS(src[x + y * stride] - ref);
875         }
876     }
877
878     return acc;
879 }
880
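/* Helper for b_frame_strategy 1: count the macroblocks for which coding
 * against the block mean (intra) looks clearly cheaper than coding
 * against the reference frame (inter). */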
881 static int get_intra_count(MpegEncContext *s, uint8_t *src,
882                            uint8_t *ref, int stride)
883 {
884     int x, y, w, h;
885     int acc = 0;
886
887     w = s->width  & ~15;
888     h = s->height & ~15;
889
890     for (y = 0; y < h; y += 16) {
891         for (x = 0; x < w; x += 16) {
892             int offset = x + y * stride;
893             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
894                                      16);
895             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
896             int sae  = get_sae(src + offset, mean, stride);
897
898             acc += sae + 500 < sad;
899         }
900     }
901     return acc;
902 }
903
904
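/* Queue one user-supplied frame for encoding: validate (or guess) its
 * timestamp, then either reference the user's buffer directly when the
 * strides match, or copy it into an internal picture, and finally push
 * it onto the input_picture FIFO. */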
905 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
906 {
907     Picture *pic = NULL;
908     int64_t pts;
909     int i, display_picture_number = 0, ret;
910     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
911                                                  (s->low_delay ? 0 : 1);
912     int direct = 1;
913
914     if (pic_arg) {
915         pts = pic_arg->pts;
916         display_picture_number = s->input_picture_number++;
917
918         if (pts != AV_NOPTS_VALUE) {
919             if (s->user_specified_pts != AV_NOPTS_VALUE) {
920                 int64_t time = pts;
921                 int64_t last = s->user_specified_pts;
922
923                 if (time <= last) {
924                     av_log(s->avctx, AV_LOG_ERROR,
925                            "Error, Invalid timestamp=%"PRId64", "
926                            "last=%"PRId64"\n", pts, s->user_specified_pts);
927                     return -1;
928                 }
929
930                 if (!s->low_delay && display_picture_number == 1)
931                     s->dts_delta = time - last;
932             }
933             s->user_specified_pts = pts;
934         } else {
935             if (s->user_specified_pts != AV_NOPTS_VALUE) {
936                 s->user_specified_pts =
937                 pts = s->user_specified_pts + 1;
938                 av_log(s->avctx, AV_LOG_INFO,
939                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
940                        pts);
941             } else {
942                 pts = display_picture_number;
943             }
944         }
945     }
946
947     if (pic_arg) {
948         if (!pic_arg->buf[0])
949             direct = 0;
950         if (pic_arg->linesize[0] != s->linesize)
951             direct = 0;
952         if (pic_arg->linesize[1] != s->uvlinesize)
953             direct = 0;
954         if (pic_arg->linesize[2] != s->uvlinesize)
955             direct = 0;
956
957         av_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
958                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
959
960         if (direct) {
961             i = ff_find_unused_picture(s, 1);
962             if (i < 0)
963                 return i;
964
965             pic = &s->picture[i];
966             pic->reference = 3;
967
968             if ((ret = av_frame_ref(&pic->f, pic_arg)) < 0)
969                 return ret;
970             if (ff_alloc_picture(s, pic, 1) < 0) {
971                 return -1;
972             }
973         } else {
974             i = ff_find_unused_picture(s, 0);
975             if (i < 0)
976                 return i;
977
978             pic = &s->picture[i];
979             pic->reference = 3;
980
981             if (ff_alloc_picture(s, pic, 0) < 0) {
982                 return -1;
983             }
984
985             if (pic->f.data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
986                 pic->f.data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
987                 pic->f.data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
988                 // empty
989             } else {
990                 int h_chroma_shift, v_chroma_shift;
991                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
992                                                  &h_chroma_shift,
993                                                  &v_chroma_shift);
994
995                 for (i = 0; i < 3; i++) {
996                     int src_stride = pic_arg->linesize[i];
997                     int dst_stride = i ? s->uvlinesize : s->linesize;
998                     int h_shift = i ? h_chroma_shift : 0;
999                     int v_shift = i ? v_chroma_shift : 0;
1000                     int w = s->width  >> h_shift;
1001                     int h = s->height >> v_shift;
1002                     uint8_t *src = pic_arg->data[i];
1003                     uint8_t *dst = pic->f.data[i];
1004
1005                     if (!s->avctx->rc_buffer_size)
1006                         dst += INPLACE_OFFSET;
1007
1008                     if (src_stride == dst_stride)
1009                         memcpy(dst, src, src_stride * h);
1010                     else {
1011                         while (h--) {
1012                             memcpy(dst, src, w);
1013                             dst += dst_stride;
1014                             src += src_stride;
1015                         }
1016                     }
1017                 }
1018             }
1019         }
1020         ret = av_frame_copy_props(&pic->f, pic_arg);
1021         if (ret < 0)
1022             return ret;
1023
1024         pic->f.display_picture_number = display_picture_number;
1025         pic->f.pts = pts; // we set this here to avoid modifying pic_arg
1026     }
1027
1028     /* shift buffer entries */
1029     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1030         s->input_picture[i - 1] = s->input_picture[i];
1031
1032     s->input_picture[encoding_delay] = (Picture*) pic;
1033
1034     return 0;
1035 }
1036
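/* Decide whether the new input picture is similar enough to the last
 * reference to be dropped: accumulate an 8x8 block comparison metric
 * (shaped by frame_skip_exp) and test it against frame_skip_threshold
 * and frame_skip_factor. */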
1037 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1038 {
1039     int x, y, plane;
1040     int score = 0;
1041     int64_t score64 = 0;
1042
1043     for (plane = 0; plane < 3; plane++) {
1044         const int stride = p->f.linesize[plane];
1045         const int bw = plane ? 1 : 2;
1046         for (y = 0; y < s->mb_height * bw; y++) {
1047             for (x = 0; x < s->mb_width * bw; x++) {
1048                 int off = p->shared ? 0 : 16;
1049                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1050                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1051                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1052
1053                 switch (s->avctx->frame_skip_exp) {
1054                 case 0: score    =  FFMAX(score, v);          break;
1055                 case 1: score   += FFABS(v);                  break;
1056                 case 2: score   += v * v;                     break;
1057                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1058                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1059                 }
1060             }
1061         }
1062     }
1063
1064     if (score)
1065         score64 = score;
1066
1067     if (score64 < s->avctx->frame_skip_threshold)
1068         return 1;
1069     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1070         return 1;
1071     return 0;
1072 }
1073
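/* Encode a single frame with the temporary context used by
 * estimate_best_b_count() and return the resulting packet size. */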
1074 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1075 {
1076     AVPacket pkt = { 0 };
1077     int ret, got_output;
1078
1079     av_init_packet(&pkt);
1080     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1081     if (ret < 0)
1082         return ret;
1083
1084     ret = pkt.size;
1085     av_free_packet(&pkt);
1086     return ret;
1087 }
1088
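/* b_frame_strategy 2: encode the buffered input pictures at reduced
 * resolution with every possible number of consecutive B-frames and
 * return the count that gives the lowest rate-distortion cost. */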
1089 static int estimate_best_b_count(MpegEncContext *s)
1090 {
1091     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1092     AVCodecContext *c = avcodec_alloc_context3(NULL);
1093     const int scale = s->avctx->brd_scale;
1094     int i, j, out_size, p_lambda, b_lambda, lambda2;
1095     int64_t best_rd  = INT64_MAX;
1096     int best_b_count = -1;
1097
1098     assert(scale >= 0 && scale <= 3);
1099
1100     //emms_c();
1101     //s->next_picture_ptr->quality;
1102     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1103     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1104     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1105     if (!b_lambda) // FIXME we should do this somewhere else
1106         b_lambda = p_lambda;
1107     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1108                FF_LAMBDA_SHIFT;
1109
1110     c->width        = s->width  >> scale;
1111     c->height       = s->height >> scale;
1112     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1113                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1114     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1115     c->mb_decision  = s->avctx->mb_decision;
1116     c->me_cmp       = s->avctx->me_cmp;
1117     c->mb_cmp       = s->avctx->mb_cmp;
1118     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1119     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1120     c->time_base    = s->avctx->time_base;
1121     c->max_b_frames = s->max_b_frames;
1122
1123     if (avcodec_open2(c, codec, NULL) < 0)
1124         return -1;
1125
1126     for (i = 0; i < s->max_b_frames + 2; i++) {
1127         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1128                                                 s->next_picture_ptr;
1129
1130         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1131             pre_input = *pre_input_ptr;
1132
1133             if (!pre_input.shared && i) {
1134                 pre_input.f.data[0] += INPLACE_OFFSET;
1135                 pre_input.f.data[1] += INPLACE_OFFSET;
1136                 pre_input.f.data[2] += INPLACE_OFFSET;
1137             }
1138
1139             s->dsp.shrink[scale](s->tmp_frames[i]->data[0], s->tmp_frames[i]->linesize[0],
1140                                  pre_input.f.data[0], pre_input.f.linesize[0],
1141                                  c->width,      c->height);
1142             s->dsp.shrink[scale](s->tmp_frames[i]->data[1], s->tmp_frames[i]->linesize[1],
1143                                  pre_input.f.data[1], pre_input.f.linesize[1],
1144                                  c->width >> 1, c->height >> 1);
1145             s->dsp.shrink[scale](s->tmp_frames[i]->data[2], s->tmp_frames[i]->linesize[2],
1146                                  pre_input.f.data[2], pre_input.f.linesize[2],
1147                                  c->width >> 1, c->height >> 1);
1148         }
1149     }
1150
1151     for (j = 0; j < s->max_b_frames + 1; j++) {
1152         int64_t rd = 0;
1153
1154         if (!s->input_picture[j])
1155             break;
1156
1157         c->error[0] = c->error[1] = c->error[2] = 0;
1158
1159         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1160         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1161
1162         out_size = encode_frame(c, s->tmp_frames[0]);
1163
1164         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1165
1166         for (i = 0; i < s->max_b_frames + 1; i++) {
1167             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1168
1169             s->tmp_frames[i + 1]->pict_type = is_p ?
1170                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1171             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1172
1173             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1174
1175             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1176         }
1177
1178         /* get the delayed frames */
1179         while (out_size) {
1180             out_size = encode_frame(c, NULL);
1181             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1182         }
1183
1184         rd += c->error[0] + c->error[1] + c->error[2];
1185
1186         if (rd < best_rd) {
1187             best_rd = rd;
1188             best_b_count = j;
1189         }
1190     }
1191
1192     avcodec_close(c);
1193     av_freep(&c);
1194
1195     return best_b_count;
1196 }
1197
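/* Pick the next picture to encode and assign coded types: handle frame
 * skipping, choose how many B-frames to insert according to
 * b_frame_strategy, respect GOP boundaries and closed GOPs, and set up
 * new_picture / current_picture for the actual encode. */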
1198 static int select_input_picture(MpegEncContext *s)
1199 {
1200     int i, ret;
1201
1202     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1203         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1204     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1205
1206     /* set next picture type & ordering */
1207     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1208         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1209             s->next_picture_ptr == NULL || s->intra_only) {
1210             s->reordered_input_picture[0] = s->input_picture[0];
1211             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1212             s->reordered_input_picture[0]->f.coded_picture_number =
1213                 s->coded_picture_number++;
1214         } else {
1215             int b_frames;
1216
1217             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1218                 if (s->picture_in_gop_number < s->gop_size &&
1219                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1220                     // FIXME check that the gop check above is +-1 correct
1221                     av_frame_unref(&s->input_picture[0]->f);
1222
1223                     emms_c();
1224                     ff_vbv_update(s, 0);
1225
1226                     goto no_output_pic;
1227                 }
1228             }
1229
1230             if (s->flags & CODEC_FLAG_PASS2) {
1231                 for (i = 0; i < s->max_b_frames + 1; i++) {
1232                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1233
1234                     if (pict_num >= s->rc_context.num_entries)
1235                         break;
1236                     if (!s->input_picture[i]) {
1237                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1238                         break;
1239                     }
1240
1241                     s->input_picture[i]->f.pict_type =
1242                         s->rc_context.entry[pict_num].new_pict_type;
1243                 }
1244             }
1245
1246             if (s->avctx->b_frame_strategy == 0) {
1247                 b_frames = s->max_b_frames;
1248                 while (b_frames && !s->input_picture[b_frames])
1249                     b_frames--;
1250             } else if (s->avctx->b_frame_strategy == 1) {
1251                 for (i = 1; i < s->max_b_frames + 1; i++) {
1252                     if (s->input_picture[i] &&
1253                         s->input_picture[i]->b_frame_score == 0) {
1254                         s->input_picture[i]->b_frame_score =
1255                             get_intra_count(s,
1256                                             s->input_picture[i    ]->f.data[0],
1257                                             s->input_picture[i - 1]->f.data[0],
1258                                             s->linesize) + 1;
1259                     }
1260                 }
1261                 for (i = 0; i < s->max_b_frames + 1; i++) {
1262                     if (s->input_picture[i] == NULL ||
1263                         s->input_picture[i]->b_frame_score - 1 >
1264                             s->mb_num / s->avctx->b_sensitivity)
1265                         break;
1266                 }
1267
1268                 b_frames = FFMAX(0, i - 1);
1269
1270                 /* reset scores */
1271                 for (i = 0; i < b_frames + 1; i++) {
1272                     s->input_picture[i]->b_frame_score = 0;
1273                 }
1274             } else if (s->avctx->b_frame_strategy == 2) {
1275                 b_frames = estimate_best_b_count(s);
1276             } else {
1277                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1278                 b_frames = 0;
1279             }
1280
1281             emms_c();
1282
1283             for (i = b_frames - 1; i >= 0; i--) {
1284                 int type = s->input_picture[i]->f.pict_type;
1285                 if (type && type != AV_PICTURE_TYPE_B)
1286                     b_frames = i;
1287             }
1288             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1289                 b_frames == s->max_b_frames) {
1290                 av_log(s->avctx, AV_LOG_ERROR,
1291                        "warning, too many b frames in a row\n");
1292             }
1293
1294             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1295                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1296                     s->gop_size > s->picture_in_gop_number) {
1297                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1298                 } else {
1299                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1300                         b_frames = 0;
1301                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1302                 }
1303             }
1304
1305             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1306                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1307                 b_frames--;
1308
1309             s->reordered_input_picture[0] = s->input_picture[b_frames];
1310             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1311                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1312             s->reordered_input_picture[0]->f.coded_picture_number =
1313                 s->coded_picture_number++;
1314             for (i = 0; i < b_frames; i++) {
1315                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1316                 s->reordered_input_picture[i + 1]->f.pict_type =
1317                     AV_PICTURE_TYPE_B;
1318                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1319                     s->coded_picture_number++;
1320             }
1321         }
1322     }
1323 no_output_pic:
1324     if (s->reordered_input_picture[0]) {
1325         s->reordered_input_picture[0]->reference =
1326            s->reordered_input_picture[0]->f.pict_type !=
1327                AV_PICTURE_TYPE_B ? 3 : 0;
1328
1329         ff_mpeg_unref_picture(s, &s->new_picture);
1330         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1331             return ret;
1332
1333         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1334             // input is a shared pix, so we can't modify it -> alloc a new
1335             // one & ensure that the shared one is reusable
1336
1337             Picture *pic;
1338             int i = ff_find_unused_picture(s, 0);
1339             if (i < 0)
1340                 return i;
1341             pic = &s->picture[i];
1342
1343             pic->reference = s->reordered_input_picture[0]->reference;
1344             if (ff_alloc_picture(s, pic, 0) < 0) {
1345                 return -1;
1346             }
1347
1348             ret = av_frame_copy_props(&pic->f, &s->reordered_input_picture[0]->f);
1349             if (ret < 0)
1350                 return ret;
1351
1352             /* mark us unused / free shared pic */
1353             av_frame_unref(&s->reordered_input_picture[0]->f);
1354             s->reordered_input_picture[0]->shared = 0;
1355
1356             s->current_picture_ptr = pic;
1357         } else {
1358             // input is not a shared pix -> reuse buffer for current_pix
1359             s->current_picture_ptr = s->reordered_input_picture[0];
1360             for (i = 0; i < 4; i++) {
1361                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1362             }
1363         }
1364         ff_mpeg_unref_picture(s, &s->current_picture);
1365         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1366                                        s->current_picture_ptr)) < 0)
1367             return ret;
1368
1369         s->picture_number = s->new_picture.f.display_picture_number;
1370     } else {
1371         ff_mpeg_unref_picture(s, &s->new_picture);
1372     }
1373     return 0;
1374 }
1375
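/* Per-frame cleanup after encoding: pad the reconstructed reference
 * with edge pixels for unrestricted motion vectors, remember the last
 * lambda per picture type and release non-reference pictures. */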
1376 static void frame_end(MpegEncContext *s)
1377 {
1378     int i;
1379
1380     if (s->unrestricted_mv &&
1381         s->current_picture.reference &&
1382         !s->intra_only) {
1383         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1384         int hshift = desc->log2_chroma_w;
1385         int vshift = desc->log2_chroma_h;
1386         s->dsp.draw_edges(s->current_picture.f.data[0], s->linesize,
1387                           s->h_edge_pos, s->v_edge_pos,
1388                           EDGE_WIDTH, EDGE_WIDTH,
1389                           EDGE_TOP | EDGE_BOTTOM);
1390         s->dsp.draw_edges(s->current_picture.f.data[1], s->uvlinesize,
1391                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1392                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1393                           EDGE_TOP | EDGE_BOTTOM);
1394         s->dsp.draw_edges(s->current_picture.f.data[2], s->uvlinesize,
1395                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1396                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1397                           EDGE_TOP | EDGE_BOTTOM);
1398     }
1399
1400     emms_c();
1401
1402     s->last_pict_type                 = s->pict_type;
1403     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f.quality;
1404     if (s->pict_type!= AV_PICTURE_TYPE_B)
1405         s->last_non_b_pict_type = s->pict_type;
1406
1407     if (s->encoding) {
1408         /* release non-reference frames */
1409         for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1410             if (!s->picture[i].reference)
1411                 ff_mpeg_unref_picture(s, &s->picture[i]);
1412         }
1413     }
1414
1415     s->avctx->coded_frame = &s->current_picture_ptr->f;
1416
1417 }
1418
1419 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1420                           const AVFrame *pic_arg, int *got_packet)
1421 {
1422     MpegEncContext *s = avctx->priv_data;
1423     int i, stuffing_count, ret;
1424     int context_count = s->slice_context_count;
1425
1426     s->picture_in_gop_number++;
1427
1428     if (load_input_picture(s, pic_arg) < 0)
1429         return -1;
1430
1431     if (select_input_picture(s) < 0) {
1432         return -1;
1433     }
1434
1435     /* output? */
1436     if (s->new_picture.f.data[0]) {
1437         if (!pkt->data &&
1438             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1439             return ret;
1440         if (s->mb_info) {
1441             s->mb_info_ptr = av_packet_new_side_data(pkt,
1442                                  AV_PKT_DATA_H263_MB_INFO,
1443                                  s->mb_width*s->mb_height*12);
1444             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1445         }
1446
1447         for (i = 0; i < context_count; i++) {
1448             int start_y = s->thread_context[i]->start_mb_y;
1449             int   end_y = s->thread_context[i]->  end_mb_y;
1450             int h       = s->mb_height;
1451             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1452             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1453
1454             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1455         }
1456
1457         s->pict_type = s->new_picture.f.pict_type;
1458         //emms_c();
1459         ff_MPV_frame_start(s, avctx);
1460 vbv_retry:
1461         if (encode_picture(s, s->picture_number) < 0)
1462             return -1;
1463
1464         avctx->header_bits = s->header_bits;
1465         avctx->mv_bits     = s->mv_bits;
1466         avctx->misc_bits   = s->misc_bits;
1467         avctx->i_tex_bits  = s->i_tex_bits;
1468         avctx->p_tex_bits  = s->p_tex_bits;
1469         avctx->i_count     = s->i_count;
1470         // FIXME f/b_count in avctx
1471         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1472         avctx->skip_count  = s->skip_count;
1473
1474         frame_end(s);
1475
1476         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1477             ff_mjpeg_encode_picture_trailer(s);
1478
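             /* VBV check: if the frame we just encoded does not fit into the
              * configured rate-control buffer, raise lambda (and the per-MB
              * lambda table when adaptive quantization is used), undo the
              * per-frame state changes made during encoding (mb_skipped, the
              * no_rounding toggle, the time base), rewind the per-thread
              * bitstream writers and re-encode the frame via vbv_retry. */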
1479         if (avctx->rc_buffer_size) {
1480             RateControlContext *rcc = &s->rc_context;
1481             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1482
1483             if (put_bits_count(&s->pb) > max_size &&
1484                 s->lambda < s->avctx->lmax) {
1485                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1486                                        (s->qscale + 1) / s->qscale);
1487                 if (s->adaptive_quant) {
1488                     int i;
1489                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1490                         s->lambda_table[i] =
1491                             FFMAX(s->lambda_table[i] + 1,
1492                                   s->lambda_table[i] * (s->qscale + 1) /
1493                                   s->qscale);
1494                 }
1495                 s->mb_skipped = 0;        // normally done in ff_MPV_frame_start(), which is not re-run on retry
1496                 // encode_picture() toggled no_rounding for this frame, so undo it before retrying
1497                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1498                     if (s->flipflop_rounding          ||
1499                         s->codec_id == AV_CODEC_ID_H263P ||
1500                         s->codec_id == AV_CODEC_ID_MPEG4)
1501                         s->no_rounding ^= 1;
1502                 }
1503                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1504                     s->time_base       = s->last_time_base;
1505                     s->last_non_b_time = s->time - s->pp_time;
1506                 }
1507                 for (i = 0; i < context_count; i++) {
1508                     PutBitContext *pb = &s->thread_context[i]->pb;
1509                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1510                 }
1511                 goto vbv_retry;
1512             }
1513
1514             assert(s->avctx->rc_max_rate);
1515         }
1516
1517         if (s->flags & CODEC_FLAG_PASS1)
1518             ff_write_pass1_stats(s);
1519
1520         for (i = 0; i < 4; i++) {
1521             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1522             avctx->error[i] += s->current_picture_ptr->f.error[i];
1523         }
1524
1525         if (s->flags & CODEC_FLAG_PASS1)
1526             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1527                    avctx->i_tex_bits + avctx->p_tex_bits ==
1528                        put_bits_count(&s->pb));
1529         flush_put_bits(&s->pb);
1530         s->frame_bits  = put_bits_count(&s->pb);
1531
1532         stuffing_count = ff_vbv_update(s, s->frame_bits);
1533         if (stuffing_count) {
1534             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1535                     stuffing_count + 50) {
1536                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1537                 return -1;
1538             }
1539
1540             switch (s->codec_id) {
1541             case AV_CODEC_ID_MPEG1VIDEO:
1542             case AV_CODEC_ID_MPEG2VIDEO:
1543                 while (stuffing_count--) {
1544                     put_bits(&s->pb, 8, 0);
1545                 }
1546             break;
1547             case AV_CODEC_ID_MPEG4:
1548                 put_bits(&s->pb, 16, 0);
1549                 put_bits(&s->pb, 16, 0x1C3);
1550                 stuffing_count -= 4;
1551                 while (stuffing_count--) {
1552                     put_bits(&s->pb, 8, 0xFF);
1553                 }
1554             break;
1555             default:
1556                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1557             }
1558             flush_put_bits(&s->pb);
1559             s->frame_bits  = put_bits_count(&s->pb);
1560         }
1561
1562         /* update mpeg1/2 vbv_delay for CBR */
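             /* The picture header's 16-bit vbv_delay field was written earlier
              * (s->vbv_delay_ptr points at it).  Compute the actual delay in
              * 90 kHz ticks from the rate-control buffer state and patch it
              * into the three bytes it straddles; avctx->vbv_delay is reported
              * in 27 MHz units, hence the final multiplication by 300. */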
1563         if (s->avctx->rc_max_rate                          &&
1564             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1565             s->out_format == FMT_MPEG1                     &&
1566             90000LL * (avctx->rc_buffer_size - 1) <=
1567                 s->avctx->rc_max_rate * 0xFFFFLL) {
1568             int vbv_delay, min_delay;
1569             double inbits  = s->avctx->rc_max_rate *
1570                              av_q2d(s->avctx->time_base);
1571             int    minbits = s->frame_bits - 8 *
1572                              (s->vbv_delay_ptr - s->pb.buf - 1);
1573             double bits    = s->rc_context.buffer_index + minbits - inbits;
1574
1575             if (bits < 0)
1576                 av_log(s->avctx, AV_LOG_ERROR,
1577                        "Internal error, negative bits\n");
1578
1579             assert(s->repeat_first_field == 0);
1580
1581             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1582             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1583                         s->avctx->rc_max_rate;
1584
1585             vbv_delay = FFMAX(vbv_delay, min_delay);
1586
1587             assert(vbv_delay < 0xFFFF);
1588
1589             s->vbv_delay_ptr[0] &= 0xF8;
1590             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1591             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1592             s->vbv_delay_ptr[2] &= 0x07;
1593             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1594             avctx->vbv_delay     = vbv_delay * 300;
1595         }
1596         s->total_bits     += s->frame_bits;
1597         avctx->frame_bits  = s->frame_bits;
1598
1599         pkt->pts = s->current_picture.f.pts;
1600         if (!s->low_delay) {
1601             if (!s->current_picture.f.coded_picture_number)
1602                 pkt->dts = pkt->pts - s->dts_delta;
1603             else
1604                 pkt->dts = s->reordered_pts;
1605             s->reordered_pts = s->input_picture[0]->f.pts;
1606         } else
1607             pkt->dts = pkt->pts;
1608         if (s->current_picture.f.key_frame)
1609             pkt->flags |= AV_PKT_FLAG_KEY;
1610         if (s->mb_info)
1611             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1612     } else {
1613         s->frame_bits = 0;
1614     }
1615     assert((s->frame_bits & 7) == 0);
1616
1617     pkt->size = s->frame_bits / 8;
1618     *got_packet = !!pkt->size;
1619     return 0;
1620 }
1621
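     /* Zero out a block that contains only a few isolated +/-1 coefficients.
      * Any coefficient with |level| > 1 keeps the block; otherwise every +/-1
      * coefficient adds a score depending on the zero run preceding it
      * (tab[]), and if the total stays below the threshold the whole block is
      * cleared, the assumption being that coding it would cost more bits than
      * it is visually worth.  A negative threshold additionally allows the DC
      * coefficient to be eliminated. */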
1622 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1623                                                 int n, int threshold)
1624 {
1625     static const char tab[64] = {
1626         3, 2, 2, 1, 1, 1, 1, 1,
1627         1, 1, 1, 1, 1, 1, 1, 1,
1628         1, 1, 1, 1, 1, 1, 1, 1,
1629         0, 0, 0, 0, 0, 0, 0, 0,
1630         0, 0, 0, 0, 0, 0, 0, 0,
1631         0, 0, 0, 0, 0, 0, 0, 0,
1632         0, 0, 0, 0, 0, 0, 0, 0,
1633         0, 0, 0, 0, 0, 0, 0, 0
1634     };
1635     int score = 0;
1636     int run = 0;
1637     int i;
1638     int16_t *block = s->block[n];
1639     const int last_index = s->block_last_index[n];
1640     int skip_dc;
1641
1642     if (threshold < 0) {
1643         skip_dc = 0;
1644         threshold = -threshold;
1645     } else
1646         skip_dc = 1;
1647
1648     /* Is everything we could set to zero already zero? */
1649     if (last_index <= skip_dc - 1)
1650         return;
1651
1652     for (i = 0; i <= last_index; i++) {
1653         const int j = s->intra_scantable.permutated[i];
1654         const int level = FFABS(block[j]);
1655         if (level == 1) {
1656             if (skip_dc && i == 0)
1657                 continue;
1658             score += tab[run];
1659             run = 0;
1660         } else if (level > 1) {
1661             return;
1662         } else {
1663             run++;
1664         }
1665     }
1666     if (score >= threshold)
1667         return;
1668     for (i = skip_dc; i <= last_index; i++) {
1669         const int j = s->intra_scantable.permutated[i];
1670         block[j] = 0;
1671     }
1672     if (block[0])
1673         s->block_last_index[n] = 0;
1674     else
1675         s->block_last_index[n] = -1;
1676 }
1677
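     /* Clamp the quantized coefficients to the range the codec can represent,
      * [min_qcoeff, max_qcoeff]; the intra DC coefficient is never clipped.
      * A warning is printed when clipping occurred and the simple macroblock
      * decision mode is in use. */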
1678 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1679                                int last_index)
1680 {
1681     int i;
1682     const int maxlevel = s->max_qcoeff;
1683     const int minlevel = s->min_qcoeff;
1684     int overflow = 0;
1685
1686     if (s->mb_intra) {
1687         i = 1; // skip clipping of intra dc
1688     } else
1689         i = 0;
1690
1691     for (; i <= last_index; i++) {
1692         const int j = s->intra_scantable.permutated[i];
1693         int level = block[j];
1694
1695         if (level > maxlevel) {
1696             level = maxlevel;
1697             overflow++;
1698         } else if (level < minlevel) {
1699             level = minlevel;
1700             overflow++;
1701         }
1702
1703         block[j] = level;
1704     }
1705
1706     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1707         av_log(s->avctx, AV_LOG_INFO,
1708                "warning, clipping %d dct coefficients to %d..%d\n",
1709                overflow, minlevel, maxlevel);
1710 }
1711
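     /* Compute a per-pixel visual weight for an 8x8 source block:
      * 36 * sqrt(count*sqr - sum*sum) / count over each pixel's 3x3
      * neighbourhood, i.e. roughly 36 times the local standard deviation.
      * The weights are passed to dct_quantize_refine() when quantizer noise
      * shaping is enabled. */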
1712 static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
1713 {
1714     int x, y;
1715     // FIXME optimize
1716     for (y = 0; y < 8; y++) {
1717         for (x = 0; x < 8; x++) {
1718             int x2, y2;
1719             int sum = 0;
1720             int sqr = 0;
1721             int count = 0;
1722
1723             for (y2 = FFMAX(y - 1, 0); y2 < FFMIN(8, y + 2); y2++) {
1724                 for (x2= FFMAX(x - 1, 0); x2 < FFMIN(8, x + 2); x2++) {
1725                     int v = ptr[x2 + y2 * stride];
1726                     sum += v;
1727                     sqr += v * v;
1728                     count++;
1729                 }
1730             }
1731             weight[x + 8 * y]= (36 * ff_sqrt(count * sqr - sum * sum)) / count;
1732         }
1733     }
1734 }
1735
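     /* Encode one macroblock: apply per-MB adaptive quantization, fetch the
      * source pixels (and the motion-compensated prediction for inter MBs),
      * decide between frame and field DCT, transform and quantize each block,
      * optionally apply noise shaping and coefficient elimination, and emit
      * the codec-specific bitstream.  mb_block_height / mb_block_count select
      * between the 4:2:0 (6 block) and 4:2:2 (8 block) layouts. */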
1736 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1737                                                 int motion_x, int motion_y,
1738                                                 int mb_block_height,
1739                                                 int mb_block_count)
1740 {
1741     int16_t weight[8][64];
1742     int16_t orig[8][64];
1743     const int mb_x = s->mb_x;
1744     const int mb_y = s->mb_y;
1745     int i;
1746     int skip_dct[8];
1747     int dct_offset = s->linesize * 8; // default for progressive frames
1748     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1749     ptrdiff_t wrap_y, wrap_c;
1750
1751     for (i = 0; i < mb_block_count; i++)
1752         skip_dct[i] = s->skipdct;
1753
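         /* With adaptive quantization, the macroblock's lambda and qscale come
          * from the per-MB tables filled in by the rate control; the resulting
          * dquant is clipped to what the target syntax allows (e.g. +/-2 for
          * H.263-style codecs, with additional MPEG-4 restrictions). */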
1754     if (s->adaptive_quant) {
1755         const int last_qp = s->qscale;
1756         const int mb_xy = mb_x + mb_y * s->mb_stride;
1757
1758         s->lambda = s->lambda_table[mb_xy];
1759         update_qscale(s);
1760
1761         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1762             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1763             s->dquant = s->qscale - last_qp;
1764
1765             if (s->out_format == FMT_H263) {
1766                 s->dquant = av_clip(s->dquant, -2, 2);
1767
1768                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1769                     if (!s->mb_intra) {
1770                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1771                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1772                                 s->dquant = 0;
1773                         }
1774                         if (s->mv_type == MV_TYPE_8X8)
1775                             s->dquant = 0;
1776                     }
1777                 }
1778             }
1779         }
1780         ff_set_qscale(s, last_qp + s->dquant);
1781     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1782         ff_set_qscale(s, s->qscale + s->dquant);
1783
1784     wrap_y = s->linesize;
1785     wrap_c = s->uvlinesize;
1786     ptr_y  = s->new_picture.f.data[0] +
1787              (mb_y * 16 * wrap_y)              + mb_x * 16;
1788     ptr_cb = s->new_picture.f.data[1] +
1789              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1790     ptr_cr = s->new_picture.f.data[2] +
1791              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1792
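         /* Macroblocks extending past the right or bottom picture border:
          * copy the source into the edge emulation buffer with the border
          * pixels replicated so that full 16x16 luma and 8-pixel-wide chroma
          * blocks can be read below. */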
1793     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1794         uint8_t *ebuf = s->edge_emu_buffer + 32;
1795         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
1796                                  wrap_y, wrap_y,
1797                                  16, 16, mb_x * 16, mb_y * 16,
1798                                  s->width, s->height);
1799         ptr_y = ebuf;
1800         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
1801                                  wrap_c, wrap_c,
1802                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1803                                  s->width >> 1, s->height >> 1);
1804         ptr_cb = ebuf + 18 * wrap_y;
1805         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
1806                                  wrap_c, wrap_c,
1807                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1808                                  s->width >> 1, s->height >> 1);
1809         ptr_cr = ebuf + 18 * wrap_y + 8;
1810     }
1811
1812     if (s->mb_intra) {
1813         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1814             int progressive_score, interlaced_score;
1815
1816             s->interlaced_dct = 0;
1817             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1818                                                     NULL, wrap_y, 8) +
1819                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1820                                                     NULL, wrap_y, 8) - 400;
1821
1822             if (progressive_score > 0) {
1823                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1824                                                        NULL, wrap_y * 2, 8) +
1825                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1826                                                        NULL, wrap_y * 2, 8);
1827                 if (progressive_score > interlaced_score) {
1828                     s->interlaced_dct = 1;
1829
1830                     dct_offset = wrap_y;
1831                     wrap_y <<= 1;
1832                     if (s->chroma_format == CHROMA_422)
1833                         wrap_c <<= 1;
1834                 }
1835             }
1836         }
1837
1838         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1839         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1840         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1841         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1842
1843         if (s->flags & CODEC_FLAG_GRAY) {
1844             skip_dct[4] = 1;
1845             skip_dct[5] = 1;
1846         } else {
1847             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1848             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1849             if (!s->chroma_y_shift) { /* 422 */
1850                 s->dsp.get_pixels(s->block[6],
1851                                   ptr_cb + (dct_offset >> 1), wrap_c);
1852                 s->dsp.get_pixels(s->block[7],
1853                                   ptr_cr + (dct_offset >> 1), wrap_c);
1854             }
1855         }
1856     } else {
1857         op_pixels_func (*op_pix)[4];
1858         qpel_mc_func (*op_qpix)[16];
1859         uint8_t *dest_y, *dest_cb, *dest_cr;
1860
1861         dest_y  = s->dest[0];
1862         dest_cb = s->dest[1];
1863         dest_cr = s->dest[2];
1864
1865         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1866             op_pix  = s->hdsp.put_pixels_tab;
1867             op_qpix = s->dsp.put_qpel_pixels_tab;
1868         } else {
1869             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
1870             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1871         }
1872
1873         if (s->mv_dir & MV_DIR_FORWARD) {
1874             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
1875                           s->last_picture.f.data,
1876                           op_pix, op_qpix);
1877             op_pix  = s->hdsp.avg_pixels_tab;
1878             op_qpix = s->dsp.avg_qpel_pixels_tab;
1879         }
1880         if (s->mv_dir & MV_DIR_BACKWARD) {
1881             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
1882                           s->next_picture.f.data,
1883                           op_pix, op_qpix);
1884         }
1885
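             /* Frame vs. field DCT decision (same idea as in the intra path
              * above): compare the interlace cost of the residual coded as two
              * frame halves against the same data read with doubled stride
              * (i.e. per field); if the field layout scores lower, enable
              * interlaced DCT by doubling wrap_y and pointing blocks 2/3 at
              * the second field via dct_offset. */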
1886         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1887             int progressive_score, interlaced_score;
1888
1889             s->interlaced_dct = 0;
1890             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1891                                                     ptr_y,              wrap_y,
1892                                                     8) +
1893                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1894                                                     ptr_y + wrap_y * 8, wrap_y,
1895                                                     8) - 400;
1896
1897             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1898                 progressive_score -= 400;
1899
1900             if (progressive_score > 0) {
1901                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1902                                                        ptr_y,
1903                                                        wrap_y * 2, 8) +
1904                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1905                                                        ptr_y + wrap_y,
1906                                                        wrap_y * 2, 8);
1907
1908                 if (progressive_score > interlaced_score) {
1909                     s->interlaced_dct = 1;
1910
1911                     dct_offset = wrap_y;
1912                     wrap_y <<= 1;
1913                     if (s->chroma_format == CHROMA_422)
1914                         wrap_c <<= 1;
1915                 }
1916             }
1917         }
1918
1919         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1920         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1921         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1922                            dest_y + dct_offset, wrap_y);
1923         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1924                            dest_y + dct_offset + 8, wrap_y);
1925
1926         if (s->flags & CODEC_FLAG_GRAY) {
1927             skip_dct[4] = 1;
1928             skip_dct[5] = 1;
1929         } else {
1930             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1931             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1932             if (!s->chroma_y_shift) { /* 422 */
1933                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1934                                    dest_cb + (dct_offset >> 1), wrap_c);
1935                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
1936                                    dest_cr + (dct_offset >> 1), wrap_c);
1937             }
1938         }
1939         /* pre-quantization: skip the DCT for blocks whose residual is so small (SAD < 20*qscale) that it is expected to quantize to all zeros */
1940         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1941                 2 * s->qscale * s->qscale) {
1942             // FIXME optimize
1943             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1944                               wrap_y, 8) < 20 * s->qscale)
1945                 skip_dct[0] = 1;
1946             if (s->dsp.sad[1](NULL, ptr_y + 8,
1947                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1948                 skip_dct[1] = 1;
1949             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1950                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1951                 skip_dct[2] = 1;
1952             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1953                               dest_y + dct_offset + 8,
1954                               wrap_y, 8) < 20 * s->qscale)
1955                 skip_dct[3] = 1;
1956             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
1957                               wrap_c, 8) < 20 * s->qscale)
1958                 skip_dct[4] = 1;
1959             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
1960                               wrap_c, 8) < 20 * s->qscale)
1961                 skip_dct[5] = 1;
1962             if (!s->chroma_y_shift) { /* 422 */
1963                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
1964                                   dest_cb + (dct_offset >> 1),
1965                                   wrap_c, 8) < 20 * s->qscale)
1966                     skip_dct[6] = 1;
1967                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
1968                                   dest_cr + (dct_offset >> 1),
1969                                   wrap_c, 8) < 20 * s->qscale)
1970                     skip_dct[7] = 1;
1971             }
1972         }
1973     }
1974
1975     if (s->quantizer_noise_shaping) {
1976         if (!skip_dct[0])
1977             get_visual_weight(weight[0], ptr_y                 , wrap_y);
1978         if (!skip_dct[1])
1979             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
1980         if (!skip_dct[2])
1981             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
1982         if (!skip_dct[3])
1983             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
1984         if (!skip_dct[4])
1985             get_visual_weight(weight[4], ptr_cb                , wrap_c);
1986         if (!skip_dct[5])
1987             get_visual_weight(weight[5], ptr_cr                , wrap_c);
1988         if (!s->chroma_y_shift) { /* 422 */
1989             if (!skip_dct[6])
1990                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
1991                                   wrap_c);
1992             if (!skip_dct[7])
1993                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
1994                                   wrap_c);
1995         }
1996         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
1997     }
1998
1999     /* DCT & quantize */
2000     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
2001     {
2002         for (i = 0; i < mb_block_count; i++) {
2003             if (!skip_dct[i]) {
2004                 int overflow;
2005                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2006                 // FIXME we could decide to change the quantizer instead of
2007                 // clipping
2008                 // JS: I don't think that would be a good idea; it could lower
2009                 //     quality instead of improving it. Only INTRADC clipping
2010                 //     deserves a change of quantizer.
2011                 if (overflow)
2012                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2013             } else
2014                 s->block_last_index[i] = -1;
2015         }
2016         if (s->quantizer_noise_shaping) {
2017             for (i = 0; i < mb_block_count; i++) {
2018                 if (!skip_dct[i]) {
2019                     s->block_last_index[i] =
2020                         dct_quantize_refine(s, s->block[i], weight[i],
2021                                             orig[i], i, s->qscale);
2022                 }
2023             }
2024         }
2025
2026         if (s->luma_elim_threshold && !s->mb_intra)
2027             for (i = 0; i < 4; i++)
2028                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2029         if (s->chroma_elim_threshold && !s->mb_intra)
2030             for (i = 4; i < mb_block_count; i++)
2031                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2032
2033         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2034             for (i = 0; i < mb_block_count; i++) {
2035                 if (s->block_last_index[i] == -1)
2036                     s->coded_score[i] = INT_MAX / 256;
2037             }
2038         }
2039     }
2040
2041     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2042         s->block_last_index[4] =
2043         s->block_last_index[5] = 0;
2044         s->block[4][0] =
2045         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2046     }
2047
2048     // FIXME: the non-C quantize code returns an incorrect block_last_index
2049     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2050         for (i = 0; i < mb_block_count; i++) {
2051             int j;
2052             if (s->block_last_index[i] > 0) {
2053                 for (j = 63; j > 0; j--) {
2054                     if (s->block[i][s->intra_scantable.permutated[j]])
2055                         break;
2056                 }
2057                 s->block_last_index[i] = j;
2058             }
2059         }
2060     }
2061
2062     /* huffman encode */
2063     switch(s->codec_id){ //FIXME a function pointer could be slightly faster
2064     case AV_CODEC_ID_MPEG1VIDEO:
2065     case AV_CODEC_ID_MPEG2VIDEO:
2066         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2067             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2068         break;
2069     case AV_CODEC_ID_MPEG4:
2070         if (CONFIG_MPEG4_ENCODER)
2071             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2072         break;
2073     case AV_CODEC_ID_MSMPEG4V2:
2074     case AV_CODEC_ID_MSMPEG4V3:
2075     case AV_CODEC_ID_WMV1:
2076         if (CONFIG_MSMPEG4_ENCODER)
2077             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2078         break;
2079     case AV_CODEC_ID_WMV2:
2080         if (CONFIG_WMV2_ENCODER)
2081             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2082         break;
2083     case AV_CODEC_ID_H261:
2084         if (CONFIG_H261_ENCODER)
2085             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2086         break;
2087     case AV_CODEC_ID_H263:
2088     case AV_CODEC_ID_H263P:
2089     case AV_CODEC_ID_FLV1:
2090     case AV_CODEC_ID_RV10:
2091     case AV_CODEC_ID_RV20:
2092         if (CONFIG_H263_ENCODER)
2093             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2094         break;
2095     case AV_CODEC_ID_MJPEG:
2096         if (CONFIG_MJPEG_ENCODER)
2097             ff_mjpeg_encode_mb(s, s->block);
2098         break;
2099     default:
2100         assert(0);
2101     }
2102 }
2103
2104 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2105 {
2106     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2107     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2108 }
2109
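     /* The RD macroblock decision below trial-encodes the same MB with several
      * coding modes.  copy_context_before_encode() / copy_context_after_encode()
      * save and restore the encoder state each trial touches: DC and MV
      * predictors, bit statistics, quantizer, block data and the bitstream
      * writers. */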
2110 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2111     int i;
2112
2113     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2114
2115     /* mpeg1 */
2116     d->mb_skip_run= s->mb_skip_run;
2117     for(i=0; i<3; i++)
2118         d->last_dc[i] = s->last_dc[i];
2119
2120     /* statistics */
2121     d->mv_bits= s->mv_bits;
2122     d->i_tex_bits= s->i_tex_bits;
2123     d->p_tex_bits= s->p_tex_bits;
2124     d->i_count= s->i_count;
2125     d->f_count= s->f_count;
2126     d->b_count= s->b_count;
2127     d->skip_count= s->skip_count;
2128     d->misc_bits= s->misc_bits;
2129     d->last_bits= 0;
2130
2131     d->mb_skipped= 0;
2132     d->qscale= s->qscale;
2133     d->dquant= s->dquant;
2134
2135     d->esc3_level_length= s->esc3_level_length;
2136 }
2137
2138 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2139     int i;
2140
2141     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2142     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2143
2144     /* mpeg1 */
2145     d->mb_skip_run= s->mb_skip_run;
2146     for(i=0; i<3; i++)
2147         d->last_dc[i] = s->last_dc[i];
2148
2149     /* statistics */
2150     d->mv_bits= s->mv_bits;
2151     d->i_tex_bits= s->i_tex_bits;
2152     d->p_tex_bits= s->p_tex_bits;
2153     d->i_count= s->i_count;
2154     d->f_count= s->f_count;
2155     d->b_count= s->b_count;
2156     d->skip_count= s->skip_count;
2157     d->misc_bits= s->misc_bits;
2158
2159     d->mb_intra= s->mb_intra;
2160     d->mb_skipped= s->mb_skipped;
2161     d->mv_type= s->mv_type;
2162     d->mv_dir= s->mv_dir;
2163     d->pb= s->pb;
2164     if(s->data_partitioning){
2165         d->pb2= s->pb2;
2166         d->tex_pb= s->tex_pb;
2167     }
2168     d->block= s->block;
2169     for(i=0; i<8; i++)
2170         d->block_last_index[i]= s->block_last_index[i];
2171     d->interlaced_dct= s->interlaced_dct;
2172     d->qscale= s->qscale;
2173
2174     d->esc3_level_length= s->esc3_level_length;
2175 }
2176
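     /* Trial-encode one macroblock with the given candidate type into one of
      * two scratch bitstreams.  The score is the number of bits produced; with
      * full RD decision the MB is also reconstructed and the score becomes
      * bits * lambda2 + (SSE << FF_LAMBDA_SHIFT).  If the trial beats the
      * current best it is kept, and *next_block flips so later trials do not
      * overwrite the winner's buffers. */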
2177 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2178                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2179                            int *dmin, int *next_block, int motion_x, int motion_y)
2180 {
2181     int score;
2182     uint8_t *dest_backup[3];
2183
2184     copy_context_before_encode(s, backup, type);
2185
2186     s->block= s->blocks[*next_block];
2187     s->pb= pb[*next_block];
2188     if(s->data_partitioning){
2189         s->pb2   = pb2   [*next_block];
2190         s->tex_pb= tex_pb[*next_block];
2191     }
2192
2193     if(*next_block){
2194         memcpy(dest_backup, s->dest, sizeof(s->dest));
2195         s->dest[0] = s->rd_scratchpad;
2196         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2197         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2198         assert(s->linesize >= 32); //FIXME
2199     }
2200
2201     encode_mb(s, motion_x, motion_y);
2202
2203     score= put_bits_count(&s->pb);
2204     if(s->data_partitioning){
2205         score+= put_bits_count(&s->pb2);
2206         score+= put_bits_count(&s->tex_pb);
2207     }
2208
2209     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2210         ff_MPV_decode_mb(s, s->block);
2211
2212         score *= s->lambda2;
2213         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2214     }
2215
2216     if(*next_block){
2217         memcpy(s->dest, dest_backup, sizeof(s->dest));
2218     }
2219
2220     if(score<*dmin){
2221         *dmin= score;
2222         *next_block^=1;
2223
2224         copy_context_after_encode(best, s, type);
2225     }
2226 }
2227
2228 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2229     uint32_t *sq = ff_squareTbl + 256;
2230     int acc=0;
2231     int x,y;
2232
2233     if(w==16 && h==16)
2234         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2235     else if(w==8 && h==8)
2236         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2237
2238     for(y=0; y<h; y++){
2239         for(x=0; x<w; x++){
2240             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2241         }
2242     }
2243
2244     assert(acc>=0);
2245
2246     return acc;
2247 }
2248
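     /* Sum of squared errors between the source macroblock and its
      * reconstruction over luma and both chroma planes, used as the distortion
      * term of the RD decision.  NSSE is used instead when selected as the MB
      * comparison function, and the generic sse() handles partial macroblocks
      * at the right/bottom border. */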
2249 static int sse_mb(MpegEncContext *s){
2250     int w= 16;
2251     int h= 16;
2252
2253     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2254     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2255
2256     if(w==16 && h==16)
2257       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2258         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2259                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2260                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2261       }else{
2262         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2263                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2264                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2265       }
2266     else
2267         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2268                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2269                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2270 }
2271
2272 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2273     MpegEncContext *s= *(void**)arg;
2274
2275
2276     s->me.pre_pass=1;
2277     s->me.dia_size= s->avctx->pre_dia_size;
2278     s->first_slice_line=1;
2279     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2280         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2281             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2282         }
2283         s->first_slice_line=0;
2284     }
2285
2286     s->me.pre_pass=0;
2287
2288     return 0;
2289 }
2290
2291 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2292     MpegEncContext *s= *(void**)arg;
2293
2294     ff_check_alignment();
2295
2296     s->me.dia_size= s->avctx->dia_size;
2297     s->first_slice_line=1;
2298     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2299         s->mb_x=0; //for block init below
2300         ff_init_block_index(s);
2301         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2302             s->block_index[0]+=2;
2303             s->block_index[1]+=2;
2304             s->block_index[2]+=2;
2305             s->block_index[3]+=2;
2306
2307             /* compute motion vector & mb_type and store in context */
2308             if(s->pict_type==AV_PICTURE_TYPE_B)
2309                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2310             else
2311                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2312         }
2313         s->first_slice_line=0;
2314     }
2315     return 0;
2316 }
2317
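     /* Compute the luma variance and mean of every source macroblock in this
      * thread's band of rows; the results are stored in mb_var / mb_mean and
      * the variance sum is accumulated for the rate control and adaptive
      * quantization. */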
2318 static int mb_var_thread(AVCodecContext *c, void *arg){
2319     MpegEncContext *s= *(void**)arg;
2320     int mb_x, mb_y;
2321
2322     ff_check_alignment();
2323
2324     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2325         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2326             int xx = mb_x * 16;
2327             int yy = mb_y * 16;
2328             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2329             int varc;
2330             int sum = s->dsp.pix_sum(pix, s->linesize);
2331
2332             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2333
2334             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2335             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2336             s->me.mb_var_sum_temp    += varc;
2337         }
2338     }
2339     return 0;
2340 }
2341
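     /* Finish the current slice / video packet: merge the MPEG-4 data
      * partitions and write MPEG-4 or MJPEG stuffing where required, then
      * byte-align and flush the bitstream writer. */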
2342 static void write_slice_end(MpegEncContext *s){
2343     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2344         if(s->partitioned_frame){
2345             ff_mpeg4_merge_partitions(s);
2346         }
2347
2348         ff_mpeg4_stuffing(&s->pb);
2349     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2350         ff_mjpeg_encode_stuffing(&s->pb);
2351     }
2352
2353     avpriv_align_put_bits(&s->pb);
2354     flush_put_bits(&s->pb);
2355
2356     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2357         s->misc_bits+= get_bits_diff(s);
2358 }
2359
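     /* Append one 12-byte record to the AV_PKT_DATA_H263_MB_INFO side data:
      * the 32-bit bit offset of the macroblock in the packet, the quantizer,
      * the GOB number, the 16-bit macroblock address within the GOB, the two
      * predicted motion vector components, and two reserved bytes (4MV is not
      * implemented). */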
2360 static void write_mb_info(MpegEncContext *s)
2361 {
2362     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2363     int offset = put_bits_count(&s->pb);
2364     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2365     int gobn = s->mb_y / s->gob_index;
2366     int pred_x, pred_y;
2367     if (CONFIG_H263_ENCODER)
2368         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2369     bytestream_put_le32(&ptr, offset);
2370     bytestream_put_byte(&ptr, s->qscale);
2371     bytestream_put_byte(&ptr, gobn);
2372     bytestream_put_le16(&ptr, mba);
2373     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2374     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2375     /* 4MV not implemented */
2376     bytestream_put_byte(&ptr, 0); /* hmv2 */
2377     bytestream_put_byte(&ptr, 0); /* vmv2 */
2378 }
2379
2380 static void update_mb_info(MpegEncContext *s, int startcode)
2381 {
2382     if (!s->mb_info)
2383         return;
2384     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2385         s->mb_info_size += 12;
2386         s->prev_mb_info = s->last_mb_info;
2387     }
2388     if (startcode) {
2389         s->prev_mb_info = put_bits_count(&s->pb)/8;
2390         /* This might have incremented mb_info_size above, and we return without
2391          * actually writing any info into that slot yet. But in that case,
2392          * this function will be called again after the start code has been
2393          * written, and the MB info will be written then. */
2394         return;
2395     }
2396
2397     s->last_mb_info = put_bits_count(&s->pb)/8;
2398     if (!s->mb_info_size)
2399         s->mb_info_size += 12;
2400     write_mb_info(s);
2401 }
2402
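     /* Encode this slice context's band of macroblock rows.  When a macroblock
      * has more than one candidate coding type left after motion estimation
      * (or QP rate-distortion is enabled), every candidate is trial-encoded
      * with encode_mb_hq() and the cheapest is kept.  GOB / slice / video
      * packet headers are emitted here as well when rtp_mode is set. */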
2403 static int encode_thread(AVCodecContext *c, void *arg){
2404     MpegEncContext *s= *(void**)arg;
2405     int mb_x, mb_y, pdif = 0;
2406     int chr_h= 16>>s->chroma_y_shift;
2407     int i, j;
2408     MpegEncContext best_s, backup_s;
2409     uint8_t bit_buf[2][MAX_MB_BYTES];
2410     uint8_t bit_buf2[2][MAX_MB_BYTES];
2411     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2412     PutBitContext pb[2], pb2[2], tex_pb[2];
2413
2414     ff_check_alignment();
2415
2416     for(i=0; i<2; i++){
2417         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2418         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2419         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2420     }
2421
2422     s->last_bits= put_bits_count(&s->pb);
2423     s->mv_bits=0;
2424     s->misc_bits=0;
2425     s->i_tex_bits=0;
2426     s->p_tex_bits=0;
2427     s->i_count=0;
2428     s->f_count=0;
2429     s->b_count=0;
2430     s->skip_count=0;
2431
2432     for(i=0; i<3; i++){
2433         /* init last dc values */
2434         /* note: quant matrix value (8) is implied here */
2435         s->last_dc[i] = 128 << s->intra_dc_precision;
2436
2437         s->current_picture.f.error[i] = 0;
2438     }
2439     s->mb_skip_run = 0;
2440     memset(s->last_mv, 0, sizeof(s->last_mv));
2441
2442     s->last_mv_dir = 0;
2443
2444     switch(s->codec_id){
2445     case AV_CODEC_ID_H263:
2446     case AV_CODEC_ID_H263P:
2447     case AV_CODEC_ID_FLV1:
2448         if (CONFIG_H263_ENCODER)
2449             s->gob_index = ff_h263_get_gob_height(s);
2450         break;
2451     case AV_CODEC_ID_MPEG4:
2452         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2453             ff_mpeg4_init_partitions(s);
2454         break;
2455     }
2456
2457     s->resync_mb_x=0;
2458     s->resync_mb_y=0;
2459     s->first_slice_line = 1;
2460     s->ptr_lastgob = s->pb.buf;
2461     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2462         s->mb_x=0;
2463         s->mb_y= mb_y;
2464
2465         ff_set_qscale(s, s->qscale);
2466         ff_init_block_index(s);
2467
2468         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2469             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2470             int mb_type= s->mb_type[xy];
2471 //            int d;
2472             int dmin= INT_MAX;
2473             int dir;
2474
2475             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2476                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2477                 return -1;
2478             }
2479             if(s->data_partitioning){
2480                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2481                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2482                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2483                     return -1;
2484                 }
2485             }
2486
2487             s->mb_x = mb_x;
2488             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2489             ff_update_block_index(s);
2490
2491             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2492                 ff_h261_reorder_mb_index(s);
2493                 xy= s->mb_y*s->mb_stride + s->mb_x;
2494                 mb_type= s->mb_type[xy];
2495             }
2496
2497             /* write gob / video packet header  */
2498             if(s->rtp_mode){
2499                 int current_packet_size, is_gob_start;
2500
2501                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2502
2503                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2504
2505                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2506
2507                 switch(s->codec_id){
2508                 case AV_CODEC_ID_H263:
2509                 case AV_CODEC_ID_H263P:
2510                     if(!s->h263_slice_structured)
2511                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2512                     break;
2513                 case AV_CODEC_ID_MPEG2VIDEO:
2514                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2515                 case AV_CODEC_ID_MPEG1VIDEO:
2516                     if(s->mb_skip_run) is_gob_start=0;
2517                     break;
2518                 }
2519
2520                 if(is_gob_start){
2521                     if(s->start_mb_y != mb_y || mb_x!=0){
2522                         write_slice_end(s);
2523
2524                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2525                             ff_mpeg4_init_partitions(s);
2526                         }
2527                     }
2528
2529                     assert((put_bits_count(&s->pb)&7) == 0);
2530                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2531
2532                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2533                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2534                         int d = 100 / s->error_rate;
2535                         if(r % d == 0){
2536                             current_packet_size=0;
2537                             s->pb.buf_ptr= s->ptr_lastgob;
2538                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2539                         }
2540                     }
2541
2542                     if (s->avctx->rtp_callback){
2543                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2544                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2545                     }
2546                     update_mb_info(s, 1);
2547
2548                     switch(s->codec_id){
2549                     case AV_CODEC_ID_MPEG4:
2550                         if (CONFIG_MPEG4_ENCODER) {
2551                             ff_mpeg4_encode_video_packet_header(s);
2552                             ff_mpeg4_clean_buffers(s);
2553                         }
2554                     break;
2555                     case AV_CODEC_ID_MPEG1VIDEO:
2556                     case AV_CODEC_ID_MPEG2VIDEO:
2557                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2558                             ff_mpeg1_encode_slice_header(s);
2559                             ff_mpeg1_clean_buffers(s);
2560                         }
2561                     break;
2562                     case AV_CODEC_ID_H263:
2563                     case AV_CODEC_ID_H263P:
2564                         if (CONFIG_H263_ENCODER)
2565                             ff_h263_encode_gob_header(s, mb_y);
2566                     break;
2567                     }
2568
2569                     if(s->flags&CODEC_FLAG_PASS1){
2570                         int bits= put_bits_count(&s->pb);
2571                         s->misc_bits+= bits - s->last_bits;
2572                         s->last_bits= bits;
2573                     }
2574
2575                     s->ptr_lastgob += current_packet_size;
2576                     s->first_slice_line=1;
2577                     s->resync_mb_x=mb_x;
2578                     s->resync_mb_y=mb_y;
2579                 }
2580             }
2581
2582             if(  (s->resync_mb_x   == s->mb_x)
2583                && s->resync_mb_y+1 == s->mb_y){
2584                 s->first_slice_line=0;
2585             }
2586
2587             s->mb_skipped=0;
2588             s->dquant=0; //only for QP_RD
2589
2590             update_mb_info(s, 0);
2591
2592             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2593                 int next_block=0;
2594                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2595
2596                 copy_context_before_encode(&backup_s, s, -1);
2597                 backup_s.pb= s->pb;
2598                 best_s.data_partitioning= s->data_partitioning;
2599                 best_s.partitioned_frame= s->partitioned_frame;
2600                 if(s->data_partitioning){
2601                     backup_s.pb2= s->pb2;
2602                     backup_s.tex_pb= s->tex_pb;
2603                 }
2604
2605                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2606                     s->mv_dir = MV_DIR_FORWARD;
2607                     s->mv_type = MV_TYPE_16X16;
2608                     s->mb_intra= 0;
2609                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2610                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2611                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2612                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2613                 }
2614                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2615                     s->mv_dir = MV_DIR_FORWARD;
2616                     s->mv_type = MV_TYPE_FIELD;
2617                     s->mb_intra= 0;
2618                     for(i=0; i<2; i++){
2619                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2620                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2621                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2622                     }
2623                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2624                                  &dmin, &next_block, 0, 0);
2625                 }
2626                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2627                     s->mv_dir = MV_DIR_FORWARD;
2628                     s->mv_type = MV_TYPE_16X16;
2629                     s->mb_intra= 0;
2630                     s->mv[0][0][0] = 0;
2631                     s->mv[0][0][1] = 0;
2632                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2633                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2634                 }
2635                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2636                     s->mv_dir = MV_DIR_FORWARD;
2637                     s->mv_type = MV_TYPE_8X8;
2638                     s->mb_intra= 0;
2639                     for(i=0; i<4; i++){
2640                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2641                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2642                     }
2643                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2644                                  &dmin, &next_block, 0, 0);
2645                 }
2646                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2647                     s->mv_dir = MV_DIR_FORWARD;
2648                     s->mv_type = MV_TYPE_16X16;
2649                     s->mb_intra= 0;
2650                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2651                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2652                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2653                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2654                 }
2655                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2656                     s->mv_dir = MV_DIR_BACKWARD;
2657                     s->mv_type = MV_TYPE_16X16;
2658                     s->mb_intra= 0;
2659                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2660                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2661                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2662                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2663                 }
2664                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2665                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2666                     s->mv_type = MV_TYPE_16X16;
2667                     s->mb_intra= 0;
2668                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2669                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2670                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2671                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2672                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2673                                  &dmin, &next_block, 0, 0);
2674                 }
2675                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2676                     s->mv_dir = MV_DIR_FORWARD;
2677                     s->mv_type = MV_TYPE_FIELD;
2678                     s->mb_intra= 0;
2679                     for(i=0; i<2; i++){
2680                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2681                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2682                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2683                     }
2684                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2685                                  &dmin, &next_block, 0, 0);
2686                 }
2687                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2688                     s->mv_dir = MV_DIR_BACKWARD;
2689                     s->mv_type = MV_TYPE_FIELD;
2690                     s->mb_intra= 0;
2691                     for(i=0; i<2; i++){
2692                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2693                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2694                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2695                     }
2696                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2697                                  &dmin, &next_block, 0, 0);
2698                 }
2699                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2700                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2701                     s->mv_type = MV_TYPE_FIELD;
2702                     s->mb_intra= 0;
2703                     for(dir=0; dir<2; dir++){
2704                         for(i=0; i<2; i++){
2705                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2706                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2707                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2708                         }
2709                     }
2710                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2711                                  &dmin, &next_block, 0, 0);
2712                 }
2713                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2714                     s->mv_dir = 0;
2715                     s->mv_type = MV_TYPE_16X16;
2716                     s->mb_intra= 1;
2717                     s->mv[0][0][0] = 0;
2718                     s->mv[0][0][1] = 0;
2719                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2720                                  &dmin, &next_block, 0, 0);
2721                     if(s->h263_pred || s->h263_aic){
2722                         if(best_s.mb_intra)
2723                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2724                         else
2725                             ff_clean_intra_table_entries(s); //old mode?
2726                     }
2727                 }
2728
2729                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2730                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2731                         const int last_qp= backup_s.qscale;
2732                         int qpi, qp, dc[6];
2733                         int16_t ac[6][16];
2734                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2735                         static const int dquant_tab[4]={-1,1,-2,2};
2736
2737                         assert(backup_s.dquant == 0);
2738
2739                         //FIXME intra
2740                         s->mv_dir= best_s.mv_dir;
2741                         s->mv_type = MV_TYPE_16X16;
2742                         s->mb_intra= best_s.mb_intra;
2743                         s->mv[0][0][0] = best_s.mv[0][0][0];
2744                         s->mv[0][0][1] = best_s.mv[0][0][1];
2745                         s->mv[1][0][0] = best_s.mv[1][0][0];
2746                         s->mv[1][0][1] = best_s.mv[1][0][1];
2747
2748                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2749                         for(; qpi<4; qpi++){
2750                             int dquant= dquant_tab[qpi];
2751                             qp= last_qp + dquant;
2752                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2753                                 continue;
2754                             backup_s.dquant= dquant;
2755                             if(s->mb_intra && s->dc_val[0]){
2756                                 for(i=0; i<6; i++){
2757                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2758                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2759                                 }
2760                             }
2761
2762                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2763                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2764                             if(best_s.qscale != qp){
2765                                 if(s->mb_intra && s->dc_val[0]){
2766                                     for(i=0; i<6; i++){
2767                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2768                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2769                                     }
2770                                 }
2771                             }
2772                         }
2773                     }
2774                 }
2775                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2776                     int mx= s->b_direct_mv_table[xy][0];
2777                     int my= s->b_direct_mv_table[xy][1];
2778
2779                     backup_s.dquant = 0;
2780                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2781                     s->mb_intra= 0;
2782                     ff_mpeg4_set_direct_mv(s, mx, my);
2783                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2784                                  &dmin, &next_block, mx, my);
2785                 }
2786                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2787                     backup_s.dquant = 0;
2788                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2789                     s->mb_intra= 0;
2790                     ff_mpeg4_set_direct_mv(s, 0, 0);
2791                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2792                                  &dmin, &next_block, 0, 0);
2793                 }
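                /* FF_MPV_FLAG_SKIP_RD: if the best inter candidate has any coded
                 * coefficients, also try the same motion with the residual dropped
                 * (s->skipdct=1) and keep whichever variant scores better. */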
2794                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2795                     int coded=0;
2796                     for(i=0; i<6; i++)
2797                         coded |= s->block_last_index[i];
2798                     if(coded){
2799                         int mx,my;
2800                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2801                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2802                             mx=my=0; //FIXME find the one we actually used
2803                             ff_mpeg4_set_direct_mv(s, mx, my);
2804                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2805                             mx= s->mv[1][0][0];
2806                             my= s->mv[1][0][1];
2807                         }else{
2808                             mx= s->mv[0][0][0];
2809                             my= s->mv[0][0][1];
2810                         }
2811
2812                         s->mv_dir= best_s.mv_dir;
2813                         s->mv_type = best_s.mv_type;
2814                         s->mb_intra= 0;
2815 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2816                         s->mv[0][0][1] = best_s.mv[0][0][1];
2817                         s->mv[1][0][0] = best_s.mv[1][0][0];
2818                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2819                         backup_s.dquant= 0;
2820                         s->skipdct=1;
2821                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2822                                         &dmin, &next_block, mx, my);
2823                         s->skipdct=0;
2824                     }
2825                 }
2826
2827                 s->current_picture.qscale_table[xy] = best_s.qscale;
2828
2829                 copy_context_after_encode(s, &best_s, -1);
2830
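                /* encode_mb_hq() wrote each candidate into one of two scratch bit
                 * buffers; after the search the winner sits in bit_buf*[next_block^1].
                 * Copy its bits into the real bitstream writer(s) saved in backup_s
                 * and make those the active contexts again. */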
2831                 pb_bits_count= put_bits_count(&s->pb);
2832                 flush_put_bits(&s->pb);
2833                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2834                 s->pb= backup_s.pb;
2835
2836                 if(s->data_partitioning){
2837                     pb2_bits_count= put_bits_count(&s->pb2);
2838                     flush_put_bits(&s->pb2);
2839                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2840                     s->pb2= backup_s.pb2;
2841
2842                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2843                     flush_put_bits(&s->tex_pb);
2844                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2845                     s->tex_pb= backup_s.tex_pb;
2846                 }
2847                 s->last_bits= put_bits_count(&s->pb);
2848
2849                 if (CONFIG_H263_ENCODER &&
2850                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2851                     ff_h263_update_motion_val(s);
2852
2853                 if(next_block==0){ //FIXME 16 vs linesize16
2854                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2855                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2856                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2857                 }
2858
2859                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2860                     ff_MPV_decode_mb(s, s->block);
2861             } else {
2862                 int motion_x = 0, motion_y = 0;
2863                 s->mv_type=MV_TYPE_16X16;
2864                 // only one MB-Type possible
2865
2866                 switch(mb_type){
2867                 case CANDIDATE_MB_TYPE_INTRA:
2868                     s->mv_dir = 0;
2869                     s->mb_intra= 1;
2870                     motion_x= s->mv[0][0][0] = 0;
2871                     motion_y= s->mv[0][0][1] = 0;
2872                     break;
2873                 case CANDIDATE_MB_TYPE_INTER:
2874                     s->mv_dir = MV_DIR_FORWARD;
2875                     s->mb_intra= 0;
2876                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2877                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2878                     break;
2879                 case CANDIDATE_MB_TYPE_INTER_I:
2880                     s->mv_dir = MV_DIR_FORWARD;
2881                     s->mv_type = MV_TYPE_FIELD;
2882                     s->mb_intra= 0;
2883                     for(i=0; i<2; i++){
2884                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2885                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2886                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2887                     }
2888                     break;
2889                 case CANDIDATE_MB_TYPE_INTER4V:
2890                     s->mv_dir = MV_DIR_FORWARD;
2891                     s->mv_type = MV_TYPE_8X8;
2892                     s->mb_intra= 0;
2893                     for(i=0; i<4; i++){
2894                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2895                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2896                     }
2897                     break;
2898                 case CANDIDATE_MB_TYPE_DIRECT:
2899                     if (CONFIG_MPEG4_ENCODER) {
2900                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2901                         s->mb_intra= 0;
2902                         motion_x=s->b_direct_mv_table[xy][0];
2903                         motion_y=s->b_direct_mv_table[xy][1];
2904                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2905                     }
2906                     break;
2907                 case CANDIDATE_MB_TYPE_DIRECT0:
2908                     if (CONFIG_MPEG4_ENCODER) {
2909                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2910                         s->mb_intra= 0;
2911                         ff_mpeg4_set_direct_mv(s, 0, 0);
2912                     }
2913                     break;
2914                 case CANDIDATE_MB_TYPE_BIDIR:
2915                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2916                     s->mb_intra= 0;
2917                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2918                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2919                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2920                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2921                     break;
2922                 case CANDIDATE_MB_TYPE_BACKWARD:
2923                     s->mv_dir = MV_DIR_BACKWARD;
2924                     s->mb_intra= 0;
2925                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2926                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2927                     break;
2928                 case CANDIDATE_MB_TYPE_FORWARD:
2929                     s->mv_dir = MV_DIR_FORWARD;
2930                     s->mb_intra= 0;
2931                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2932                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2933                     break;
2934                 case CANDIDATE_MB_TYPE_FORWARD_I:
2935                     s->mv_dir = MV_DIR_FORWARD;
2936                     s->mv_type = MV_TYPE_FIELD;
2937                     s->mb_intra= 0;
2938                     for(i=0; i<2; i++){
2939                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2940                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2941                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2942                     }
2943                     break;
2944                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2945                     s->mv_dir = MV_DIR_BACKWARD;
2946                     s->mv_type = MV_TYPE_FIELD;
2947                     s->mb_intra= 0;
2948                     for(i=0; i<2; i++){
2949                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2950                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2951                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2952                     }
2953                     break;
2954                 case CANDIDATE_MB_TYPE_BIDIR_I:
2955                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2956                     s->mv_type = MV_TYPE_FIELD;
2957                     s->mb_intra= 0;
2958                     for(dir=0; dir<2; dir++){
2959                         for(i=0; i<2; i++){
2960                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2961                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2962                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2963                         }
2964                     }
2965                     break;
2966                 default:
2967                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
2968                 }
2969
2970                 encode_mb(s, motion_x, motion_y);
2971
2972                 // RAL: Update last macroblock type
2973                 s->last_mv_dir = s->mv_dir;
2974
2975                 if (CONFIG_H263_ENCODER &&
2976                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2977                     ff_h263_update_motion_val(s);
2978
2979                 ff_MPV_decode_mb(s, s->block);
2980             }
2981
2982             /* clean the MV table in I/P/S frames; it is used for direct mode in B-frames */
2983             if(s->mb_intra /* && I,P,S_TYPE */){
2984                 s->p_mv_table[xy][0]=0;
2985                 s->p_mv_table[xy][1]=0;
2986             }
2987
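            /* accumulate the per-plane squared error for the PSNR/error report,
             * clipping the block size at the right/bottom picture border */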
2988             if(s->flags&CODEC_FLAG_PSNR){
2989                 int w= 16;
2990                 int h= 16;
2991
2992                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2993                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2994
2995                 s->current_picture.f.error[0] += sse(
2996                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
2997                     s->dest[0], w, h, s->linesize);
2998                 s->current_picture.f.error[1] += sse(
2999                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3000                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3001                 s->current_picture.f.error[2] += sse(
3002                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3003                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3004             }
3005             if(s->loop_filter){
3006                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3007                     ff_h263_loop_filter(s);
3008             }
3009             av_dlog(s->avctx, "MB %d %d bits\n",
3010                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3011         }
3012     }
3013
3014     // not beautiful, but this must be written before flushing, so it has to be here
3015     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3016         ff_msmpeg4_encode_ext_header(s);
3017
3018     write_slice_end(s);
3019
3020     /* Send the last GOB if RTP */
3021     if (s->avctx->rtp_callback) {
3022         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3023         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3024         /* Call the RTP callback to send the last GOB */
3025         emms_c();
3026         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3027     }
3028
3029     return 0;
3030 }
3031
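/* Fold the per-slice-thread statistics back into the main context.  MERGE() adds
 * src's counter to dst and clears src, so a field cannot be counted twice. */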
3032 #define MERGE(field) dst->field += src->field; src->field=0
3033 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3034     MERGE(me.scene_change_score);
3035     MERGE(me.mc_mb_var_sum_temp);
3036     MERGE(me.mb_var_sum_temp);
3037 }
3038
3039 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3040     int i;
3041
3042     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3043     MERGE(dct_count[1]);
3044     MERGE(mv_bits);
3045     MERGE(i_tex_bits);
3046     MERGE(p_tex_bits);
3047     MERGE(i_count);
3048     MERGE(f_count);
3049     MERGE(b_count);
3050     MERGE(skip_count);
3051     MERGE(misc_bits);
3052     MERGE(er.error_count);
3053     MERGE(padding_bug_score);
3054     MERGE(current_picture.f.error[0]);
3055     MERGE(current_picture.f.error[1]);
3056     MERGE(current_picture.f.error[2]);
3057
3058     if(dst->avctx->noise_reduction){
3059         for(i=0; i<64; i++){
3060             MERGE(dct_error_sum[0][i]);
3061             MERGE(dct_error_sum[1][i]);
3062         }
3063     }
3064
3065     assert(put_bits_count(&src->pb) % 8 ==0);
3066     assert(put_bits_count(&dst->pb) % 8 ==0);
3067     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3068     flush_put_bits(&dst->pb);
3069 }
3070
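/* Pick the picture-level quantizer: either a lambda queued in next_lambda or the
 * rate-control estimate; with adaptive quantization the per-MB qscale table is
 * then cleaned up in the way the target codec requires. */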
3071 static int estimate_qp(MpegEncContext *s, int dry_run){
3072     if (s->next_lambda){
3073         s->current_picture_ptr->f.quality =
3074         s->current_picture.f.quality = s->next_lambda;
3075         if(!dry_run) s->next_lambda= 0;
3076     } else if (!s->fixed_qscale) {
3077         s->current_picture_ptr->f.quality =
3078         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3079         if (s->current_picture.f.quality < 0)
3080             return -1;
3081     }
3082
3083     if(s->adaptive_quant){
3084         switch(s->codec_id){
3085         case AV_CODEC_ID_MPEG4:
3086             if (CONFIG_MPEG4_ENCODER)
3087                 ff_clean_mpeg4_qscales(s);
3088             break;
3089         case AV_CODEC_ID_H263:
3090         case AV_CODEC_ID_H263P:
3091         case AV_CODEC_ID_FLV1:
3092             if (CONFIG_H263_ENCODER)
3093                 ff_clean_h263_qscales(s);
3094             break;
3095         default:
3096             ff_init_qscale_tab(s);
3097         }
3098
3099         s->lambda= s->lambda_table[0];
3100         //FIXME broken
3101     }else
3102         s->lambda = s->current_picture.f.quality;
3103     update_qscale(s);
3104     return 0;
3105 }
3106
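/* pp_time is the distance between the last two reference (non-B) frames, pb_time
 * the distance from the older of them to the current B-frame, both in time_base
 * units; they are used e.g. for MPEG-4 direct-mode MV scaling. */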
3107 /* must be called before writing the header */
3108 static void set_frame_distances(MpegEncContext * s){
3109     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3110     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3111
3112     if(s->pict_type==AV_PICTURE_TYPE_B){
3113         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3114         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3115     }else{
3116         s->pp_time= s->time - s->last_non_b_time;
3117         s->last_non_b_time= s->time;
3118         assert(s->picture_number==0 || s->pp_time > 0);
3119     }
3120 }
3121
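/* Encode one picture: estimate motion (in parallel over the slice contexts), decide
 * picture type, f_code/b_code and quantizer, write the picture header for the target
 * format, then encode all macroblocks and merge the per-thread bitstreams. */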
3122 static int encode_picture(MpegEncContext *s, int picture_number)
3123 {
3124     int i, ret;
3125     int bits;
3126     int context_count = s->slice_context_count;
3127
3128     s->picture_number = picture_number;
3129
3130     /* Reset the average MB variance */
3131     s->me.mb_var_sum_temp    =
3132     s->me.mc_mb_var_sum_temp = 0;
3133
3134     /* we need to initialize some time vars before we can encode b-frames */
3135     // RAL: Condition added for MPEG1VIDEO
3136     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3137         set_frame_distances(s);
3138     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3139         ff_set_mpeg4_time(s);
3140
3141     s->me.scene_change_score=0;
3142
3143 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3144
3145     if(s->pict_type==AV_PICTURE_TYPE_I){
3146         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3147         else                        s->no_rounding=0;
3148     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3149         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3150             s->no_rounding ^= 1;
3151     }
3152
3153     if(s->flags & CODEC_FLAG_PASS2){
3154         if (estimate_qp(s,1) < 0)
3155             return -1;
3156         ff_get_2pass_fcode(s);
3157     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3158         if(s->pict_type==AV_PICTURE_TYPE_B)
3159             s->lambda= s->last_lambda_for[s->pict_type];
3160         else
3161             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3162         update_qscale(s);
3163     }
3164
3165     s->mb_intra=0; //for the rate distortion & bit compare functions
3166     for(i=1; i<context_count; i++){
3167         ret = ff_update_duplicate_context(s->thread_context[i], s);
3168         if (ret < 0)
3169             return ret;
3170     }
3171
3172     if(ff_init_me(s)<0)
3173         return -1;
3174
3175     /* Estimate motion for every MB */
3176     if(s->pict_type != AV_PICTURE_TYPE_I){
3177         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3178         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3179         if (s->pict_type != AV_PICTURE_TYPE_B) {
3180             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3181                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3182             }
3183         }
3184
3185         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3186     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3187         /* I-Frame */
3188         for(i=0; i<s->mb_stride*s->mb_height; i++)
3189             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3190
3191         if(!s->fixed_qscale){
3192             /* finding spatial complexity for I-frame rate control */
3193             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3194         }
3195     }
3196     for(i=1; i<context_count; i++){
3197         merge_context_after_me(s, s->thread_context[i]);
3198     }
3199     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3200     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3201     emms_c();
3202
3203     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3204         s->pict_type= AV_PICTURE_TYPE_I;
3205         for(i=0; i<s->mb_stride*s->mb_height; i++)
3206             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3207         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3208                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3209     }
3210
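    /* Derive f_code/b_code from the estimated motion vectors and force any vectors
     * that do not fit the chosen range back into it (unless unlimited MVs are used). */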
3211     if(!s->umvplus){
3212         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3213             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3214
3215             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3216                 int a,b;
3217                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3218                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3219                 s->f_code= FFMAX3(s->f_code, a, b);
3220             }
3221
3222             ff_fix_long_p_mvs(s);
3223             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3224             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3225                 int j;
3226                 for(i=0; i<2; i++){
3227                     for(j=0; j<2; j++)
3228                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3229                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3230                 }
3231             }
3232         }
3233
3234         if(s->pict_type==AV_PICTURE_TYPE_B){
3235             int a, b;
3236
3237             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3238             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3239             s->f_code = FFMAX(a, b);
3240
3241             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3242             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3243             s->b_code = FFMAX(a, b);
3244
3245             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3246             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3247             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3248             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3249             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3250                 int dir, j;
3251                 for(dir=0; dir<2; dir++){
3252                     for(i=0; i<2; i++){
3253                         for(j=0; j<2; j++){
3254                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3255                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3256                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3257                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3258                         }
3259                     }
3260                 }
3261             }
3262         }
3263     }
3264
3265     if (estimate_qp(s, 0) < 0)
3266         return -1;
3267
3268     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3269         s->qscale= 3; //reduce clipping problems
3270
3271     if (s->out_format == FMT_MJPEG) {
3272         /* for mjpeg, we do include qscale in the matrix */
3273         for(i=1;i<64;i++){
3274             int j= s->dsp.idct_permutation[i];
3275
3276             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3277         }
3278         s->y_dc_scale_table=
3279         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3280         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3281         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3282                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3283         s->qscale= 8;
3284     }
3285
3286     //FIXME var duplication
3287     s->current_picture_ptr->f.key_frame =
3288     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3289     s->current_picture_ptr->f.pict_type =
3290     s->current_picture.f.pict_type = s->pict_type;
3291
3292     if (s->current_picture.f.key_frame)
3293         s->picture_in_gop_number=0;
3294
3295     s->last_bits= put_bits_count(&s->pb);
3296     switch(s->out_format) {
3297     case FMT_MJPEG:
3298         if (CONFIG_MJPEG_ENCODER)
3299             ff_mjpeg_encode_picture_header(s);
3300         break;
3301     case FMT_H261:
3302         if (CONFIG_H261_ENCODER)
3303             ff_h261_encode_picture_header(s, picture_number);
3304         break;
3305     case FMT_H263:
3306         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3307             ff_wmv2_encode_picture_header(s, picture_number);
3308         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3309             ff_msmpeg4_encode_picture_header(s, picture_number);
3310         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3311             ff_mpeg4_encode_picture_header(s, picture_number);
3312         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3313             ff_rv10_encode_picture_header(s, picture_number);
3314         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3315             ff_rv20_encode_picture_header(s, picture_number);
3316         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3317             ff_flv_encode_picture_header(s, picture_number);
3318         else if (CONFIG_H263_ENCODER)
3319             ff_h263_encode_picture_header(s, picture_number);
3320         break;
3321     case FMT_MPEG1:
3322         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3323             ff_mpeg1_encode_picture_header(s, picture_number);
3324         break;
3325     default:
3326         assert(0);
3327     }
3328     bits= put_bits_count(&s->pb);
3329     s->header_bits= bits - s->last_bits;
3330
3331     for(i=1; i<context_count; i++){
3332         update_duplicate_context_after_me(s->thread_context[i], s);
3333     }
3334     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3335     for(i=1; i<context_count; i++){
3336         merge_context_after_encode(s, s->thread_context[i]);
3337     }
3338     emms_c();
3339     return 0;
3340 }
3341
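/* Adaptive DCT denoising (the noise_reduction option): accumulate each coefficient's
 * magnitude in dct_error_sum[] and pull the coefficient towards zero by
 * dct_offset[], which is recomputed elsewhere from these statistics. */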
3342 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3343     const int intra= s->mb_intra;
3344     int i;
3345
3346     s->dct_count[intra]++;
3347
3348     for(i=0; i<64; i++){
3349         int level= block[i];
3350
3351         if(level){
3352             if(level>0){
3353                 s->dct_error_sum[intra][i] += level;
3354                 level -= s->dct_offset[intra][i];
3355                 if(level<0) level=0;
3356             }else{
3357                 s->dct_error_sum[intra][i] -= level;
3358                 level += s->dct_offset[intra][i];
3359                 if(level>0) level=0;
3360             }
3361             block[i]= level;
3362         }
3363     }
3364 }
3365
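/* Trellis quantization: for every coefficient keep the two closest quantized
 * magnitudes as candidates (zeroing it is always a third option) and run a
 * Viterbi-style dynamic program over run/level decisions that minimizes
 * distortion + lambda * estimated VLC bits; survivor[] holds the positions that
 * can still start an optimal continuation. */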
3366 static int dct_quantize_trellis_c(MpegEncContext *s,
3367                                   int16_t *block, int n,
3368                                   int qscale, int *overflow){
3369     const int *qmat;
3370     const uint8_t *scantable= s->intra_scantable.scantable;
3371     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3372     int max=0;
3373     unsigned int threshold1, threshold2;
3374     int bias=0;
3375     int run_tab[65];
3376     int level_tab[65];
3377     int score_tab[65];
3378     int survivor[65];
3379     int survivor_count;
3380     int last_run=0;
3381     int last_level=0;
3382     int last_score= 0;
3383     int last_i;
3384     int coeff[2][64];
3385     int coeff_count[64];
3386     int qmul, qadd, start_i, last_non_zero, i, dc;
3387     const int esc_length= s->ac_esc_length;
3388     uint8_t * length;
3389     uint8_t * last_length;
3390     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3391
3392     s->dsp.fdct (block);
3393
3394     if(s->dct_error_sum)
3395         s->denoise_dct(s, block);
3396     qmul= qscale*16;
3397     qadd= ((qscale-1)|1)*8;
3398
3399     if (s->mb_intra) {
3400         int q;
3401         if (!s->h263_aic) {
3402             if (n < 4)
3403                 q = s->y_dc_scale;
3404             else
3405                 q = s->c_dc_scale;
3406             q = q << 3;
3407         } else{
3408             /* For AIC we skip quant/dequant of INTRADC */
3409             q = 1 << 3;
3410             qadd=0;
3411         }
3412
3413         /* note: block[0] is assumed to be positive */
3414         block[0] = (block[0] + (q >> 1)) / q;
3415         start_i = 1;
3416         last_non_zero = 0;
3417         qmat = s->q_intra_matrix[qscale];
3418         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3419             bias= 1<<(QMAT_SHIFT-1);
3420         length     = s->intra_ac_vlc_length;
3421         last_length= s->intra_ac_vlc_last_length;
3422     } else {
3423         start_i = 0;
3424         last_non_zero = -1;
3425         qmat = s->q_inter_matrix[qscale];
3426         length     = s->inter_ac_vlc_length;
3427         last_length= s->inter_ac_vlc_last_length;
3428     }
3429     last_i= start_i;
3430
3431     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3432     threshold2= (threshold1<<1);
3433
3434     for(i=63; i>=start_i; i--) {
3435         const int j = scantable[i];
3436         int level = block[j] * qmat[j];
3437
3438         if(((unsigned)(level+threshold1))>threshold2){
3439             last_non_zero = i;
3440             break;
3441         }
3442     }
3443
3444     for(i=start_i; i<=last_non_zero; i++) {
3445         const int j = scantable[i];
3446         int level = block[j] * qmat[j];
3447
3448 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3449 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3450         if(((unsigned)(level+threshold1))>threshold2){
3451             if(level>0){
3452                 level= (bias + level)>>QMAT_SHIFT;
3453                 coeff[0][i]= level;
3454                 coeff[1][i]= level-1;
3455 //                coeff[2][k]= level-2;
3456             }else{
3457                 level= (bias - level)>>QMAT_SHIFT;
3458                 coeff[0][i]= -level;
3459                 coeff[1][i]= -level+1;
3460 //                coeff[2][k]= -level+2;
3461             }
3462             coeff_count[i]= FFMIN(level, 2);
3463             assert(coeff_count[i]);
3464             max |=level;
3465         }else{
3466             coeff[0][i]= (level>>31)|1;
3467             coeff_count[i]= 1;
3468         }
3469     }
3470
3471     *overflow= s->max_qcoeff < max; //overflow might have happened
3472
3473     if(last_non_zero < start_i){
3474         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3475         return last_non_zero;
3476     }
3477
3478     score_tab[start_i]= 0;
3479     survivor[0]= start_i;
3480     survivor_count= 1;
3481
3482     for(i=start_i; i<=last_non_zero; i++){
3483         int level_index, j, zero_distortion;
3484         int dct_coeff= FFABS(block[ scantable[i] ]);
3485         int best_score=256*256*256*120;
3486
3487         if (s->dsp.fdct == ff_fdct_ifast)
3488             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3489         zero_distortion= dct_coeff*dct_coeff;
3490
3491         for(level_index=0; level_index < coeff_count[i]; level_index++){
3492             int distortion;
3493             int level= coeff[level_index][i];
3494             const int alevel= FFABS(level);
3495             int unquant_coeff;
3496
3497             assert(level);
3498
3499             if(s->out_format == FMT_H263){
3500                 unquant_coeff= alevel*qmul + qadd;
3501             }else{ //MPEG1
3502                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3503                 if(s->mb_intra){
3504                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3505                         unquant_coeff =   (unquant_coeff - 1) | 1;
3506                 }else{
3507                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3508                         unquant_coeff =   (unquant_coeff - 1) | 1;
3509                 }
3510                 unquant_coeff<<= 3;
3511             }
3512
3513             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3514             level+=64;
3515             if((level&(~127)) == 0){
3516                 for(j=survivor_count-1; j>=0; j--){
3517                     int run= i - survivor[j];
3518                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3519                     score += score_tab[i-run];
3520
3521                     if(score < best_score){
3522                         best_score= score;
3523                         run_tab[i+1]= run;
3524                         level_tab[i+1]= level-64;
3525                     }
3526                 }
3527
3528                 if(s->out_format == FMT_H263){
3529                     for(j=survivor_count-1; j>=0; j--){
3530                         int run= i - survivor[j];
3531                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3532                         score += score_tab[i-run];
3533                         if(score < last_score){
3534                             last_score= score;
3535                             last_run= run;
3536                             last_level= level-64;
3537                             last_i= i+1;
3538                         }
3539                     }
3540                 }
3541             }else{
3542                 distortion += esc_length*lambda;
3543                 for(j=survivor_count-1; j>=0; j--){
3544                     int run= i - survivor[j];
3545                     int score= distortion + score_tab[i-run];
3546
3547                     if(score < best_score){
3548                         best_score= score;
3549                         run_tab[i+1]= run;
3550                         level_tab[i+1]= level-64;
3551                     }
3552                 }
3553
3554                 if(s->out_format == FMT_H263){
3555                     for(j=survivor_count-1; j>=0; j--){
3556                         int run= i - survivor[j];
3557                         int score= distortion + score_tab[i-run];
3558                         if(score < last_score){
3559                             last_score= score;
3560                             last_run= run;
3561                             last_level= level-64;
3562                             last_i= i+1;
3563                         }
3564                     }
3565                 }
3566             }
3567         }
3568
3569         score_tab[i+1]= best_score;
3570
3571         //Note: MPEG-4 has a VLC code which is 1 bit shorter than another one with a shorter run and the same level
3572         if(last_non_zero <= 27){
3573             for(; survivor_count; survivor_count--){
3574                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3575                     break;
3576             }
3577         }else{
3578             for(; survivor_count; survivor_count--){
3579                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3580                     break;
3581             }
3582         }
3583
3584         survivor[ survivor_count++ ]= i+1;
3585     }
3586
3587     if(s->out_format != FMT_H263){
3588         last_score= 256*256*256*120;
3589         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3590             int score= score_tab[i];
3591             if(i) score += lambda*2; //FIXME could this be more exact?
3592
3593             if(score < last_score){
3594                 last_score= score;
3595                 last_i= i;
3596                 last_level= level_tab[i];
3597                 last_run= run_tab[i];
3598             }
3599         }
3600     }
3601
3602     s->coded_score[n] = last_score;
3603
3604     dc= FFABS(block[0]);
3605     last_non_zero= last_i - 1;
3606     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3607
3608     if(last_non_zero < start_i)
3609         return last_non_zero;
3610
3611     if(last_non_zero == 0 && start_i == 0){
3612         int best_level= 0;
3613         int best_score= dc * dc;
3614
3615         for(i=0; i<coeff_count[0]; i++){
3616             int level= coeff[i][0];
3617             int alevel= FFABS(level);
3618             int unquant_coeff, score, distortion;
3619
3620             if(s->out_format == FMT_H263){
3621                     unquant_coeff= (alevel*qmul + qadd)>>3;
3622             }else{ //MPEG1
3623                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3624                     unquant_coeff =   (unquant_coeff - 1) | 1;
3625             }
3626             unquant_coeff = (unquant_coeff + 4) >> 3;
3627             unquant_coeff<<= 3 + 3;
3628
3629             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3630             level+=64;
3631             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3632             else                    score= distortion + esc_length*lambda;
3633
3634             if(score < best_score){
3635                 best_score= score;
3636                 best_level= level - 64;
3637             }
3638         }
3639         block[0]= best_level;
3640         s->coded_score[n] = best_score - dc*dc;
3641         if(best_level == 0) return -1;
3642         else                return last_non_zero;
3643     }
3644
3645     i= last_i;
3646     assert(last_level);
3647
3648     block[ perm_scantable[last_non_zero] ]= last_level;
3649     i -= last_run + 1;
3650
3651     for(; i>start_i; i -= run_tab[i] + 1){
3652         block[ perm_scantable[i-1] ]= level_tab[i];
3653     }
3654
3655     return last_non_zero;
3656 }
3657
3658 //#define REFINE_STATS 1
3659 static int16_t basis[64][64];
3660
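/* basis[k] holds the 8x8 spatial pattern of DCT coefficient k (in IDCT-permuted
 * order), scaled by 1<<BASIS_SHIFT, so adding coeff*basis[k] to rem[] models how a
 * change of that coefficient alters the reconstruction error. */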
3661 static void build_basis(uint8_t *perm){
3662     int i, j, x, y;
3663     emms_c();
3664     for(i=0; i<8; i++){
3665         for(j=0; j<8; j++){
3666             for(y=0; y<8; y++){
3667                 for(x=0; x<8; x++){
3668                     double s= 0.25*(1<<BASIS_SHIFT);
3669                     int index= 8*i + j;
3670                     int perm_index= perm[index];
3671                     if(i==0) s*= sqrt(0.5);
3672                     if(j==0) s*= sqrt(0.5);
3673                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3674                 }
3675             }
3676         }
3677     }
3678 }
3679
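/* Quantizer noise shaping: starting from the already quantized block, repeatedly
 * try +-1 changes of individual coefficients and apply the one that most reduces
 * weighted reconstruction error (tracked in rem[] via the basis table) plus
 * lambda times the change in estimated VLC bits; stop when no change helps. */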
3680 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3681                         int16_t *block, int16_t *weight, int16_t *orig,
3682                         int n, int qscale){
3683     int16_t rem[64];
3684     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3685     const uint8_t *scantable= s->intra_scantable.scantable;
3686     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3687 //    unsigned int threshold1, threshold2;
3688 //    int bias=0;
3689     int run_tab[65];
3690     int prev_run=0;
3691     int prev_level=0;
3692     int qmul, qadd, start_i, last_non_zero, i, dc;
3693     uint8_t * length;
3694     uint8_t * last_length;
3695     int lambda;
3696     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3697 #ifdef REFINE_STATS
3698 static int count=0;
3699 static int after_last=0;
3700 static int to_zero=0;
3701 static int from_zero=0;
3702 static int raise=0;
3703 static int lower=0;
3704 static int messed_sign=0;
3705 #endif
3706
3707     if(basis[0][0] == 0)
3708         build_basis(s->dsp.idct_permutation);
3709
3710     qmul= qscale*2;
3711     qadd= (qscale-1)|1;
3712     if (s->mb_intra) {
3713         if (!s->h263_aic) {
3714             if (n < 4)
3715                 q = s->y_dc_scale;
3716             else
3717                 q = s->c_dc_scale;
3718         } else{
3719             /* For AIC we skip quant/dequant of INTRADC */
3720             q = 1;
3721             qadd=0;
3722         }
3723         q <<= RECON_SHIFT-3;
3724         /* note: block[0] is assumed to be positive */
3725         dc= block[0]*q;
3726 //        block[0] = (block[0] + (q >> 1)) / q;
3727         start_i = 1;
3728 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3729 //            bias= 1<<(QMAT_SHIFT-1);
3730         length     = s->intra_ac_vlc_length;
3731         last_length= s->intra_ac_vlc_last_length;
3732     } else {
3733         dc= 0;
3734         start_i = 0;
3735         length     = s->inter_ac_vlc_length;
3736         last_length= s->inter_ac_vlc_last_length;
3737     }
3738     last_non_zero = s->block_last_index[n];
3739
3740 #ifdef REFINE_STATS
3741 {START_TIMER
3742 #endif
3743     dc += (1<<(RECON_SHIFT-1));
3744     for(i=0; i<64; i++){
3745         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME use orig directly instead of copying to rem[]
3746     }
3747 #ifdef REFINE_STATS
3748 STOP_TIMER("memset rem[]")}
3749 #endif
3750     sum=0;
3751     for(i=0; i<64; i++){
3752         int one= 36;
3753         int qns=4;
3754         int w;
3755
3756         w= FFABS(weight[i]) + qns*one;
3757         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3758
3759         weight[i] = w;
3760 //        w=weight[i] = (63*qns + (w/2)) / w;
3761
3762         assert(w>0);
3763         assert(w<(1<<6));
3764         sum += w*w;
3765     }
3766     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3767 #ifdef REFINE_STATS
3768 {START_TIMER
3769 #endif
3770     run=0;
3771     rle_index=0;
3772     for(i=start_i; i<=last_non_zero; i++){
3773         int j= perm_scantable[i];
3774         const int level= block[j];
3775         int coeff;
3776
3777         if(level){
3778             if(level<0) coeff= qmul*level - qadd;
3779             else        coeff= qmul*level + qadd;
3780             run_tab[rle_index++]=run;
3781             run=0;
3782
3783             s->dsp.add_8x8basis(rem, basis[j], coeff);
3784         }else{
3785             run++;
3786         }
3787     }
3788 #ifdef REFINE_STATS
3789 if(last_non_zero>0){
3790 STOP_TIMER("init rem[]")
3791 }
3792 }
3793
3794 {START_TIMER
3795 #endif
3796     for(;;){
3797         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3798         int best_coeff=0;
3799         int best_change=0;
3800         int run2, best_unquant_change=0, analyze_gradient;
3801 #ifdef REFINE_STATS
3802 {START_TIMER
3803 #endif
3804         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3805
3806         if(analyze_gradient){
3807 #ifdef REFINE_STATS
3808 {START_TIMER
3809 #endif
3810             for(i=0; i<64; i++){
3811                 int w= weight[i];
3812
3813                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3814             }
3815 #ifdef REFINE_STATS
3816 STOP_TIMER("rem*w*w")}
3817 {START_TIMER
3818 #endif
3819             s->dsp.fdct(d1);
3820 #ifdef REFINE_STATS
3821 STOP_TIMER("dct")}
3822 #endif
3823         }
3824
3825         if(start_i){
3826             const int level= block[0];
3827             int change, old_coeff;
3828
3829             assert(s->mb_intra);
3830
3831             old_coeff= q*level;
3832
3833             for(change=-1; change<=1; change+=2){
3834                 int new_level= level + change;
3835                 int score, new_coeff;
3836
3837                 new_coeff= q*new_level;
3838                 if(new_coeff >= 2048 || new_coeff < 0)
3839                     continue;
3840
3841                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3842                 if(score<best_score){
3843                     best_score= score;
3844                     best_coeff= 0;
3845                     best_change= change;
3846                     best_unquant_change= new_coeff - old_coeff;
3847                 }
3848             }
3849         }
3850
3851         run=0;
3852         rle_index=0;
3853         run2= run_tab[rle_index++];
3854         prev_level=0;
3855         prev_run=0;
3856
3857         for(i=start_i; i<64; i++){
3858             int j= perm_scantable[i];
3859             const int level= block[j];
3860             int change, old_coeff;
3861
3862             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3863                 break;
3864
3865             if(level){
3866                 if(level<0) old_coeff= qmul*level - qadd;
3867                 else        old_coeff= qmul*level + qadd;
3868                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3869             }else{
3870                 old_coeff=0;
3871                 run2--;
3872                 assert(run2>=0 || i >= last_non_zero );
3873             }
3874
3875             for(change=-1; change<=1; change+=2){
3876                 int new_level= level + change;
3877                 int score, new_coeff, unquant_change;
3878
3879                 score=0;
3880                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3881                    continue;
3882
3883                 if(new_level){
3884                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3885                     else            new_coeff= qmul*new_level + qadd;
3886                     if(new_coeff >= 2048 || new_coeff <= -2048)
3887                         continue;
3888                     //FIXME check for overflow
3889
3890                     if(level){
3891                         if(level < 63 && level > -63){
3892                             if(i < last_non_zero)
3893                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3894                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3895                             else
3896                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3897                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3898                         }
3899                     }else{
3900                         assert(FFABS(new_level)==1);
3901
3902                         if(analyze_gradient){
3903                             int g= d1[ scantable[i] ];
3904                             if(g && (g^new_level) >= 0)
3905                                 continue;
3906                         }
3907
3908                         if(i < last_non_zero){
3909                             int next_i= i + run2 + 1;
3910                             int next_level= block[ perm_scantable[next_i] ] + 64;
3911
3912                             if(next_level&(~127))
3913                                 next_level= 0;
3914
3915                             if(next_i < last_non_zero)
3916                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3917                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3918                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3919                             else
3920                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3921                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3922                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3923                         }else{
3924                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3925                             if(prev_level){
3926                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3927                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3928                             }
3929                         }
3930                     }
3931                 }else{
3932                     new_coeff=0;
3933                     assert(FFABS(level)==1);
3934
3935                     if(i < last_non_zero){
3936                         int next_i= i + run2 + 1;
3937                         int next_level= block[ perm_scantable[next_i] ] + 64;
3938
3939                         if(next_level&(~127))
3940                             next_level= 0;
3941
3942                         if(next_i < last_non_zero)
3943                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3944                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
3945                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3946                         else
3947                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3948                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3949                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3950                     }else{
3951                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
3952                         if(prev_level){
3953                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3954                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3955                         }
3956                     }
3957                 }
3958
3959                 score *= lambda;
3960
3961                 unquant_change= new_coeff - old_coeff;
3962                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
3963
3964                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
3965                 if(score<best_score){
3966                     best_score= score;
3967                     best_coeff= i;
3968                     best_change= change;
3969                     best_unquant_change= unquant_change;
3970                 }
3971             }
3972             if(level){
3973                 prev_level= level + 64;
3974                 if(prev_level&(~127))
3975                     prev_level= 0;
3976                 prev_run= run;
3977                 run=0;
3978             }else{
3979                 run++;
3980             }
3981         }
3982 #ifdef REFINE_STATS
3983 STOP_TIMER("iterative step")}
3984 #endif
3985
3986         if(best_change){
3987             int j= perm_scantable[ best_coeff ];
3988
3989             block[j] += best_change;
3990
3991             if(best_coeff > last_non_zero){
3992                 last_non_zero= best_coeff;
3993                 assert(block[j]);
3994 #ifdef REFINE_STATS
3995 after_last++;
3996 #endif
3997             }else{
3998 #ifdef REFINE_STATS
3999 if(block[j]){
4000     if(block[j] - best_change){
4001         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4002             raise++;
4003         }else{
4004             lower++;
4005         }
4006     }else{
4007         from_zero++;
4008     }
4009 }else{
4010     to_zero++;
4011 }
4012 #endif
4013                 for(; last_non_zero>=start_i; last_non_zero--){
4014                     if(block[perm_scantable[last_non_zero]])
4015                         break;
4016                 }
4017             }
4018 #ifdef REFINE_STATS
4019 count++;
4020 if(256*256*256*64 % count == 0){
4021     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4022 }
4023 #endif
4024             run=0;
4025             rle_index=0;
4026             for(i=start_i; i<=last_non_zero; i++){
4027                 int j= perm_scantable[i];
4028                 const int level= block[j];
4029
4030                 if(level){
4031                     run_tab[rle_index++]=run;
4032                     run=0;
4033                 }else{
4034                     run++;
4035                 }
4036             }
4037
4038             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4039         }else{
4040             break;
4041         }
4042     }
4043 #ifdef REFINE_STATS
4044 if(last_non_zero>0){
4045 STOP_TIMER("iterative search")
4046 }
4047 }
4048 #endif
4049
4050     return last_non_zero;
4051 }
4052
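/* Plain (non-trellis) quantizer: forward DCT, then for each coefficient above the
 * dead zone compute level = (|coeff| * qmat + bias) >> QMAT_SHIFT with the sign put
 * back, zero everything below the threshold and return the last nonzero index. */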
4053 int ff_dct_quantize_c(MpegEncContext *s,
4054                         int16_t *block, int n,
4055                         int qscale, int *overflow)
4056 {
4057     int i, j, level, last_non_zero, q, start_i;
4058     const int *qmat;
4059     const uint8_t *scantable= s->intra_scantable.scantable;
4060     int bias;
4061     int max=0;
4062     unsigned int threshold1, threshold2;
4063
4064     s->dsp.fdct (block);
4065
4066     if(s->dct_error_sum)
4067         s->denoise_dct(s, block);
4068
4069     if (s->mb_intra) {
4070         if (!s->h263_aic) {
4071             if (n < 4)
4072                 q = s->y_dc_scale;
4073             else
4074                 q = s->c_dc_scale;
4075             q = q << 3;
4076         } else
4077             /* For AIC we skip quant/dequant of INTRADC */
4078             q = 1 << 3;
4079
4080         /* note: block[0] is assumed to be positive */
4081         block[0] = (block[0] + (q >> 1)) / q;
4082         start_i = 1;
4083         last_non_zero = 0;
4084         qmat = s->q_intra_matrix[qscale];
4085         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4086     } else {
4087         start_i = 0;
4088         last_non_zero = -1;
4089         qmat = s->q_inter_matrix[qscale];
4090         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4091     }
4092     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4093     threshold2= (threshold1<<1);
4094     for(i=63;i>=start_i;i--) {
4095         j = scantable[i];
4096         level = block[j] * qmat[j];
4097
4098         if(((unsigned)(level+threshold1))>threshold2){
4099             last_non_zero = i;
4100             break;
4101         }else{
4102             block[j]=0;
4103         }
4104     }
4105     for(i=start_i; i<=last_non_zero; i++) {
4106         j = scantable[i];
4107         level = block[j] * qmat[j];
4108
4109 //        if(   bias+level >= (1<<QMAT_SHIFT)
4110 //           || bias-level >= (1<<QMAT_SHIFT)){
4111         if(((unsigned)(level+threshold1))>threshold2){
4112             if(level>0){
4113                 level= (bias + level)>>QMAT_SHIFT;
4114                 block[j]= level;
4115             }else{
4116                 level= (bias - level)>>QMAT_SHIFT;
4117                 block[j]= -level;
4118             }
4119             max |=level;
4120         }else{
4121             block[j]=0;
4122         }
4123     }
4124     *overflow= s->max_qcoeff < max; //overflow might have happened
4125
4126     /* We need this permutation so that the coefficients end up in the order the IDCT expects; only the nonzero elements are permuted. */
4127     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4128         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4129
4130     return last_non_zero;
4131 }
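     /*
      * For reference, a minimal sketch of the per-coefficient mapping performed by
      * ff_dct_quantize_c() above (a hypothetical helper, not part of the codebase):
      *
      *     // level = block[j] * qmat[j]; the quantized value is
      *     // sign(level) * ((bias + |level|) >> QMAT_SHIFT),
      *     // which is 0 whenever |level| <= threshold1.
      *     static inline int quantize_one(int level, int bias)
      *     {
      *         if (level > 0)
      *             return   (bias + level) >> QMAT_SHIFT;
      *         else
      *             return -((bias - level) >> QMAT_SHIFT);
      *     }
      */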
4132
4133 #define OFFSET(x) offsetof(MpegEncContext, x)
4134 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4135 static const AVOption h263_options[] = {
4136     { "obmc",         "Use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4137     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4138     { "mb_info",      "Emit macroblock info for RFC 2190 packetization; the parameter value is the maximum payload size.", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4139     FF_MPV_COMMON_OPTS
4140     { NULL },
4141 };
4142
4143 static const AVClass h263_class = {
4144     .class_name = "H.263 encoder",
4145     .item_name  = av_default_item_name,
4146     .option     = h263_options,
4147     .version    = LIBAVUTIL_VERSION_INT,
4148 };
4149
4150 AVCodec ff_h263_encoder = {
4151     .name           = "h263",
4152     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4153     .type           = AVMEDIA_TYPE_VIDEO,
4154     .id             = AV_CODEC_ID_H263,
4155     .priv_data_size = sizeof(MpegEncContext),
4156     .init           = ff_MPV_encode_init,
4157     .encode2        = ff_MPV_encode_picture,
4158     .close          = ff_MPV_encode_end,
4159     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4160     .priv_class     = &h263_class,
4161 };
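     /*
      * Usage sketch (illustrative, not part of this file): the private options in
      * h263_options above are applied through the generic AVOptions mechanism, e.g.
      * via the options dictionary of avcodec_open2():
      *
      *     AVDictionary *opts = NULL;
      *     av_dict_set(&opts, "obmc", "1", 0);              // enable OBMC
      *     av_dict_set(&opts, "structured_slices", "1", 0); // slice positions in GOB headers
      *     avcodec_open2(avctx, avcodec_find_encoder_by_name("h263"), &opts);
      *     av_dict_free(&opts);                             // unrecognized keys remain in 'opts'
      */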
4162
4163 static const AVOption h263p_options[] = {
4164     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4165     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4166     { "obmc",       "Use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4167     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4168     FF_MPV_COMMON_OPTS
4169     { NULL },
4170 };
4171 static const AVClass h263p_class = {
4172     .class_name = "H.263p encoder",
4173     .item_name  = av_default_item_name,
4174     .option     = h263p_options,
4175     .version    = LIBAVUTIL_VERSION_INT,
4176 };
4177
4178 AVCodec ff_h263p_encoder = {
4179     .name           = "h263p",
4180     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4181     .type           = AVMEDIA_TYPE_VIDEO,
4182     .id             = AV_CODEC_ID_H263P,
4183     .priv_data_size = sizeof(MpegEncContext),
4184     .init           = ff_MPV_encode_init,
4185     .encode2        = ff_MPV_encode_picture,
4186     .close          = ff_MPV_encode_end,
4187     .capabilities   = CODEC_CAP_SLICE_THREADS,
4188     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4189     .priv_class     = &h263p_class,
4190 };
4191
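     /* FF_MPV_GENERIC_CLASS(name) expands to a static AVClass named <name>_class
      * that exposes only the shared ff_mpv_generic_options; the encoders below
      * declare no codec-specific private options of their own. */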
4192 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4193
4194 AVCodec ff_msmpeg4v2_encoder = {
4195     .name           = "msmpeg4v2",
4196     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4197     .type           = AVMEDIA_TYPE_VIDEO,
4198     .id             = AV_CODEC_ID_MSMPEG4V2,
4199     .priv_data_size = sizeof(MpegEncContext),
4200     .init           = ff_MPV_encode_init,
4201     .encode2        = ff_MPV_encode_picture,
4202     .close          = ff_MPV_encode_end,
4203     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4204     .priv_class     = &msmpeg4v2_class,
4205 };
4206
4207 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4208
4209 AVCodec ff_msmpeg4v3_encoder = {
4210     .name           = "msmpeg4",
4211     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4212     .type           = AVMEDIA_TYPE_VIDEO,
4213     .id             = AV_CODEC_ID_MSMPEG4V3,
4214     .priv_data_size = sizeof(MpegEncContext),
4215     .init           = ff_MPV_encode_init,
4216     .encode2        = ff_MPV_encode_picture,
4217     .close          = ff_MPV_encode_end,
4218     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4219     .priv_class     = &msmpeg4v3_class,
4220 };
4221
4222 FF_MPV_GENERIC_CLASS(wmv1)
4223
4224 AVCodec ff_wmv1_encoder = {
4225     .name           = "wmv1",
4226     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4227     .type           = AVMEDIA_TYPE_VIDEO,
4228     .id             = AV_CODEC_ID_WMV1,
4229     .priv_data_size = sizeof(MpegEncContext),
4230     .init           = ff_MPV_encode_init,
4231     .encode2        = ff_MPV_encode_picture,
4232     .close          = ff_MPV_encode_end,
4233     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4234     .priv_class     = &wmv1_class,
4235 };