]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
mpegvideo: split the encoding-only parts of ff_MPV_frame_end() into a separate function
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "avcodec.h"
38 #include "dct.h"
39 #include "dsputil.h"
40 #include "mpeg12.h"
41 #include "mpegvideo.h"
42 #include "h261.h"
43 #include "h263.h"
44 #include "mathops.h"
45 #include "mjpegenc.h"
46 #include "msmpeg4.h"
47 #include "faandct.h"
48 #include "thread.h"
49 #include "aandcttab.h"
50 #include "flv.h"
51 #include "mpeg4video.h"
52 #include "internal.h"
53 #include "bytestream.h"
54 #include <limits.h>
55
56 static int encode_picture(MpegEncContext *s, int picture_number);
57 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
58 static int sse_mb(MpegEncContext *s);
59 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
60 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
61
62 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
63 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
64
65 const AVOption ff_mpv_generic_options[] = {
66     FF_MPV_COMMON_OPTS
67     { NULL },
68 };
69
70 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
71                        uint16_t (*qmat16)[2][64],
72                        const uint16_t *quant_matrix,
73                        int bias, int qmin, int qmax, int intra)
74 {
75     int qscale;
76     int shift = 0;
77
78     for (qscale = qmin; qscale <= qmax; qscale++) {
79         int i;
80         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
81             dsp->fdct == ff_jpeg_fdct_islow_10 ||
82             dsp->fdct == ff_faandct) {
83             for (i = 0; i < 64; i++) {
84                 const int j = dsp->idct_permutation[i];
85                 /* 16 <= qscale * quant_matrix[i] <= 7905
86                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
87                  *             19952 <=              x  <= 249205026
88                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
89                  *           3444240 >= (1 << 36) / (x) >= 275 */
90
91                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
92                                         (qscale * quant_matrix[j]));
93             }
94         } else if (dsp->fdct == ff_fdct_ifast) {
95             for (i = 0; i < 64; i++) {
96                 const int j = dsp->idct_permutation[i];
97                 /* 16 <= qscale * quant_matrix[i] <= 7905
98                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
99                  *             19952 <=              x  <= 249205026
100                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
101                  *           3444240 >= (1 << 36) / (x) >= 275 */
102
103                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
104                                         (ff_aanscales[i] * qscale *
105                                          quant_matrix[j]));
106             }
107         } else {
108             for (i = 0; i < 64; i++) {
109                 const int j = dsp->idct_permutation[i];
110                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
111                  * Assume x = qscale * quant_matrix[i]
112                  * So             16 <=              x  <= 7905
113                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
114                  * so          32768 >= (1 << 19) / (x) >= 67 */
115                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
116                                         (qscale * quant_matrix[j]));
117                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
118                 //                    (qscale * quant_matrix[i]);
119                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
120                                        (qscale * quant_matrix[j]);
121
122                 if (qmat16[qscale][0][i] == 0 ||
123                     qmat16[qscale][0][i] == 128 * 256)
124                     qmat16[qscale][0][i] = 128 * 256 - 1;
125                 qmat16[qscale][1][i] =
126                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
127                                 qmat16[qscale][0][i]);
128             }
129         }
130
131         for (i = intra; i < 64; i++) {
132             int64_t max = 8191;
133             if (dsp->fdct == ff_fdct_ifast) {
134                 max = (8191LL * ff_aanscales[i]) >> 14;
135             }
136             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
137                 shift++;
138             }
139         }
140     }
141     if (shift) {
142         av_log(NULL, AV_LOG_INFO,
143                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
144                QMAT_SHIFT - shift);
145     }
146 }
147
148 static inline void update_qscale(MpegEncContext *s)
149 {
150     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
151                 (FF_LAMBDA_SHIFT + 7);
152     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
153
154     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
155                  FF_LAMBDA_SHIFT;
156 }
157
158 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
159 {
160     int i;
161
162     if (matrix) {
163         put_bits(pb, 1, 1);
164         for (i = 0; i < 64; i++) {
165             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
166         }
167     } else
168         put_bits(pb, 1, 0);
169 }
170
171 /**
172  * init s->current_picture.qscale_table from s->lambda_table
173  */
174 void ff_init_qscale_tab(MpegEncContext *s)
175 {
176     int8_t * const qscale_table = s->current_picture.qscale_table;
177     int i;
178
179     for (i = 0; i < s->mb_num; i++) {
180         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
181         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
182         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
183                                                   s->avctx->qmax);
184     }
185 }
186
187 static void update_duplicate_context_after_me(MpegEncContext *dst,
188                                               MpegEncContext *src)
189 {
190 #define COPY(a) dst->a= src->a
191     COPY(pict_type);
192     COPY(current_picture);
193     COPY(f_code);
194     COPY(b_code);
195     COPY(qscale);
196     COPY(lambda);
197     COPY(lambda2);
198     COPY(picture_in_gop_number);
199     COPY(gop_picture_number);
200     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
201     COPY(progressive_frame);    // FIXME don't set in encode_header
202     COPY(partitioned_frame);    // FIXME don't set in encode_header
203 #undef COPY
204 }
205
206 /**
207  * Set the given MpegEncContext to defaults for encoding.
208  * the changed fields will not depend upon the prior state of the MpegEncContext.
209  */
210 static void MPV_encode_defaults(MpegEncContext *s)
211 {
212     int i;
213     ff_MPV_common_defaults(s);
214
215     for (i = -16; i < 16; i++) {
216         default_fcode_tab[i + MAX_MV] = 1;
217     }
218     s->me.mv_penalty = default_mv_penalty;
219     s->fcode_tab     = default_fcode_tab;
220
221     s->input_picture_number  = 0;
222     s->picture_in_gop_number = 0;
223 }
224
225 /* init video encoder */
226 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
227 {
228     MpegEncContext *s = avctx->priv_data;
229     int i, ret;
230     int chroma_h_shift, chroma_v_shift;
231
232     MPV_encode_defaults(s);
233
234     switch (avctx->codec_id) {
235     case AV_CODEC_ID_MPEG2VIDEO:
236         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
237             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
238             av_log(avctx, AV_LOG_ERROR,
239                    "only YUV420 and YUV422 are supported\n");
240             return -1;
241         }
242         break;
243     case AV_CODEC_ID_LJPEG:
244         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
245             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
246             avctx->pix_fmt != AV_PIX_FMT_YUVJ444P &&
247             avctx->pix_fmt != AV_PIX_FMT_BGRA     &&
248             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
249               avctx->pix_fmt != AV_PIX_FMT_YUV422P &&
250               avctx->pix_fmt != AV_PIX_FMT_YUV444P) ||
251              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
252             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
253             return -1;
254         }
255         break;
256     case AV_CODEC_ID_MJPEG:
257         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
258             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
259             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
260               avctx->pix_fmt != AV_PIX_FMT_YUV422P) ||
261              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
262             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
263             return -1;
264         }
265         break;
266     default:
267         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
268             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
269             return -1;
270         }
271     }
272
273     switch (avctx->pix_fmt) {
274     case AV_PIX_FMT_YUVJ422P:
275     case AV_PIX_FMT_YUV422P:
276         s->chroma_format = CHROMA_422;
277         break;
278     case AV_PIX_FMT_YUVJ420P:
279     case AV_PIX_FMT_YUV420P:
280     default:
281         s->chroma_format = CHROMA_420;
282         break;
283     }
284
285     s->bit_rate = avctx->bit_rate;
286     s->width    = avctx->width;
287     s->height   = avctx->height;
288     if (avctx->gop_size > 600 &&
289         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
290         av_log(avctx, AV_LOG_ERROR,
291                "Warning keyframe interval too large! reducing it ...\n");
292         avctx->gop_size = 600;
293     }
294     s->gop_size     = avctx->gop_size;
295     s->avctx        = avctx;
296     s->flags        = avctx->flags;
297     s->flags2       = avctx->flags2;
298     if (avctx->max_b_frames > MAX_B_FRAMES) {
299         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
300                "is %d.\n", MAX_B_FRAMES);
301     }
302     s->max_b_frames = avctx->max_b_frames;
303     s->codec_id     = avctx->codec->id;
304     s->strict_std_compliance = avctx->strict_std_compliance;
305     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
306     s->mpeg_quant         = avctx->mpeg_quant;
307     s->rtp_mode           = !!avctx->rtp_payload_size;
308     s->intra_dc_precision = avctx->intra_dc_precision;
309     s->user_specified_pts = AV_NOPTS_VALUE;
310
311     if (s->gop_size <= 1) {
312         s->intra_only = 1;
313         s->gop_size   = 12;
314     } else {
315         s->intra_only = 0;
316     }
317
318     s->me_method = avctx->me_method;
319
320     /* Fixed QSCALE */
321     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
322
323     s->adaptive_quant = (s->avctx->lumi_masking ||
324                          s->avctx->dark_masking ||
325                          s->avctx->temporal_cplx_masking ||
326                          s->avctx->spatial_cplx_masking  ||
327                          s->avctx->p_masking      ||
328                          s->avctx->border_masking ||
329                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
330                         !s->fixed_qscale;
331
332     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
333
334     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
335         av_log(avctx, AV_LOG_ERROR,
336                "a vbv buffer size is needed, "
337                "for encoding with a maximum bitrate\n");
338         return -1;
339     }
340
341     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
342         av_log(avctx, AV_LOG_INFO,
343                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
344     }
345
346     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
347         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
348         return -1;
349     }
350
351     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
352         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
353         return -1;
354     }
355
356     if (avctx->rc_max_rate &&
357         avctx->rc_max_rate == avctx->bit_rate &&
358         avctx->rc_max_rate != avctx->rc_min_rate) {
359         av_log(avctx, AV_LOG_INFO,
360                "impossible bitrate constraints, this will fail\n");
361     }
362
363     if (avctx->rc_buffer_size &&
364         avctx->bit_rate * (int64_t)avctx->time_base.num >
365             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
366         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
367         return -1;
368     }
369
370     if (!s->fixed_qscale &&
371         avctx->bit_rate * av_q2d(avctx->time_base) >
372             avctx->bit_rate_tolerance) {
373         av_log(avctx, AV_LOG_ERROR,
374                "bitrate tolerance too small for bitrate\n");
375         return -1;
376     }
377
378     if (s->avctx->rc_max_rate &&
379         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
380         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
381          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
382         90000LL * (avctx->rc_buffer_size - 1) >
383             s->avctx->rc_max_rate * 0xFFFFLL) {
384         av_log(avctx, AV_LOG_INFO,
385                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
386                "specified vbv buffer is too large for the given bitrate!\n");
387     }
388
389     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
390         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
391         s->codec_id != AV_CODEC_ID_FLV1) {
392         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
393         return -1;
394     }
395
396     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
397         av_log(avctx, AV_LOG_ERROR,
398                "OBMC is only supported with simple mb decision\n");
399         return -1;
400     }
401
402     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
403         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
404         return -1;
405     }
406
407     if (s->max_b_frames                    &&
408         s->codec_id != AV_CODEC_ID_MPEG4      &&
409         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
410         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
411         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
412         return -1;
413     }
414
415     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
416          s->codec_id == AV_CODEC_ID_H263  ||
417          s->codec_id == AV_CODEC_ID_H263P) &&
418         (avctx->sample_aspect_ratio.num > 255 ||
419          avctx->sample_aspect_ratio.den > 255)) {
420         av_log(avctx, AV_LOG_ERROR,
421                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
422                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
423         return -1;
424     }
425
426     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
427         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
428         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
429         return -1;
430     }
431
432     // FIXME mpeg2 uses that too
433     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
434         av_log(avctx, AV_LOG_ERROR,
435                "mpeg2 style quantization not supported by codec\n");
436         return -1;
437     }
438
439     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
440         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
441         return -1;
442     }
443
444     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
445         s->avctx->mb_decision != FF_MB_DECISION_RD) {
446         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
447         return -1;
448     }
449
450     if (s->avctx->scenechange_threshold < 1000000000 &&
451         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
452         av_log(avctx, AV_LOG_ERROR,
453                "closed gop with scene change detection are not supported yet, "
454                "set threshold to 1000000000\n");
455         return -1;
456     }
457
458     if (s->flags & CODEC_FLAG_LOW_DELAY) {
459         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
460             av_log(avctx, AV_LOG_ERROR,
461                   "low delay forcing is only available for mpeg2\n");
462             return -1;
463         }
464         if (s->max_b_frames != 0) {
465             av_log(avctx, AV_LOG_ERROR,
466                    "b frames cannot be used with low delay\n");
467             return -1;
468         }
469     }
470
471     if (s->q_scale_type == 1) {
472         if (avctx->qmax > 12) {
473             av_log(avctx, AV_LOG_ERROR,
474                    "non linear quant only supports qmax <= 12 currently\n");
475             return -1;
476         }
477     }
478
479     if (s->avctx->thread_count > 1         &&
480         s->codec_id != AV_CODEC_ID_MPEG4      &&
481         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
482         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
483         (s->codec_id != AV_CODEC_ID_H263P)) {
484         av_log(avctx, AV_LOG_ERROR,
485                "multi threaded encoding not supported by codec\n");
486         return -1;
487     }
488
489     if (s->avctx->thread_count < 1) {
490         av_log(avctx, AV_LOG_ERROR,
491                "automatic thread number detection not supported by codec,"
492                "patch welcome\n");
493         return -1;
494     }
495
496     if (s->avctx->thread_count > 1)
497         s->rtp_mode = 1;
498
499     if (!avctx->time_base.den || !avctx->time_base.num) {
500         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
501         return -1;
502     }
503
504     i = (INT_MAX / 2 + 128) >> 8;
505     if (avctx->mb_threshold >= i) {
506         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
507                i - 1);
508         return -1;
509     }
510
511     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
512         av_log(avctx, AV_LOG_INFO,
513                "notice: b_frame_strategy only affects the first pass\n");
514         avctx->b_frame_strategy = 0;
515     }
516
517     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
518     if (i > 1) {
519         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
520         avctx->time_base.den /= i;
521         avctx->time_base.num /= i;
522         //return -1;
523     }
524
525     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
526         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
527         // (a + x * 3 / 8) / x
528         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
529         s->inter_quant_bias = 0;
530     } else {
531         s->intra_quant_bias = 0;
532         // (a - x / 4) / x
533         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
534     }
535
536     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
537         s->intra_quant_bias = avctx->intra_quant_bias;
538     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
539         s->inter_quant_bias = avctx->inter_quant_bias;
540
541     av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
542                                      &chroma_v_shift);
543
544     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
545         s->avctx->time_base.den > (1 << 16) - 1) {
546         av_log(avctx, AV_LOG_ERROR,
547                "timebase %d/%d not supported by MPEG 4 standard, "
548                "the maximum admitted value for the timebase denominator "
549                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
550                (1 << 16) - 1);
551         return -1;
552     }
553     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
554
555     switch (avctx->codec->id) {
556     case AV_CODEC_ID_MPEG1VIDEO:
557         s->out_format = FMT_MPEG1;
558         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
559         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
560         break;
561     case AV_CODEC_ID_MPEG2VIDEO:
562         s->out_format = FMT_MPEG1;
563         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
564         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
565         s->rtp_mode   = 1;
566         break;
567     case AV_CODEC_ID_LJPEG:
568     case AV_CODEC_ID_MJPEG:
569         s->out_format = FMT_MJPEG;
570         s->intra_only = 1; /* force intra only for jpeg */
571         if (avctx->codec->id == AV_CODEC_ID_LJPEG &&
572             avctx->pix_fmt   == AV_PIX_FMT_BGRA) {
573             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
574             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
575             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
576         } else {
577             s->mjpeg_vsample[0] = 2;
578             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
579             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
580             s->mjpeg_hsample[0] = 2;
581             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
582             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
583         }
584         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
585             ff_mjpeg_encode_init(s) < 0)
586             return -1;
587         avctx->delay = 0;
588         s->low_delay = 1;
589         break;
590     case AV_CODEC_ID_H261:
591         if (!CONFIG_H261_ENCODER)
592             return -1;
593         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
594             av_log(avctx, AV_LOG_ERROR,
595                    "The specified picture size of %dx%d is not valid for the "
596                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
597                     s->width, s->height);
598             return -1;
599         }
600         s->out_format = FMT_H261;
601         avctx->delay  = 0;
602         s->low_delay  = 1;
603         break;
604     case AV_CODEC_ID_H263:
605         if (!CONFIG_H263_ENCODER)
606         return -1;
607         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
608                              s->width, s->height) == 8) {
609             av_log(avctx, AV_LOG_INFO,
610                    "The specified picture size of %dx%d is not valid for "
611                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
612                    "352x288, 704x576, and 1408x1152."
613                    "Try H.263+.\n", s->width, s->height);
614             return -1;
615         }
616         s->out_format = FMT_H263;
617         avctx->delay  = 0;
618         s->low_delay  = 1;
619         break;
620     case AV_CODEC_ID_H263P:
621         s->out_format = FMT_H263;
622         s->h263_plus  = 1;
623         /* Fx */
624         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
625         s->modified_quant  = s->h263_aic;
626         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
627         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
628
629         /* /Fx */
630         /* These are just to be sure */
631         avctx->delay = 0;
632         s->low_delay = 1;
633         break;
634     case AV_CODEC_ID_FLV1:
635         s->out_format      = FMT_H263;
636         s->h263_flv        = 2; /* format = 1; 11-bit codes */
637         s->unrestricted_mv = 1;
638         s->rtp_mode  = 0; /* don't allow GOB */
639         avctx->delay = 0;
640         s->low_delay = 1;
641         break;
642     case AV_CODEC_ID_RV10:
643         s->out_format = FMT_H263;
644         avctx->delay  = 0;
645         s->low_delay  = 1;
646         break;
647     case AV_CODEC_ID_RV20:
648         s->out_format      = FMT_H263;
649         avctx->delay       = 0;
650         s->low_delay       = 1;
651         s->modified_quant  = 1;
652         s->h263_aic        = 1;
653         s->h263_plus       = 1;
654         s->loop_filter     = 1;
655         s->unrestricted_mv = 0;
656         break;
657     case AV_CODEC_ID_MPEG4:
658         s->out_format      = FMT_H263;
659         s->h263_pred       = 1;
660         s->unrestricted_mv = 1;
661         s->low_delay       = s->max_b_frames ? 0 : 1;
662         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
663         break;
664     case AV_CODEC_ID_MSMPEG4V2:
665         s->out_format      = FMT_H263;
666         s->h263_pred       = 1;
667         s->unrestricted_mv = 1;
668         s->msmpeg4_version = 2;
669         avctx->delay       = 0;
670         s->low_delay       = 1;
671         break;
672     case AV_CODEC_ID_MSMPEG4V3:
673         s->out_format        = FMT_H263;
674         s->h263_pred         = 1;
675         s->unrestricted_mv   = 1;
676         s->msmpeg4_version   = 3;
677         s->flipflop_rounding = 1;
678         avctx->delay         = 0;
679         s->low_delay         = 1;
680         break;
681     case AV_CODEC_ID_WMV1:
682         s->out_format        = FMT_H263;
683         s->h263_pred         = 1;
684         s->unrestricted_mv   = 1;
685         s->msmpeg4_version   = 4;
686         s->flipflop_rounding = 1;
687         avctx->delay         = 0;
688         s->low_delay         = 1;
689         break;
690     case AV_CODEC_ID_WMV2:
691         s->out_format        = FMT_H263;
692         s->h263_pred         = 1;
693         s->unrestricted_mv   = 1;
694         s->msmpeg4_version   = 5;
695         s->flipflop_rounding = 1;
696         avctx->delay         = 0;
697         s->low_delay         = 1;
698         break;
699     default:
700         return -1;
701     }
702
703     avctx->has_b_frames = !s->low_delay;
704
705     s->encoding = 1;
706
707     s->progressive_frame    =
708     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
709                                                 CODEC_FLAG_INTERLACED_ME) ||
710                                 s->alternate_scan);
711
712     /* init */
713     if (ff_MPV_common_init(s) < 0)
714         return -1;
715
716     if (ARCH_X86)
717         ff_MPV_encode_init_x86(s);
718
719     s->avctx->coded_frame = &s->current_picture.f;
720
721     if (s->msmpeg4_version) {
722         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
723                           2 * 2 * (MAX_LEVEL + 1) *
724                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
725     }
726     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
727
728     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
729     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
730     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
731     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
732     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
733                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
734     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
735                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
736
737     if (s->avctx->noise_reduction) {
738         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
739                           2 * 64 * sizeof(uint16_t), fail);
740     }
741
742     ff_h263dsp_init(&s->h263dsp);
743     if (!s->dct_quantize)
744         s->dct_quantize = ff_dct_quantize_c;
745     if (!s->denoise_dct)
746         s->denoise_dct  = denoise_dct_c;
747     s->fast_dct_quantize = s->dct_quantize;
748     if (avctx->trellis)
749         s->dct_quantize  = dct_quantize_trellis_c;
750
751     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
752         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
753
754     s->quant_precision = 5;
755
756     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
757     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
758
759     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
760         ff_h261_encode_init(s);
761     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
762         ff_h263_encode_init(s);
763     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
764         ff_msmpeg4_encode_init(s);
765     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
766         && s->out_format == FMT_MPEG1)
767         ff_mpeg1_encode_init(s);
768
769     /* init q matrix */
770     for (i = 0; i < 64; i++) {
771         int j = s->dsp.idct_permutation[i];
772         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
773             s->mpeg_quant) {
774             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
775             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
776         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
777             s->intra_matrix[j] =
778             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
779         } else {
780             /* mpeg1/2 */
781             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
782             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
783         }
784         if (s->avctx->intra_matrix)
785             s->intra_matrix[j] = s->avctx->intra_matrix[i];
786         if (s->avctx->inter_matrix)
787             s->inter_matrix[j] = s->avctx->inter_matrix[i];
788     }
789
790     /* precompute matrix */
791     /* for mjpeg, we do include qscale in the matrix */
792     if (s->out_format != FMT_MJPEG) {
793         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
794                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
795                           31, 1);
796         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
797                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
798                           31, 0);
799     }
800
801     if (ff_rate_control_init(s) < 0)
802         return -1;
803
804 #if FF_API_ERROR_RATE
805     FF_DISABLE_DEPRECATION_WARNINGS
806     if (avctx->error_rate)
807         s->error_rate = avctx->error_rate;
808     FF_ENABLE_DEPRECATION_WARNINGS;
809 #endif
810
811     if (avctx->b_frame_strategy == 2) {
812         for (i = 0; i < s->max_b_frames + 2; i++) {
813             s->tmp_frames[i] = av_frame_alloc();
814             if (!s->tmp_frames[i])
815                 return AVERROR(ENOMEM);
816
817             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
818             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
819             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
820
821             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
822             if (ret < 0)
823                 return ret;
824         }
825     }
826
827     return 0;
828 fail:
829     ff_MPV_encode_end(avctx);
830     return AVERROR_UNKNOWN;
831 }
832
833 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
834 {
835     MpegEncContext *s = avctx->priv_data;
836     int i;
837
838     ff_rate_control_uninit(s);
839
840     ff_MPV_common_end(s);
841     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
842         s->out_format == FMT_MJPEG)
843         ff_mjpeg_encode_close(s);
844
845     av_freep(&avctx->extradata);
846
847     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
848         av_frame_free(&s->tmp_frames[i]);
849
850     return 0;
851 }
852
/**
 * Sum of absolute errors of a 16x16 pixel block against a constant value.
 *
 * @param src    top-left pixel of the block
 * @param ref    value every pixel is compared with
 * @param stride distance in bytes between two consecutive rows of src
 * @return sum over the 256 pixels of |pixel - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
866
867 static int get_intra_count(MpegEncContext *s, uint8_t *src,
868                            uint8_t *ref, int stride)
869 {
870     int x, y, w, h;
871     int acc = 0;
872
873     w = s->width  & ~15;
874     h = s->height & ~15;
875
876     for (y = 0; y < h; y += 16) {
877         for (x = 0; x < w; x += 16) {
878             int offset = x + y * stride;
879             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
880                                      16);
881             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
882             int sae  = get_sae(src + offset, mean, stride);
883
884             acc += sae + 500 < sad;
885         }
886     }
887     return acc;
888 }
889
890
891 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
892 {
893     Picture *pic = NULL;
894     int64_t pts;
895     int i, display_picture_number = 0, ret;
896     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
897                                                  (s->low_delay ? 0 : 1);
898     int direct = 1;
899
900     if (pic_arg) {
901         pts = pic_arg->pts;
902         display_picture_number = s->input_picture_number++;
903
904         if (pts != AV_NOPTS_VALUE) {
905             if (s->user_specified_pts != AV_NOPTS_VALUE) {
906                 int64_t time = pts;
907                 int64_t last = s->user_specified_pts;
908
909                 if (time <= last) {
910                     av_log(s->avctx, AV_LOG_ERROR,
911                            "Error, Invalid timestamp=%"PRId64", "
912                            "last=%"PRId64"\n", pts, s->user_specified_pts);
913                     return -1;
914                 }
915
916                 if (!s->low_delay && display_picture_number == 1)
917                     s->dts_delta = time - last;
918             }
919             s->user_specified_pts = pts;
920         } else {
921             if (s->user_specified_pts != AV_NOPTS_VALUE) {
922                 s->user_specified_pts =
923                 pts = s->user_specified_pts + 1;
924                 av_log(s->avctx, AV_LOG_INFO,
925                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
926                        pts);
927             } else {
928                 pts = display_picture_number;
929             }
930         }
931     }
932
933     if (pic_arg) {
934         if (!pic_arg->buf[0]);
935             direct = 0;
936         if (pic_arg->linesize[0] != s->linesize)
937             direct = 0;
938         if (pic_arg->linesize[1] != s->uvlinesize)
939             direct = 0;
940         if (pic_arg->linesize[2] != s->uvlinesize)
941             direct = 0;
942
943         av_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
944                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
945
946         if (direct) {
947             i = ff_find_unused_picture(s, 1);
948             if (i < 0)
949                 return i;
950
951             pic = &s->picture[i];
952             pic->reference = 3;
953
954             if ((ret = av_frame_ref(&pic->f, pic_arg)) < 0)
955                 return ret;
956             if (ff_alloc_picture(s, pic, 1) < 0) {
957                 return -1;
958             }
959         } else {
960             i = ff_find_unused_picture(s, 0);
961             if (i < 0)
962                 return i;
963
964             pic = &s->picture[i];
965             pic->reference = 3;
966
967             if (ff_alloc_picture(s, pic, 0) < 0) {
968                 return -1;
969             }
970
971             if (pic->f.data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
972                 pic->f.data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
973                 pic->f.data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
974                 // empty
975             } else {
976                 int h_chroma_shift, v_chroma_shift;
977                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
978                                                  &h_chroma_shift,
979                                                  &v_chroma_shift);
980
981                 for (i = 0; i < 3; i++) {
982                     int src_stride = pic_arg->linesize[i];
983                     int dst_stride = i ? s->uvlinesize : s->linesize;
984                     int h_shift = i ? h_chroma_shift : 0;
985                     int v_shift = i ? v_chroma_shift : 0;
986                     int w = s->width  >> h_shift;
987                     int h = s->height >> v_shift;
988                     uint8_t *src = pic_arg->data[i];
989                     uint8_t *dst = pic->f.data[i];
990
991                     if (!s->avctx->rc_buffer_size)
992                         dst += INPLACE_OFFSET;
993
994                     if (src_stride == dst_stride)
995                         memcpy(dst, src, src_stride * h);
996                     else {
997                         while (h--) {
998                             memcpy(dst, src, w);
999                             dst += dst_stride;
1000                             src += src_stride;
1001                         }
1002                     }
1003                 }
1004             }
1005         }
1006         ret = av_frame_copy_props(&pic->f, pic_arg);
1007         if (ret < 0)
1008             return ret;
1009
1010         pic->f.display_picture_number = display_picture_number;
1011         pic->f.pts = pts; // we set this here to avoid modifiying pic_arg
1012     }
1013
1014     /* shift buffer entries */
1015     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1016         s->input_picture[i - 1] = s->input_picture[i];
1017
1018     s->input_picture[encoding_delay] = (Picture*) pic;
1019
1020     return 0;
1021 }
1022
1023 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1024 {
1025     int x, y, plane;
1026     int score = 0;
1027     int64_t score64 = 0;
1028
1029     for (plane = 0; plane < 3; plane++) {
1030         const int stride = p->f.linesize[plane];
1031         const int bw = plane ? 1 : 2;
1032         for (y = 0; y < s->mb_height * bw; y++) {
1033             for (x = 0; x < s->mb_width * bw; x++) {
1034                 int off = p->shared ? 0 : 16;
1035                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1036                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1037                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1038
1039                 switch (s->avctx->frame_skip_exp) {
1040                 case 0: score    =  FFMAX(score, v);          break;
1041                 case 1: score   += FFABS(v);                  break;
1042                 case 2: score   += v * v;                     break;
1043                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1044                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1045                 }
1046             }
1047         }
1048     }
1049
1050     if (score)
1051         score64 = score;
1052
1053     if (score64 < s->avctx->frame_skip_threshold)
1054         return 1;
1055     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1056         return 1;
1057     return 0;
1058 }
1059
1060 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1061 {
1062     AVPacket pkt = { 0 };
1063     int ret, got_output;
1064
1065     av_init_packet(&pkt);
1066     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1067     if (ret < 0)
1068         return ret;
1069
1070     ret = pkt.size;
1071     av_free_packet(&pkt);
1072     return ret;
1073 }
1074
1075 static int estimate_best_b_count(MpegEncContext *s)
1076 {
1077     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1078     AVCodecContext *c = avcodec_alloc_context3(NULL);
1079     const int scale = s->avctx->brd_scale;
1080     int i, j, out_size, p_lambda, b_lambda, lambda2;
1081     int64_t best_rd  = INT64_MAX;
1082     int best_b_count = -1;
1083
1084     assert(scale >= 0 && scale <= 3);
1085
1086     //emms_c();
1087     //s->next_picture_ptr->quality;
1088     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1089     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1090     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1091     if (!b_lambda) // FIXME we should do this somewhere else
1092         b_lambda = p_lambda;
1093     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1094                FF_LAMBDA_SHIFT;
1095
1096     c->width        = s->width  >> scale;
1097     c->height       = s->height >> scale;
1098     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1099                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1100     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1101     c->mb_decision  = s->avctx->mb_decision;
1102     c->me_cmp       = s->avctx->me_cmp;
1103     c->mb_cmp       = s->avctx->mb_cmp;
1104     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1105     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1106     c->time_base    = s->avctx->time_base;
1107     c->max_b_frames = s->max_b_frames;
1108
1109     if (avcodec_open2(c, codec, NULL) < 0)
1110         return -1;
1111
1112     for (i = 0; i < s->max_b_frames + 2; i++) {
1113         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1114                                                 s->next_picture_ptr;
1115
1116         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1117             pre_input = *pre_input_ptr;
1118
1119             if (!pre_input.shared && i) {
1120                 pre_input.f.data[0] += INPLACE_OFFSET;
1121                 pre_input.f.data[1] += INPLACE_OFFSET;
1122                 pre_input.f.data[2] += INPLACE_OFFSET;
1123             }
1124
1125             s->dsp.shrink[scale](s->tmp_frames[i]->data[0], s->tmp_frames[i]->linesize[0],
1126                                  pre_input.f.data[0], pre_input.f.linesize[0],
1127                                  c->width,      c->height);
1128             s->dsp.shrink[scale](s->tmp_frames[i]->data[1], s->tmp_frames[i]->linesize[1],
1129                                  pre_input.f.data[1], pre_input.f.linesize[1],
1130                                  c->width >> 1, c->height >> 1);
1131             s->dsp.shrink[scale](s->tmp_frames[i]->data[2], s->tmp_frames[i]->linesize[2],
1132                                  pre_input.f.data[2], pre_input.f.linesize[2],
1133                                  c->width >> 1, c->height >> 1);
1134         }
1135     }
1136
1137     for (j = 0; j < s->max_b_frames + 1; j++) {
1138         int64_t rd = 0;
1139
1140         if (!s->input_picture[j])
1141             break;
1142
1143         c->error[0] = c->error[1] = c->error[2] = 0;
1144
1145         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1146         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1147
1148         out_size = encode_frame(c, s->tmp_frames[0]);
1149
1150         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1151
1152         for (i = 0; i < s->max_b_frames + 1; i++) {
1153             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1154
1155             s->tmp_frames[i + 1]->pict_type = is_p ?
1156                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1157             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1158
1159             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1160
1161             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1162         }
1163
1164         /* get the delayed frames */
1165         while (out_size) {
1166             out_size = encode_frame(c, NULL);
1167             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1168         }
1169
1170         rd += c->error[0] + c->error[1] + c->error[2];
1171
1172         if (rd < best_rd) {
1173             best_rd = rd;
1174             best_b_count = j;
1175         }
1176     }
1177
1178     avcodec_close(c);
1179     av_freep(&c);
1180
1181     return best_b_count;
1182 }
1183
1184 static int select_input_picture(MpegEncContext *s)
1185 {
1186     int i, ret;
1187
1188     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1189         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1190     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1191
1192     /* set next picture type & ordering */
1193     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1194         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1195             s->next_picture_ptr == NULL || s->intra_only) {
1196             s->reordered_input_picture[0] = s->input_picture[0];
1197             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1198             s->reordered_input_picture[0]->f.coded_picture_number =
1199                 s->coded_picture_number++;
1200         } else {
1201             int b_frames;
1202
1203             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1204                 if (s->picture_in_gop_number < s->gop_size &&
1205                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1206                     // FIXME check that te gop check above is +-1 correct
1207                     av_frame_unref(&s->input_picture[0]->f);
1208
1209                     emms_c();
1210                     ff_vbv_update(s, 0);
1211
1212                     goto no_output_pic;
1213                 }
1214             }
1215
1216             if (s->flags & CODEC_FLAG_PASS2) {
1217                 for (i = 0; i < s->max_b_frames + 1; i++) {
1218                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1219
1220                     if (pict_num >= s->rc_context.num_entries)
1221                         break;
1222                     if (!s->input_picture[i]) {
1223                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1224                         break;
1225                     }
1226
1227                     s->input_picture[i]->f.pict_type =
1228                         s->rc_context.entry[pict_num].new_pict_type;
1229                 }
1230             }
1231
1232             if (s->avctx->b_frame_strategy == 0) {
1233                 b_frames = s->max_b_frames;
1234                 while (b_frames && !s->input_picture[b_frames])
1235                     b_frames--;
1236             } else if (s->avctx->b_frame_strategy == 1) {
1237                 for (i = 1; i < s->max_b_frames + 1; i++) {
1238                     if (s->input_picture[i] &&
1239                         s->input_picture[i]->b_frame_score == 0) {
1240                         s->input_picture[i]->b_frame_score =
1241                             get_intra_count(s,
1242                                             s->input_picture[i    ]->f.data[0],
1243                                             s->input_picture[i - 1]->f.data[0],
1244                                             s->linesize) + 1;
1245                     }
1246                 }
1247                 for (i = 0; i < s->max_b_frames + 1; i++) {
1248                     if (s->input_picture[i] == NULL ||
1249                         s->input_picture[i]->b_frame_score - 1 >
1250                             s->mb_num / s->avctx->b_sensitivity)
1251                         break;
1252                 }
1253
1254                 b_frames = FFMAX(0, i - 1);
1255
1256                 /* reset scores */
1257                 for (i = 0; i < b_frames + 1; i++) {
1258                     s->input_picture[i]->b_frame_score = 0;
1259                 }
1260             } else if (s->avctx->b_frame_strategy == 2) {
1261                 b_frames = estimate_best_b_count(s);
1262             } else {
1263                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1264                 b_frames = 0;
1265             }
1266
1267             emms_c();
1268
1269             for (i = b_frames - 1; i >= 0; i--) {
1270                 int type = s->input_picture[i]->f.pict_type;
1271                 if (type && type != AV_PICTURE_TYPE_B)
1272                     b_frames = i;
1273             }
1274             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1275                 b_frames == s->max_b_frames) {
1276                 av_log(s->avctx, AV_LOG_ERROR,
1277                        "warning, too many b frames in a row\n");
1278             }
1279
1280             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1281                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1282                     s->gop_size > s->picture_in_gop_number) {
1283                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1284                 } else {
1285                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1286                         b_frames = 0;
1287                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1288                 }
1289             }
1290
1291             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1292                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1293                 b_frames--;
1294
1295             s->reordered_input_picture[0] = s->input_picture[b_frames];
1296             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1297                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1298             s->reordered_input_picture[0]->f.coded_picture_number =
1299                 s->coded_picture_number++;
1300             for (i = 0; i < b_frames; i++) {
1301                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1302                 s->reordered_input_picture[i + 1]->f.pict_type =
1303                     AV_PICTURE_TYPE_B;
1304                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1305                     s->coded_picture_number++;
1306             }
1307         }
1308     }
1309 no_output_pic:
1310     if (s->reordered_input_picture[0]) {
1311         s->reordered_input_picture[0]->reference =
1312            s->reordered_input_picture[0]->f.pict_type !=
1313                AV_PICTURE_TYPE_B ? 3 : 0;
1314
1315         ff_mpeg_unref_picture(s, &s->new_picture);
1316         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1317             return ret;
1318
1319         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1320             // input is a shared pix, so we can't modifiy it -> alloc a new
1321             // one & ensure that the shared one is reuseable
1322
1323             Picture *pic;
1324             int i = ff_find_unused_picture(s, 0);
1325             if (i < 0)
1326                 return i;
1327             pic = &s->picture[i];
1328
1329             pic->reference = s->reordered_input_picture[0]->reference;
1330             if (ff_alloc_picture(s, pic, 0) < 0) {
1331                 return -1;
1332             }
1333
1334             ret = av_frame_copy_props(&pic->f, &s->reordered_input_picture[0]->f);
1335             if (ret < 0)
1336                 return ret;
1337
1338             /* mark us unused / free shared pic */
1339             av_frame_unref(&s->reordered_input_picture[0]->f);
1340             s->reordered_input_picture[0]->shared = 0;
1341
1342             s->current_picture_ptr = pic;
1343         } else {
1344             // input is not a shared pix -> reuse buffer for current_pix
1345             s->current_picture_ptr = s->reordered_input_picture[0];
1346             for (i = 0; i < 4; i++) {
1347                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1348             }
1349         }
1350         ff_mpeg_unref_picture(s, &s->current_picture);
1351         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1352                                        s->current_picture_ptr)) < 0)
1353             return ret;
1354
1355         s->picture_number = s->new_picture.f.display_picture_number;
1356     } else {
1357         ff_mpeg_unref_picture(s, &s->new_picture);
1358     }
1359     return 0;
1360 }
1361
1362 static void frame_end(MpegEncContext *s)
1363 {
1364     int i;
1365
1366     if (s->unrestricted_mv &&
1367         s->current_picture.reference &&
1368         !s->intra_only) {
1369         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1370         int hshift = desc->log2_chroma_w;
1371         int vshift = desc->log2_chroma_h;
1372         s->dsp.draw_edges(s->current_picture.f.data[0], s->linesize,
1373                           s->h_edge_pos, s->v_edge_pos,
1374                           EDGE_WIDTH, EDGE_WIDTH,
1375                           EDGE_TOP | EDGE_BOTTOM);
1376         s->dsp.draw_edges(s->current_picture.f.data[1], s->uvlinesize,
1377                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1378                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1379                           EDGE_TOP | EDGE_BOTTOM);
1380         s->dsp.draw_edges(s->current_picture.f.data[2], s->uvlinesize,
1381                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1382                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1383                           EDGE_TOP | EDGE_BOTTOM);
1384     }
1385
1386     emms_c();
1387
1388     s->last_pict_type                 = s->pict_type;
1389     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f.quality;
1390     if (s->pict_type!= AV_PICTURE_TYPE_B)
1391         s->last_non_b_pict_type = s->pict_type;
1392
1393     if (s->encoding) {
1394         /* release non-reference frames */
1395         for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1396             if (!s->picture[i].reference)
1397                 ff_mpeg_unref_picture(s, &s->picture[i]);
1398         }
1399     }
1400
1401     s->avctx->coded_frame = &s->current_picture_ptr->f;
1402
1403 }
1404
1405 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1406                           const AVFrame *pic_arg, int *got_packet)
1407 {
1408     MpegEncContext *s = avctx->priv_data;
1409     int i, stuffing_count, ret;
1410     int context_count = s->slice_context_count;
1411
1412     s->picture_in_gop_number++;
1413
1414     if (load_input_picture(s, pic_arg) < 0)
1415         return -1;
1416
1417     if (select_input_picture(s) < 0) {
1418         return -1;
1419     }
1420
1421     /* output? */
1422     if (s->new_picture.f.data[0]) {
1423         if (!pkt->data &&
1424             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1425             return ret;
1426         if (s->mb_info) {
1427             s->mb_info_ptr = av_packet_new_side_data(pkt,
1428                                  AV_PKT_DATA_H263_MB_INFO,
1429                                  s->mb_width*s->mb_height*12);
1430             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1431         }
1432
1433         for (i = 0; i < context_count; i++) {
1434             int start_y = s->thread_context[i]->start_mb_y;
1435             int   end_y = s->thread_context[i]->  end_mb_y;
1436             int h       = s->mb_height;
1437             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1438             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1439
1440             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1441         }
1442
1443         s->pict_type = s->new_picture.f.pict_type;
1444         //emms_c();
1445         ff_MPV_frame_start(s, avctx);
1446 vbv_retry:
1447         if (encode_picture(s, s->picture_number) < 0)
1448             return -1;
1449
1450         avctx->header_bits = s->header_bits;
1451         avctx->mv_bits     = s->mv_bits;
1452         avctx->misc_bits   = s->misc_bits;
1453         avctx->i_tex_bits  = s->i_tex_bits;
1454         avctx->p_tex_bits  = s->p_tex_bits;
1455         avctx->i_count     = s->i_count;
1456         // FIXME f/b_count in avctx
1457         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1458         avctx->skip_count  = s->skip_count;
1459
1460         frame_end(s);
1461
1462         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1463             ff_mjpeg_encode_picture_trailer(s);
1464
1465         if (avctx->rc_buffer_size) {
1466             RateControlContext *rcc = &s->rc_context;
1467             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1468
1469             if (put_bits_count(&s->pb) > max_size &&
1470                 s->lambda < s->avctx->lmax) {
1471                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1472                                        (s->qscale + 1) / s->qscale);
1473                 if (s->adaptive_quant) {
1474                     int i;
1475                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1476                         s->lambda_table[i] =
1477                             FFMAX(s->lambda_table[i] + 1,
1478                                   s->lambda_table[i] * (s->qscale + 1) /
1479                                   s->qscale);
1480                 }
1481                 s->mb_skipped = 0;        // done in MPV_frame_start()
1482                 // done in encode_picture() so we must undo it
1483                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1484                     if (s->flipflop_rounding          ||
1485                         s->codec_id == AV_CODEC_ID_H263P ||
1486                         s->codec_id == AV_CODEC_ID_MPEG4)
1487                         s->no_rounding ^= 1;
1488                 }
1489                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1490                     s->time_base       = s->last_time_base;
1491                     s->last_non_b_time = s->time - s->pp_time;
1492                 }
1493                 for (i = 0; i < context_count; i++) {
1494                     PutBitContext *pb = &s->thread_context[i]->pb;
1495                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1496                 }
1497                 goto vbv_retry;
1498             }
1499
1500             assert(s->avctx->rc_max_rate);
1501         }
1502
1503         if (s->flags & CODEC_FLAG_PASS1)
1504             ff_write_pass1_stats(s);
1505
1506         for (i = 0; i < 4; i++) {
1507             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1508             avctx->error[i] += s->current_picture_ptr->f.error[i];
1509         }
1510
1511         if (s->flags & CODEC_FLAG_PASS1)
1512             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1513                    avctx->i_tex_bits + avctx->p_tex_bits ==
1514                        put_bits_count(&s->pb));
1515         flush_put_bits(&s->pb);
1516         s->frame_bits  = put_bits_count(&s->pb);
1517
1518         stuffing_count = ff_vbv_update(s, s->frame_bits);
1519         if (stuffing_count) {
1520             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1521                     stuffing_count + 50) {
1522                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1523                 return -1;
1524             }
1525
1526             switch (s->codec_id) {
1527             case AV_CODEC_ID_MPEG1VIDEO:
1528             case AV_CODEC_ID_MPEG2VIDEO:
1529                 while (stuffing_count--) {
1530                     put_bits(&s->pb, 8, 0);
1531                 }
1532             break;
1533             case AV_CODEC_ID_MPEG4:
1534                 put_bits(&s->pb, 16, 0);
1535                 put_bits(&s->pb, 16, 0x1C3);
1536                 stuffing_count -= 4;
1537                 while (stuffing_count--) {
1538                     put_bits(&s->pb, 8, 0xFF);
1539                 }
1540             break;
1541             default:
1542                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1543             }
1544             flush_put_bits(&s->pb);
1545             s->frame_bits  = put_bits_count(&s->pb);
1546         }
1547
1548         /* update mpeg1/2 vbv_delay for CBR */
1549         if (s->avctx->rc_max_rate                          &&
1550             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1551             s->out_format == FMT_MPEG1                     &&
1552             90000LL * (avctx->rc_buffer_size - 1) <=
1553                 s->avctx->rc_max_rate * 0xFFFFLL) {
1554             int vbv_delay, min_delay;
1555             double inbits  = s->avctx->rc_max_rate *
1556                              av_q2d(s->avctx->time_base);
1557             int    minbits = s->frame_bits - 8 *
1558                              (s->vbv_delay_ptr - s->pb.buf - 1);
1559             double bits    = s->rc_context.buffer_index + minbits - inbits;
1560
1561             if (bits < 0)
1562                 av_log(s->avctx, AV_LOG_ERROR,
1563                        "Internal error, negative bits\n");
1564
1565             assert(s->repeat_first_field == 0);
1566
1567             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1568             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1569                         s->avctx->rc_max_rate;
1570
1571             vbv_delay = FFMAX(vbv_delay, min_delay);
1572
1573             assert(vbv_delay < 0xFFFF);
1574
1575             s->vbv_delay_ptr[0] &= 0xF8;
1576             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1577             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1578             s->vbv_delay_ptr[2] &= 0x07;
1579             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1580             avctx->vbv_delay     = vbv_delay * 300;
1581         }
1582         s->total_bits     += s->frame_bits;
1583         avctx->frame_bits  = s->frame_bits;
1584
1585         pkt->pts = s->current_picture.f.pts;
1586         if (!s->low_delay) {
1587             if (!s->current_picture.f.coded_picture_number)
1588                 pkt->dts = pkt->pts - s->dts_delta;
1589             else
1590                 pkt->dts = s->reordered_pts;
1591             s->reordered_pts = s->input_picture[0]->f.pts;
1592         } else
1593             pkt->dts = pkt->pts;
1594         if (s->current_picture.f.key_frame)
1595             pkt->flags |= AV_PKT_FLAG_KEY;
1596         if (s->mb_info)
1597             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1598     } else {
1599         s->frame_bits = 0;
1600     }
1601     assert((s->frame_bits & 7) == 0);
1602
1603     pkt->size = s->frame_bits / 8;
1604     *got_packet = !!pkt->size;
1605     return 0;
1606 }
1607
1608 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1609                                                 int n, int threshold)
1610 {
1611     static const char tab[64] = {
1612         3, 2, 2, 1, 1, 1, 1, 1,
1613         1, 1, 1, 1, 1, 1, 1, 1,
1614         1, 1, 1, 1, 1, 1, 1, 1,
1615         0, 0, 0, 0, 0, 0, 0, 0,
1616         0, 0, 0, 0, 0, 0, 0, 0,
1617         0, 0, 0, 0, 0, 0, 0, 0,
1618         0, 0, 0, 0, 0, 0, 0, 0,
1619         0, 0, 0, 0, 0, 0, 0, 0
1620     };
1621     int score = 0;
1622     int run = 0;
1623     int i;
1624     int16_t *block = s->block[n];
1625     const int last_index = s->block_last_index[n];
1626     int skip_dc;
1627
1628     if (threshold < 0) {
1629         skip_dc = 0;
1630         threshold = -threshold;
1631     } else
1632         skip_dc = 1;
1633
1634     /* Are all we could set to zero already zero? */
1635     if (last_index <= skip_dc - 1)
1636         return;
1637
1638     for (i = 0; i <= last_index; i++) {
1639         const int j = s->intra_scantable.permutated[i];
1640         const int level = FFABS(block[j]);
1641         if (level == 1) {
1642             if (skip_dc && i == 0)
1643                 continue;
1644             score += tab[run];
1645             run = 0;
1646         } else if (level > 1) {
1647             return;
1648         } else {
1649             run++;
1650         }
1651     }
1652     if (score >= threshold)
1653         return;
1654     for (i = skip_dc; i <= last_index; i++) {
1655         const int j = s->intra_scantable.permutated[i];
1656         block[j] = 0;
1657     }
1658     if (block[0])
1659         s->block_last_index[n] = 0;
1660     else
1661         s->block_last_index[n] = -1;
1662 }
1663
1664 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1665                                int last_index)
1666 {
1667     int i;
1668     const int maxlevel = s->max_qcoeff;
1669     const int minlevel = s->min_qcoeff;
1670     int overflow = 0;
1671
1672     if (s->mb_intra) {
1673         i = 1; // skip clipping of intra dc
1674     } else
1675         i = 0;
1676
1677     for (; i <= last_index; i++) {
1678         const int j = s->intra_scantable.permutated[i];
1679         int level = block[j];
1680
1681         if (level > maxlevel) {
1682             level = maxlevel;
1683             overflow++;
1684         } else if (level < minlevel) {
1685             level = minlevel;
1686             overflow++;
1687         }
1688
1689         block[j] = level;
1690     }
1691
1692     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1693         av_log(s->avctx, AV_LOG_INFO,
1694                "warning, clipping %d dct coefficients to %d..%d\n",
1695                overflow, minlevel, maxlevel);
1696 }
1697
/**
 * Fill an 8x8 weight table from the local activity of an 8x8 pixel block.
 *
 * For each pixel the sum and the sum of squares over its 3x3 neighbourhood
 * (cropped to the 8x8 block) are gathered, and the weight is
 * 36 * ff_sqrt(count * sqr - sum * sum) / count, where count is the number
 * of pixels in the cropped neighbourhood.
 *
 * @param weight output, 64 entries in row-major order
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int cell;

    // FIXME optimize
    for (cell = 0; cell < 64; cell++) {
        const int cx = cell & 7;
        const int cy = cell >> 3;
        /* neighbourhood bounds, upper bound exclusive */
        const int x0 = FFMAX(cx - 1, 0);
        const int x1 = FFMIN(8, cx + 2);
        const int y0 = FFMAX(cy - 1, 0);
        const int y1 = FFMIN(8, cy + 2);
        const int count = (x1 - x0) * (y1 - y0);
        int sum = 0;
        int sqr = 0;
        int nx, ny;

        for (ny = y0; ny < y1; ny++) {
            const uint8_t *row = ptr + ny * stride;

            for (nx = x0; nx < x1; nx++) {
                const int v = row[nx];

                sum += v;
                sqr += v * v;
            }
        }

        weight[cell] = (36 * ff_sqrt(count * sqr - sum * sum)) / count;
    }
}
1721
1722 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1723                                                 int motion_x, int motion_y,
1724                                                 int mb_block_height,
1725                                                 int mb_block_count)
1726 {
1727     int16_t weight[8][64];
1728     int16_t orig[8][64];
1729     const int mb_x = s->mb_x;
1730     const int mb_y = s->mb_y;
1731     int i;
1732     int skip_dct[8];
1733     int dct_offset = s->linesize * 8; // default for progressive frames
1734     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1735     ptrdiff_t wrap_y, wrap_c;
1736
1737     for (i = 0; i < mb_block_count; i++)
1738         skip_dct[i] = s->skipdct;
1739
1740     if (s->adaptive_quant) {
1741         const int last_qp = s->qscale;
1742         const int mb_xy = mb_x + mb_y * s->mb_stride;
1743
1744         s->lambda = s->lambda_table[mb_xy];
1745         update_qscale(s);
1746
1747         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1748             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1749             s->dquant = s->qscale - last_qp;
1750
1751             if (s->out_format == FMT_H263) {
1752                 s->dquant = av_clip(s->dquant, -2, 2);
1753
1754                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1755                     if (!s->mb_intra) {
1756                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1757                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1758                                 s->dquant = 0;
1759                         }
1760                         if (s->mv_type == MV_TYPE_8X8)
1761                             s->dquant = 0;
1762                     }
1763                 }
1764             }
1765         }
1766         ff_set_qscale(s, last_qp + s->dquant);
1767     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1768         ff_set_qscale(s, s->qscale + s->dquant);
1769
1770     wrap_y = s->linesize;
1771     wrap_c = s->uvlinesize;
1772     ptr_y  = s->new_picture.f.data[0] +
1773              (mb_y * 16 * wrap_y)              + mb_x * 16;
1774     ptr_cb = s->new_picture.f.data[1] +
1775              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1776     ptr_cr = s->new_picture.f.data[2] +
1777              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1778
1779     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1780         uint8_t *ebuf = s->edge_emu_buffer + 32;
1781         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
1782                                  wrap_y, wrap_y,
1783                                  16, 16, mb_x * 16, mb_y * 16,
1784                                  s->width, s->height);
1785         ptr_y = ebuf;
1786         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
1787                                  wrap_c, wrap_c,
1788                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1789                                  s->width >> 1, s->height >> 1);
1790         ptr_cb = ebuf + 18 * wrap_y;
1791         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
1792                                  wrap_c, wrap_c,
1793                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1794                                  s->width >> 1, s->height >> 1);
1795         ptr_cr = ebuf + 18 * wrap_y + 8;
1796     }
1797
1798     if (s->mb_intra) {
1799         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1800             int progressive_score, interlaced_score;
1801
1802             s->interlaced_dct = 0;
1803             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1804                                                     NULL, wrap_y, 8) +
1805                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1806                                                     NULL, wrap_y, 8) - 400;
1807
1808             if (progressive_score > 0) {
1809                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1810                                                        NULL, wrap_y * 2, 8) +
1811                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1812                                                        NULL, wrap_y * 2, 8);
1813                 if (progressive_score > interlaced_score) {
1814                     s->interlaced_dct = 1;
1815
1816                     dct_offset = wrap_y;
1817                     wrap_y <<= 1;
1818                     if (s->chroma_format == CHROMA_422)
1819                         wrap_c <<= 1;
1820                 }
1821             }
1822         }
1823
1824         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1825         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1826         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1827         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1828
1829         if (s->flags & CODEC_FLAG_GRAY) {
1830             skip_dct[4] = 1;
1831             skip_dct[5] = 1;
1832         } else {
1833             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1834             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1835             if (!s->chroma_y_shift) { /* 422 */
1836                 s->dsp.get_pixels(s->block[6],
1837                                   ptr_cb + (dct_offset >> 1), wrap_c);
1838                 s->dsp.get_pixels(s->block[7],
1839                                   ptr_cr + (dct_offset >> 1), wrap_c);
1840             }
1841         }
1842     } else {
1843         op_pixels_func (*op_pix)[4];
1844         qpel_mc_func (*op_qpix)[16];
1845         uint8_t *dest_y, *dest_cb, *dest_cr;
1846
1847         dest_y  = s->dest[0];
1848         dest_cb = s->dest[1];
1849         dest_cr = s->dest[2];
1850
1851         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1852             op_pix  = s->hdsp.put_pixels_tab;
1853             op_qpix = s->dsp.put_qpel_pixels_tab;
1854         } else {
1855             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
1856             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1857         }
1858
1859         if (s->mv_dir & MV_DIR_FORWARD) {
1860             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
1861                           s->last_picture.f.data,
1862                           op_pix, op_qpix);
1863             op_pix  = s->hdsp.avg_pixels_tab;
1864             op_qpix = s->dsp.avg_qpel_pixels_tab;
1865         }
1866         if (s->mv_dir & MV_DIR_BACKWARD) {
1867             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
1868                           s->next_picture.f.data,
1869                           op_pix, op_qpix);
1870         }
1871
1872         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1873             int progressive_score, interlaced_score;
1874
1875             s->interlaced_dct = 0;
1876             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1877                                                     ptr_y,              wrap_y,
1878                                                     8) +
1879                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1880                                                     ptr_y + wrap_y * 8, wrap_y,
1881                                                     8) - 400;
1882
1883             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1884                 progressive_score -= 400;
1885
1886             if (progressive_score > 0) {
1887                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1888                                                        ptr_y,
1889                                                        wrap_y * 2, 8) +
1890                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1891                                                        ptr_y + wrap_y,
1892                                                        wrap_y * 2, 8);
1893
1894                 if (progressive_score > interlaced_score) {
1895                     s->interlaced_dct = 1;
1896
1897                     dct_offset = wrap_y;
1898                     wrap_y <<= 1;
1899                     if (s->chroma_format == CHROMA_422)
1900                         wrap_c <<= 1;
1901                 }
1902             }
1903         }
1904
1905         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1906         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1907         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1908                            dest_y + dct_offset, wrap_y);
1909         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1910                            dest_y + dct_offset + 8, wrap_y);
1911
1912         if (s->flags & CODEC_FLAG_GRAY) {
1913             skip_dct[4] = 1;
1914             skip_dct[5] = 1;
1915         } else {
1916             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1917             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1918             if (!s->chroma_y_shift) { /* 422 */
1919                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1920                                    dest_cb + (dct_offset >> 1), wrap_c);
1921                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
1922                                    dest_cr + (dct_offset >> 1), wrap_c);
1923             }
1924         }
1925         /* pre quantization */
1926         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1927                 2 * s->qscale * s->qscale) {
1928             // FIXME optimize
1929             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1930                               wrap_y, 8) < 20 * s->qscale)
1931                 skip_dct[0] = 1;
1932             if (s->dsp.sad[1](NULL, ptr_y + 8,
1933                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1934                 skip_dct[1] = 1;
1935             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1936                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1937                 skip_dct[2] = 1;
1938             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1939                               dest_y + dct_offset + 8,
1940                               wrap_y, 8) < 20 * s->qscale)
1941                 skip_dct[3] = 1;
1942             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
1943                               wrap_c, 8) < 20 * s->qscale)
1944                 skip_dct[4] = 1;
1945             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
1946                               wrap_c, 8) < 20 * s->qscale)
1947                 skip_dct[5] = 1;
1948             if (!s->chroma_y_shift) { /* 422 */
1949                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
1950                                   dest_cb + (dct_offset >> 1),
1951                                   wrap_c, 8) < 20 * s->qscale)
1952                     skip_dct[6] = 1;
1953                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
1954                                   dest_cr + (dct_offset >> 1),
1955                                   wrap_c, 8) < 20 * s->qscale)
1956                     skip_dct[7] = 1;
1957             }
1958         }
1959     }
1960
1961     if (s->quantizer_noise_shaping) {
1962         if (!skip_dct[0])
1963             get_visual_weight(weight[0], ptr_y                 , wrap_y);
1964         if (!skip_dct[1])
1965             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
1966         if (!skip_dct[2])
1967             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
1968         if (!skip_dct[3])
1969             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
1970         if (!skip_dct[4])
1971             get_visual_weight(weight[4], ptr_cb                , wrap_c);
1972         if (!skip_dct[5])
1973             get_visual_weight(weight[5], ptr_cr                , wrap_c);
1974         if (!s->chroma_y_shift) { /* 422 */
1975             if (!skip_dct[6])
1976                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
1977                                   wrap_c);
1978             if (!skip_dct[7])
1979                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
1980                                   wrap_c);
1981         }
1982         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
1983     }
1984
1985     /* DCT & quantize */
1986     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
1987     {
1988         for (i = 0; i < mb_block_count; i++) {
1989             if (!skip_dct[i]) {
1990                 int overflow;
1991                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
1992                 // FIXME we could decide to change to quantizer instead of
1993                 // clipping
1994                 // JS: I don't think that would be a good idea it could lower
1995                 //     quality instead of improve it. Just INTRADC clipping
1996                 //     deserves changes in quantizer
1997                 if (overflow)
1998                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
1999             } else
2000                 s->block_last_index[i] = -1;
2001         }
2002         if (s->quantizer_noise_shaping) {
2003             for (i = 0; i < mb_block_count; i++) {
2004                 if (!skip_dct[i]) {
2005                     s->block_last_index[i] =
2006                         dct_quantize_refine(s, s->block[i], weight[i],
2007                                             orig[i], i, s->qscale);
2008                 }
2009             }
2010         }
2011
2012         if (s->luma_elim_threshold && !s->mb_intra)
2013             for (i = 0; i < 4; i++)
2014                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2015         if (s->chroma_elim_threshold && !s->mb_intra)
2016             for (i = 4; i < mb_block_count; i++)
2017                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2018
2019         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2020             for (i = 0; i < mb_block_count; i++) {
2021                 if (s->block_last_index[i] == -1)
2022                     s->coded_score[i] = INT_MAX / 256;
2023             }
2024         }
2025     }
2026
2027     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2028         s->block_last_index[4] =
2029         s->block_last_index[5] = 0;
2030         s->block[4][0] =
2031         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2032     }
2033
2034     // non c quantize code returns incorrect block_last_index FIXME
2035     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2036         for (i = 0; i < mb_block_count; i++) {
2037             int j;
2038             if (s->block_last_index[i] > 0) {
2039                 for (j = 63; j > 0; j--) {
2040                     if (s->block[i][s->intra_scantable.permutated[j]])
2041                         break;
2042                 }
2043                 s->block_last_index[i] = j;
2044             }
2045         }
2046     }
2047
2048     /* huffman encode */
2049     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2050     case AV_CODEC_ID_MPEG1VIDEO:
2051     case AV_CODEC_ID_MPEG2VIDEO:
2052         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2053             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2054         break;
2055     case AV_CODEC_ID_MPEG4:
2056         if (CONFIG_MPEG4_ENCODER)
2057             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2058         break;
2059     case AV_CODEC_ID_MSMPEG4V2:
2060     case AV_CODEC_ID_MSMPEG4V3:
2061     case AV_CODEC_ID_WMV1:
2062         if (CONFIG_MSMPEG4_ENCODER)
2063             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2064         break;
2065     case AV_CODEC_ID_WMV2:
2066         if (CONFIG_WMV2_ENCODER)
2067             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2068         break;
2069     case AV_CODEC_ID_H261:
2070         if (CONFIG_H261_ENCODER)
2071             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2072         break;
2073     case AV_CODEC_ID_H263:
2074     case AV_CODEC_ID_H263P:
2075     case AV_CODEC_ID_FLV1:
2076     case AV_CODEC_ID_RV10:
2077     case AV_CODEC_ID_RV20:
2078         if (CONFIG_H263_ENCODER)
2079             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2080         break;
2081     case AV_CODEC_ID_MJPEG:
2082         if (CONFIG_MJPEG_ENCODER)
2083             ff_mjpeg_encode_mb(s, s->block);
2084         break;
2085     default:
2086         assert(0);
2087     }
2088 }
2089
2090 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2091 {
2092     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2093     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2094 }
2095
2096 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2097     int i;
2098
2099     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2100
2101     /* mpeg1 */
2102     d->mb_skip_run= s->mb_skip_run;
2103     for(i=0; i<3; i++)
2104         d->last_dc[i] = s->last_dc[i];
2105
2106     /* statistics */
2107     d->mv_bits= s->mv_bits;
2108     d->i_tex_bits= s->i_tex_bits;
2109     d->p_tex_bits= s->p_tex_bits;
2110     d->i_count= s->i_count;
2111     d->f_count= s->f_count;
2112     d->b_count= s->b_count;
2113     d->skip_count= s->skip_count;
2114     d->misc_bits= s->misc_bits;
2115     d->last_bits= 0;
2116
2117     d->mb_skipped= 0;
2118     d->qscale= s->qscale;
2119     d->dquant= s->dquant;
2120
2121     d->esc3_level_length= s->esc3_level_length;
2122 }
2123
2124 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2125     int i;
2126
2127     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2128     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2129
2130     /* mpeg1 */
2131     d->mb_skip_run= s->mb_skip_run;
2132     for(i=0; i<3; i++)
2133         d->last_dc[i] = s->last_dc[i];
2134
2135     /* statistics */
2136     d->mv_bits= s->mv_bits;
2137     d->i_tex_bits= s->i_tex_bits;
2138     d->p_tex_bits= s->p_tex_bits;
2139     d->i_count= s->i_count;
2140     d->f_count= s->f_count;
2141     d->b_count= s->b_count;
2142     d->skip_count= s->skip_count;
2143     d->misc_bits= s->misc_bits;
2144
2145     d->mb_intra= s->mb_intra;
2146     d->mb_skipped= s->mb_skipped;
2147     d->mv_type= s->mv_type;
2148     d->mv_dir= s->mv_dir;
2149     d->pb= s->pb;
2150     if(s->data_partitioning){
2151         d->pb2= s->pb2;
2152         d->tex_pb= s->tex_pb;
2153     }
2154     d->block= s->block;
2155     for(i=0; i<8; i++)
2156         d->block_last_index[i]= s->block_last_index[i];
2157     d->interlaced_dct= s->interlaced_dct;
2158     d->qscale= s->qscale;
2159
2160     d->esc3_level_length= s->esc3_level_length;
2161 }
2162
2163 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2164                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2165                            int *dmin, int *next_block, int motion_x, int motion_y)
2166 {
2167     int score;
2168     uint8_t *dest_backup[3];
2169
2170     copy_context_before_encode(s, backup, type);
2171
2172     s->block= s->blocks[*next_block];
2173     s->pb= pb[*next_block];
2174     if(s->data_partitioning){
2175         s->pb2   = pb2   [*next_block];
2176         s->tex_pb= tex_pb[*next_block];
2177     }
2178
2179     if(*next_block){
2180         memcpy(dest_backup, s->dest, sizeof(s->dest));
2181         s->dest[0] = s->rd_scratchpad;
2182         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2183         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2184         assert(s->linesize >= 32); //FIXME
2185     }
2186
2187     encode_mb(s, motion_x, motion_y);
2188
2189     score= put_bits_count(&s->pb);
2190     if(s->data_partitioning){
2191         score+= put_bits_count(&s->pb2);
2192         score+= put_bits_count(&s->tex_pb);
2193     }
2194
2195     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2196         ff_MPV_decode_mb(s, s->block);
2197
2198         score *= s->lambda2;
2199         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2200     }
2201
2202     if(*next_block){
2203         memcpy(s->dest, dest_backup, sizeof(s->dest));
2204     }
2205
2206     if(score<*dmin){
2207         *dmin= score;
2208         *next_block^=1;
2209
2210         copy_context_after_encode(best, s, type);
2211     }
2212 }
2213
2214 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2215     uint32_t *sq = ff_squareTbl + 256;
2216     int acc=0;
2217     int x,y;
2218
2219     if(w==16 && h==16)
2220         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2221     else if(w==8 && h==8)
2222         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2223
2224     for(y=0; y<h; y++){
2225         for(x=0; x<w; x++){
2226             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2227         }
2228     }
2229
2230     assert(acc>=0);
2231
2232     return acc;
2233 }
2234
2235 static int sse_mb(MpegEncContext *s){
2236     int w= 16;
2237     int h= 16;
2238
2239     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2240     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2241
2242     if(w==16 && h==16)
2243       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2244         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2245                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2246                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2247       }else{
2248         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2249                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2250                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2251       }
2252     else
2253         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2254                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2255                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2256 }
2257
2258 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2259     MpegEncContext *s= *(void**)arg;
2260
2261
2262     s->me.pre_pass=1;
2263     s->me.dia_size= s->avctx->pre_dia_size;
2264     s->first_slice_line=1;
2265     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2266         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2267             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2268         }
2269         s->first_slice_line=0;
2270     }
2271
2272     s->me.pre_pass=0;
2273
2274     return 0;
2275 }
2276
2277 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2278     MpegEncContext *s= *(void**)arg;
2279
2280     ff_check_alignment();
2281
2282     s->me.dia_size= s->avctx->dia_size;
2283     s->first_slice_line=1;
2284     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2285         s->mb_x=0; //for block init below
2286         ff_init_block_index(s);
2287         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2288             s->block_index[0]+=2;
2289             s->block_index[1]+=2;
2290             s->block_index[2]+=2;
2291             s->block_index[3]+=2;
2292
2293             /* compute motion vector & mb_type and store in context */
2294             if(s->pict_type==AV_PICTURE_TYPE_B)
2295                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2296             else
2297                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2298         }
2299         s->first_slice_line=0;
2300     }
2301     return 0;
2302 }
2303
2304 static int mb_var_thread(AVCodecContext *c, void *arg){
2305     MpegEncContext *s= *(void**)arg;
2306     int mb_x, mb_y;
2307
2308     ff_check_alignment();
2309
2310     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2311         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2312             int xx = mb_x * 16;
2313             int yy = mb_y * 16;
2314             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2315             int varc;
2316             int sum = s->dsp.pix_sum(pix, s->linesize);
2317
2318             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2319
2320             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2321             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2322             s->me.mb_var_sum_temp    += varc;
2323         }
2324     }
2325     return 0;
2326 }
2327
2328 static void write_slice_end(MpegEncContext *s){
2329     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2330         if(s->partitioned_frame){
2331             ff_mpeg4_merge_partitions(s);
2332         }
2333
2334         ff_mpeg4_stuffing(&s->pb);
2335     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2336         ff_mjpeg_encode_stuffing(&s->pb);
2337     }
2338
2339     avpriv_align_put_bits(&s->pb);
2340     flush_put_bits(&s->pb);
2341
2342     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2343         s->misc_bits+= get_bits_diff(s);
2344 }
2345
2346 static void write_mb_info(MpegEncContext *s)
2347 {
2348     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2349     int offset = put_bits_count(&s->pb);
2350     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2351     int gobn = s->mb_y / s->gob_index;
2352     int pred_x, pred_y;
2353     if (CONFIG_H263_ENCODER)
2354         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2355     bytestream_put_le32(&ptr, offset);
2356     bytestream_put_byte(&ptr, s->qscale);
2357     bytestream_put_byte(&ptr, gobn);
2358     bytestream_put_le16(&ptr, mba);
2359     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2360     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2361     /* 4MV not implemented */
2362     bytestream_put_byte(&ptr, 0); /* hmv2 */
2363     bytestream_put_byte(&ptr, 0); /* vmv2 */
2364 }
2365
2366 static void update_mb_info(MpegEncContext *s, int startcode)
2367 {
2368     if (!s->mb_info)
2369         return;
2370     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2371         s->mb_info_size += 12;
2372         s->prev_mb_info = s->last_mb_info;
2373     }
2374     if (startcode) {
2375         s->prev_mb_info = put_bits_count(&s->pb)/8;
2376         /* This might have incremented mb_info_size above, and we return without
2377          * actually writing any info into that slot yet. But in that case,
2378          * this will be called again at the start of the after writing the
2379          * start code, actually writing the mb info. */
2380         return;
2381     }
2382
2383     s->last_mb_info = put_bits_count(&s->pb)/8;
2384     if (!s->mb_info_size)
2385         s->mb_info_size += 12;
2386     write_mb_info(s);
2387 }
2388
2389 static int encode_thread(AVCodecContext *c, void *arg){
2390     MpegEncContext *s= *(void**)arg;
2391     int mb_x, mb_y, pdif = 0;
2392     int chr_h= 16>>s->chroma_y_shift;
2393     int i, j;
2394     MpegEncContext best_s, backup_s;
2395     uint8_t bit_buf[2][MAX_MB_BYTES];
2396     uint8_t bit_buf2[2][MAX_MB_BYTES];
2397     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2398     PutBitContext pb[2], pb2[2], tex_pb[2];
2399
2400     ff_check_alignment();
2401
2402     for(i=0; i<2; i++){
2403         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2404         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2405         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2406     }
2407
2408     s->last_bits= put_bits_count(&s->pb);
2409     s->mv_bits=0;
2410     s->misc_bits=0;
2411     s->i_tex_bits=0;
2412     s->p_tex_bits=0;
2413     s->i_count=0;
2414     s->f_count=0;
2415     s->b_count=0;
2416     s->skip_count=0;
2417
2418     for(i=0; i<3; i++){
2419         /* init last dc values */
2420         /* note: quant matrix value (8) is implied here */
2421         s->last_dc[i] = 128 << s->intra_dc_precision;
2422
2423         s->current_picture.f.error[i] = 0;
2424     }
2425     s->mb_skip_run = 0;
2426     memset(s->last_mv, 0, sizeof(s->last_mv));
2427
2428     s->last_mv_dir = 0;
2429
2430     switch(s->codec_id){
2431     case AV_CODEC_ID_H263:
2432     case AV_CODEC_ID_H263P:
2433     case AV_CODEC_ID_FLV1:
2434         if (CONFIG_H263_ENCODER)
2435             s->gob_index = ff_h263_get_gob_height(s);
2436         break;
2437     case AV_CODEC_ID_MPEG4:
2438         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2439             ff_mpeg4_init_partitions(s);
2440         break;
2441     }
2442
2443     s->resync_mb_x=0;
2444     s->resync_mb_y=0;
2445     s->first_slice_line = 1;
2446     s->ptr_lastgob = s->pb.buf;
2447     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2448         s->mb_x=0;
2449         s->mb_y= mb_y;
2450
2451         ff_set_qscale(s, s->qscale);
2452         ff_init_block_index(s);
2453
2454         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2455             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2456             int mb_type= s->mb_type[xy];
2457 //            int d;
2458             int dmin= INT_MAX;
2459             int dir;
2460
2461             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2462                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2463                 return -1;
2464             }
2465             if(s->data_partitioning){
2466                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2467                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2468                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2469                     return -1;
2470                 }
2471             }
2472
2473             s->mb_x = mb_x;
2474             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2475             ff_update_block_index(s);
2476
2477             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2478                 ff_h261_reorder_mb_index(s);
2479                 xy= s->mb_y*s->mb_stride + s->mb_x;
2480                 mb_type= s->mb_type[xy];
2481             }
2482
2483             /* write gob / video packet header  */
2484             if(s->rtp_mode){
2485                 int current_packet_size, is_gob_start;
2486
2487                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2488
2489                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2490
2491                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2492
2493                 switch(s->codec_id){
2494                 case AV_CODEC_ID_H263:
2495                 case AV_CODEC_ID_H263P:
2496                     if(!s->h263_slice_structured)
2497                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2498                     break;
2499                 case AV_CODEC_ID_MPEG2VIDEO:
2500                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2501                 case AV_CODEC_ID_MPEG1VIDEO:
2502                     if(s->mb_skip_run) is_gob_start=0;
2503                     break;
2504                 }
2505
2506                 if(is_gob_start){
2507                     if(s->start_mb_y != mb_y || mb_x!=0){
2508                         write_slice_end(s);
2509
2510                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2511                             ff_mpeg4_init_partitions(s);
2512                         }
2513                     }
2514
2515                     assert((put_bits_count(&s->pb)&7) == 0);
2516                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2517
2518                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2519                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2520                         int d = 100 / s->error_rate;
2521                         if(r % d == 0){
2522                             current_packet_size=0;
2523                             s->pb.buf_ptr= s->ptr_lastgob;
2524                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2525                         }
2526                     }
2527
2528                     if (s->avctx->rtp_callback){
2529                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2530                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2531                     }
2532                     update_mb_info(s, 1);
2533
2534                     switch(s->codec_id){
2535                     case AV_CODEC_ID_MPEG4:
2536                         if (CONFIG_MPEG4_ENCODER) {
2537                             ff_mpeg4_encode_video_packet_header(s);
2538                             ff_mpeg4_clean_buffers(s);
2539                         }
2540                     break;
2541                     case AV_CODEC_ID_MPEG1VIDEO:
2542                     case AV_CODEC_ID_MPEG2VIDEO:
2543                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2544                             ff_mpeg1_encode_slice_header(s);
2545                             ff_mpeg1_clean_buffers(s);
2546                         }
2547                     break;
2548                     case AV_CODEC_ID_H263:
2549                     case AV_CODEC_ID_H263P:
2550                         if (CONFIG_H263_ENCODER)
2551                             ff_h263_encode_gob_header(s, mb_y);
2552                     break;
2553                     }
2554
2555                     if(s->flags&CODEC_FLAG_PASS1){
2556                         int bits= put_bits_count(&s->pb);
2557                         s->misc_bits+= bits - s->last_bits;
2558                         s->last_bits= bits;
2559                     }
2560
2561                     s->ptr_lastgob += current_packet_size;
2562                     s->first_slice_line=1;
2563                     s->resync_mb_x=mb_x;
2564                     s->resync_mb_y=mb_y;
2565                 }
2566             }
2567
2568             if(  (s->resync_mb_x   == s->mb_x)
2569                && s->resync_mb_y+1 == s->mb_y){
2570                 s->first_slice_line=0;
2571             }
2572
2573             s->mb_skipped=0;
2574             s->dquant=0; //only for QP_RD
2575
2576             update_mb_info(s, 0);
2577
2578             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2579                 int next_block=0;
2580                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2581
2582                 copy_context_before_encode(&backup_s, s, -1);
2583                 backup_s.pb= s->pb;
2584                 best_s.data_partitioning= s->data_partitioning;
2585                 best_s.partitioned_frame= s->partitioned_frame;
2586                 if(s->data_partitioning){
2587                     backup_s.pb2= s->pb2;
2588                     backup_s.tex_pb= s->tex_pb;
2589                 }
2590
2591                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2592                     s->mv_dir = MV_DIR_FORWARD;
2593                     s->mv_type = MV_TYPE_16X16;
2594                     s->mb_intra= 0;
2595                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2596                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2597                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2598                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2599                 }
2600                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2601                     s->mv_dir = MV_DIR_FORWARD;
2602                     s->mv_type = MV_TYPE_FIELD;
2603                     s->mb_intra= 0;
2604                     for(i=0; i<2; i++){
2605                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2606                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2607                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2608                     }
2609                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2610                                  &dmin, &next_block, 0, 0);
2611                 }
2612                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2613                     s->mv_dir = MV_DIR_FORWARD;
2614                     s->mv_type = MV_TYPE_16X16;
2615                     s->mb_intra= 0;
2616                     s->mv[0][0][0] = 0;
2617                     s->mv[0][0][1] = 0;
2618                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2619                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2620                 }
2621                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2622                     s->mv_dir = MV_DIR_FORWARD;
2623                     s->mv_type = MV_TYPE_8X8;
2624                     s->mb_intra= 0;
2625                     for(i=0; i<4; i++){
2626                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2627                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2628                     }
2629                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2630                                  &dmin, &next_block, 0, 0);
2631                 }
2632                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2633                     s->mv_dir = MV_DIR_FORWARD;
2634                     s->mv_type = MV_TYPE_16X16;
2635                     s->mb_intra= 0;
2636                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2637                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2638                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2639                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2640                 }
2641                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2642                     s->mv_dir = MV_DIR_BACKWARD;
2643                     s->mv_type = MV_TYPE_16X16;
2644                     s->mb_intra= 0;
2645                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2646                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2647                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2648                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2649                 }
2650                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2651                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2652                     s->mv_type = MV_TYPE_16X16;
2653                     s->mb_intra= 0;
2654                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2655                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2656                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2657                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2658                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2659                                  &dmin, &next_block, 0, 0);
2660                 }
2661                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2662                     s->mv_dir = MV_DIR_FORWARD;
2663                     s->mv_type = MV_TYPE_FIELD;
2664                     s->mb_intra= 0;
2665                     for(i=0; i<2; i++){
2666                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2667                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2668                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2669                     }
2670                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2671                                  &dmin, &next_block, 0, 0);
2672                 }
2673                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2674                     s->mv_dir = MV_DIR_BACKWARD;
2675                     s->mv_type = MV_TYPE_FIELD;
2676                     s->mb_intra= 0;
2677                     for(i=0; i<2; i++){
2678                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2679                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2680                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2681                     }
2682                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2683                                  &dmin, &next_block, 0, 0);
2684                 }
2685                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2686                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2687                     s->mv_type = MV_TYPE_FIELD;
2688                     s->mb_intra= 0;
2689                     for(dir=0; dir<2; dir++){
2690                         for(i=0; i<2; i++){
2691                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2692                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2693                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2694                         }
2695                     }
2696                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2697                                  &dmin, &next_block, 0, 0);
2698                 }
2699                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2700                     s->mv_dir = 0;
2701                     s->mv_type = MV_TYPE_16X16;
2702                     s->mb_intra= 1;
2703                     s->mv[0][0][0] = 0;
2704                     s->mv[0][0][1] = 0;
2705                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2706                                  &dmin, &next_block, 0, 0);
2707                     if(s->h263_pred || s->h263_aic){
2708                         if(best_s.mb_intra)
2709                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2710                         else
2711                             ff_clean_intra_table_entries(s); //old mode?
2712                     }
2713                 }
2714
2715                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2716                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2717                         const int last_qp= backup_s.qscale;
2718                         int qpi, qp, dc[6];
2719                         int16_t ac[6][16];
2720                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2721                         static const int dquant_tab[4]={-1,1,-2,2};
2722
2723                         assert(backup_s.dquant == 0);
2724
2725                         //FIXME intra
2726                         s->mv_dir= best_s.mv_dir;
2727                         s->mv_type = MV_TYPE_16X16;
2728                         s->mb_intra= best_s.mb_intra;
2729                         s->mv[0][0][0] = best_s.mv[0][0][0];
2730                         s->mv[0][0][1] = best_s.mv[0][0][1];
2731                         s->mv[1][0][0] = best_s.mv[1][0][0];
2732                         s->mv[1][0][1] = best_s.mv[1][0][1];
2733
2734                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2735                         for(; qpi<4; qpi++){
2736                             int dquant= dquant_tab[qpi];
2737                             qp= last_qp + dquant;
2738                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2739                                 continue;
2740                             backup_s.dquant= dquant;
2741                             if(s->mb_intra && s->dc_val[0]){
2742                                 for(i=0; i<6; i++){
2743                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2744                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2745                                 }
2746                             }
2747
2748                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2749                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2750                             if(best_s.qscale != qp){
2751                                 if(s->mb_intra && s->dc_val[0]){
2752                                     for(i=0; i<6; i++){
2753                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2754                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2755                                     }
2756                                 }
2757                             }
2758                         }
2759                     }
2760                 }
2761                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2762                     int mx= s->b_direct_mv_table[xy][0];
2763                     int my= s->b_direct_mv_table[xy][1];
2764
2765                     backup_s.dquant = 0;
2766                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2767                     s->mb_intra= 0;
2768                     ff_mpeg4_set_direct_mv(s, mx, my);
2769                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2770                                  &dmin, &next_block, mx, my);
2771                 }
2772                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2773                     backup_s.dquant = 0;
2774                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2775                     s->mb_intra= 0;
2776                     ff_mpeg4_set_direct_mv(s, 0, 0);
2777                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2778                                  &dmin, &next_block, 0, 0);
2779                 }
2780                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2781                     int coded=0;
2782                     for(i=0; i<6; i++)
2783                         coded |= s->block_last_index[i];
2784                     if(coded){
2785                         int mx,my;
2786                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2787                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2788                             mx=my=0; //FIXME find the one we actually used
2789                             ff_mpeg4_set_direct_mv(s, mx, my);
2790                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2791                             mx= s->mv[1][0][0];
2792                             my= s->mv[1][0][1];
2793                         }else{
2794                             mx= s->mv[0][0][0];
2795                             my= s->mv[0][0][1];
2796                         }
2797
2798                         s->mv_dir= best_s.mv_dir;
2799                         s->mv_type = best_s.mv_type;
2800                         s->mb_intra= 0;
2801 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2802                         s->mv[0][0][1] = best_s.mv[0][0][1];
2803                         s->mv[1][0][0] = best_s.mv[1][0][0];
2804                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2805                         backup_s.dquant= 0;
2806                         s->skipdct=1;
2807                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2808                                         &dmin, &next_block, mx, my);
2809                         s->skipdct=0;
2810                     }
2811                 }
2812
2813                 s->current_picture.qscale_table[xy] = best_s.qscale;
2814
2815                 copy_context_after_encode(s, &best_s, -1);
2816
2817                 pb_bits_count= put_bits_count(&s->pb);
2818                 flush_put_bits(&s->pb);
2819                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2820                 s->pb= backup_s.pb;
2821
2822                 if(s->data_partitioning){
2823                     pb2_bits_count= put_bits_count(&s->pb2);
2824                     flush_put_bits(&s->pb2);
2825                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2826                     s->pb2= backup_s.pb2;
2827
2828                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2829                     flush_put_bits(&s->tex_pb);
2830                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2831                     s->tex_pb= backup_s.tex_pb;
2832                 }
2833                 s->last_bits= put_bits_count(&s->pb);
2834
2835                 if (CONFIG_H263_ENCODER &&
2836                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2837                     ff_h263_update_motion_val(s);
2838
2839                 if(next_block==0){ //FIXME 16 vs linesize16
2840                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2841                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2842                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2843                 }
2844
2845                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2846                     ff_MPV_decode_mb(s, s->block);
2847             } else {
2848                 int motion_x = 0, motion_y = 0;
2849                 s->mv_type=MV_TYPE_16X16;
2850                 // only one MB-Type possible
2851
2852                 switch(mb_type){
2853                 case CANDIDATE_MB_TYPE_INTRA:
2854                     s->mv_dir = 0;
2855                     s->mb_intra= 1;
2856                     motion_x= s->mv[0][0][0] = 0;
2857                     motion_y= s->mv[0][0][1] = 0;
2858                     break;
2859                 case CANDIDATE_MB_TYPE_INTER:
2860                     s->mv_dir = MV_DIR_FORWARD;
2861                     s->mb_intra= 0;
2862                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2863                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2864                     break;
2865                 case CANDIDATE_MB_TYPE_INTER_I:
2866                     s->mv_dir = MV_DIR_FORWARD;
2867                     s->mv_type = MV_TYPE_FIELD;
2868                     s->mb_intra= 0;
2869                     for(i=0; i<2; i++){
2870                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2871                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2872                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2873                     }
2874                     break;
2875                 case CANDIDATE_MB_TYPE_INTER4V:
2876                     s->mv_dir = MV_DIR_FORWARD;
2877                     s->mv_type = MV_TYPE_8X8;
2878                     s->mb_intra= 0;
2879                     for(i=0; i<4; i++){
2880                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2881                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2882                     }
2883                     break;
2884                 case CANDIDATE_MB_TYPE_DIRECT:
2885                     if (CONFIG_MPEG4_ENCODER) {
2886                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2887                         s->mb_intra= 0;
2888                         motion_x=s->b_direct_mv_table[xy][0];
2889                         motion_y=s->b_direct_mv_table[xy][1];
2890                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2891                     }
2892                     break;
2893                 case CANDIDATE_MB_TYPE_DIRECT0:
2894                     if (CONFIG_MPEG4_ENCODER) {
2895                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2896                         s->mb_intra= 0;
2897                         ff_mpeg4_set_direct_mv(s, 0, 0);
2898                     }
2899                     break;
2900                 case CANDIDATE_MB_TYPE_BIDIR:
2901                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2902                     s->mb_intra= 0;
2903                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2904                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2905                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2906                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2907                     break;
2908                 case CANDIDATE_MB_TYPE_BACKWARD:
2909                     s->mv_dir = MV_DIR_BACKWARD;
2910                     s->mb_intra= 0;
2911                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2912                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2913                     break;
2914                 case CANDIDATE_MB_TYPE_FORWARD:
2915                     s->mv_dir = MV_DIR_FORWARD;
2916                     s->mb_intra= 0;
2917                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2918                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2919                     break;
2920                 case CANDIDATE_MB_TYPE_FORWARD_I:
2921                     s->mv_dir = MV_DIR_FORWARD;
2922                     s->mv_type = MV_TYPE_FIELD;
2923                     s->mb_intra= 0;
2924                     for(i=0; i<2; i++){
2925                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2926                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2927                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2928                     }
2929                     break;
2930                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2931                     s->mv_dir = MV_DIR_BACKWARD;
2932                     s->mv_type = MV_TYPE_FIELD;
2933                     s->mb_intra= 0;
2934                     for(i=0; i<2; i++){
2935                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2936                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2937                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2938                     }
2939                     break;
2940                 case CANDIDATE_MB_TYPE_BIDIR_I:
2941                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2942                     s->mv_type = MV_TYPE_FIELD;
2943                     s->mb_intra= 0;
2944                     for(dir=0; dir<2; dir++){
2945                         for(i=0; i<2; i++){
2946                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2947                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2948                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2949                         }
2950                     }
2951                     break;
2952                 default:
2953                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
2954                 }
2955
2956                 encode_mb(s, motion_x, motion_y);
2957
2958                 // RAL: Update last macroblock type
2959                 s->last_mv_dir = s->mv_dir;
2960
2961                 if (CONFIG_H263_ENCODER &&
2962                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2963                     ff_h263_update_motion_val(s);
2964
2965                 ff_MPV_decode_mb(s, s->block);
2966             }
2967
2968             /* clean the MV table in IPS frames for direct mode in B frames */
2969             if(s->mb_intra /* && I,P,S_TYPE */){
2970                 s->p_mv_table[xy][0]=0;
2971                 s->p_mv_table[xy][1]=0;
2972             }
2973
2974             if(s->flags&CODEC_FLAG_PSNR){
2975                 int w= 16;
2976                 int h= 16;
2977
2978                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2979                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2980
2981                 s->current_picture.f.error[0] += sse(
2982                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
2983                     s->dest[0], w, h, s->linesize);
2984                 s->current_picture.f.error[1] += sse(
2985                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2986                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2987                 s->current_picture.f.error[2] += sse(
2988                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2989                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2990             }
2991             if(s->loop_filter){
2992                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
2993                     ff_h263_loop_filter(s);
2994             }
2995             av_dlog(s->avctx, "MB %d %d bits\n",
2996                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
2997         }
2998     }
2999
3000     //not beautiful here but we must write it before flushing so it has to be here
3001     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3002         ff_msmpeg4_encode_ext_header(s);
3003
3004     write_slice_end(s);
3005
3006     /* Send the last GOB if RTP */
3007     if (s->avctx->rtp_callback) {
3008         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3009         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3010         /* Call the RTP callback to send the last GOB */
3011         emms_c();
3012         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3013     }
3014
3015     return 0;
3016 }
3017
3018 #define MERGE(field) dst->field += src->field; src->field=0
3019 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3020     MERGE(me.scene_change_score);
3021     MERGE(me.mc_mb_var_sum_temp);
3022     MERGE(me.mb_var_sum_temp);
3023 }
3024
3025 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3026     int i;
3027
3028     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3029     MERGE(dct_count[1]);
3030     MERGE(mv_bits);
3031     MERGE(i_tex_bits);
3032     MERGE(p_tex_bits);
3033     MERGE(i_count);
3034     MERGE(f_count);
3035     MERGE(b_count);
3036     MERGE(skip_count);
3037     MERGE(misc_bits);
3038     MERGE(er.error_count);
3039     MERGE(padding_bug_score);
3040     MERGE(current_picture.f.error[0]);
3041     MERGE(current_picture.f.error[1]);
3042     MERGE(current_picture.f.error[2]);
3043
3044     if(dst->avctx->noise_reduction){
3045         for(i=0; i<64; i++){
3046             MERGE(dct_error_sum[0][i]);
3047             MERGE(dct_error_sum[1][i]);
3048         }
3049     }
3050
3051     assert(put_bits_count(&src->pb) % 8 ==0);
3052     assert(put_bits_count(&dst->pb) % 8 ==0);
3053     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3054     flush_put_bits(&dst->pb);
3055 }
3056
3057 static int estimate_qp(MpegEncContext *s, int dry_run){
3058     if (s->next_lambda){
3059         s->current_picture_ptr->f.quality =
3060         s->current_picture.f.quality = s->next_lambda;
3061         if(!dry_run) s->next_lambda= 0;
3062     } else if (!s->fixed_qscale) {
3063         s->current_picture_ptr->f.quality =
3064         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3065         if (s->current_picture.f.quality < 0)
3066             return -1;
3067     }
3068
3069     if(s->adaptive_quant){
3070         switch(s->codec_id){
3071         case AV_CODEC_ID_MPEG4:
3072             if (CONFIG_MPEG4_ENCODER)
3073                 ff_clean_mpeg4_qscales(s);
3074             break;
3075         case AV_CODEC_ID_H263:
3076         case AV_CODEC_ID_H263P:
3077         case AV_CODEC_ID_FLV1:
3078             if (CONFIG_H263_ENCODER)
3079                 ff_clean_h263_qscales(s);
3080             break;
3081         default:
3082             ff_init_qscale_tab(s);
3083         }
3084
3085         s->lambda= s->lambda_table[0];
3086         //FIXME broken
3087     }else
3088         s->lambda = s->current_picture.f.quality;
3089     update_qscale(s);
3090     return 0;
3091 }
3092
3093 /* must be called before writing the header */
3094 static void set_frame_distances(MpegEncContext * s){
3095     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3096     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3097
3098     if(s->pict_type==AV_PICTURE_TYPE_B){
3099         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3100         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3101     }else{
3102         s->pp_time= s->time - s->last_non_b_time;
3103         s->last_non_b_time= s->time;
3104         assert(s->picture_number==0 || s->pp_time > 0);
3105     }
3106 }
3107
/**
 * Encode one picture.
 *
 * In order: set up timing and rounding state, run motion estimation (or,
 * for I-pictures without a fixed qscale, mb_var_thread) on all slice
 * contexts, pick f_code/b_code, decide the quantiser with estimate_qp(),
 * write the format-specific picture header, run encode_thread on all slice
 * contexts and merge their statistics and bitstreams back into s.
 *
 * @param s              main encoder context; s->thread_context[1..] are
 *                       the additional slice contexts
 * @param picture_number stored in s->picture_number and passed on to the
 *                       picture-header writers
 * @return 0 on success, -1 if estimate_qp() or ff_init_me() fails, or the
 *         negative value returned by ff_update_duplicate_context()
 */
3108 static int encode_picture(MpegEncContext *s, int picture_number)
3109 {
3110     int i, ret;
3111     int bits;
3112     int context_count = s->slice_context_count;
3113
3114     s->picture_number = picture_number;
3115
3116     /* Reset the average MB variance */
3117     s->me.mb_var_sum_temp    =
3118     s->me.mc_mb_var_sum_temp = 0;
3119
3120     /* we need to initialize some time vars before we can encode b-frames */
3121     // RAL: Condition added for MPEG1VIDEO
3122     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3123         set_frame_distances(s);
3124     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3125         ff_set_mpeg4_time(s);
3126
3127     s->me.scene_change_score=0;
3128
3129 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3130
         /* Rounding mode: set from msmpeg4_version on I-pictures, toggled on
          * other non-B pictures for H.263+, MPEG-4 or when flipflop_rounding
          * is set; B-pictures leave it untouched. */
3131     if(s->pict_type==AV_PICTURE_TYPE_I){
3132         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3133         else                        s->no_rounding=0;
3134     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3135         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3136             s->no_rounding ^= 1;
3137     }
3138
         /* Initial lambda: in the second pass take a dry-run estimate from
          * estimate_qp(); otherwise, unless CODEC_FLAG_QSCALE is set, reuse
          * the stored lambda for this picture type. */
3139     if(s->flags & CODEC_FLAG_PASS2){
3140         if (estimate_qp(s,1) < 0)
3141             return -1;
3142         ff_get_2pass_fcode(s);
3143     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3144         if(s->pict_type==AV_PICTURE_TYPE_B)
3145             s->lambda= s->last_lambda_for[s->pict_type];
3146         else
3147             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3148         update_qscale(s);
3149     }
3150
3151     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* Update every additional slice context from s; abort on failure. */
3152     for(i=1; i<context_count; i++){
3153         ret = ff_update_duplicate_context(s->thread_context[i], s);
3154         if (ret < 0)
3155             return ret;
3156     }
3157
3158     if(ff_init_me(s)<0)
3159         return -1;
3160
3161     /* Estimate motion for every MB */
3162     if(s->pict_type != AV_PICTURE_TYPE_I){
3163         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3164         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3165         if (s->pict_type != AV_PICTURE_TYPE_B) {
3166             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3167                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3168             }
3169         }
3170
3171         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3172     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3173         /* I-Frame */
3174         for(i=0; i<s->mb_stride*s->mb_height; i++)
3175             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3176
3177         if(!s->fixed_qscale){
3178             /* finding spatial complexity for I-frame rate control */
3179             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3180         }
3181     }
         /* Sum the ME statistics of the additional slice contexts into s. */
3182     for(i=1; i<context_count; i++){
3183         merge_context_after_me(s, s->thread_context[i]);
3184     }
3185     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3186     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3187     emms_c();
3188
         /* A P-picture whose scene change score exceeds the threshold is
          * re-typed as an I-picture with all MBs marked as intra candidates. */
3189     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3190         s->pict_type= AV_PICTURE_TYPE_I;
3191         for(i=0; i<s->mb_stride*s->mb_height; i++)
3192             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3193         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3194                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3195     }
3196
         /* Unless umvplus is set: choose f_code (and b_code for B-pictures)
          * from the motion vector tables, then hand the tables and the chosen
          * code to ff_fix_long_p_mvs()/ff_fix_long_mvs(). */
3197     if(!s->umvplus){
3198         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3199             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3200
3201             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3202                 int a,b;
3203                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3204                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3205                 s->f_code= FFMAX3(s->f_code, a, b);
3206             }
3207
3208             ff_fix_long_p_mvs(s);
3209             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3210             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3211                 int j;
3212                 for(i=0; i<2; i++){
3213                     for(j=0; j<2; j++)
3214                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3215                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3216                 }
3217             }
3218         }
3219
3220         if(s->pict_type==AV_PICTURE_TYPE_B){
3221             int a, b;
3222
3223             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3224             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3225             s->f_code = FFMAX(a, b);
3226
3227             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3228             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3229             s->b_code = FFMAX(a, b);
3230
3231             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3232             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3233             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3234             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3235             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3236                 int dir, j;
3237                 for(dir=0; dir<2; dir++){
3238                     for(i=0; i<2; i++){
3239                         for(j=0; j<2; j++){
3240                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3241                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3242                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3243                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3244                         }
3245                     }
3246                 }
3247             }
3248         }
3249     }
3250
         /* Final (non dry-run) quantiser decision for this picture. */
3251     if (estimate_qp(s, 0) < 0)
3252         return -1;
3253
3254     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3255         s->qscale= 3; //reduce clipping problems
3256
3257     if (s->out_format == FMT_MJPEG) {
3258         /* for mjpeg, we do include qscale in the matrix */
3259         for(i=1;i<64;i++){
3260             int j= s->dsp.idct_permutation[i];
3261
3262             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3263         }
3264         s->y_dc_scale_table=
3265         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3266         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3267         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3268                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3269         s->qscale= 8;
3270     }
3271
3272     //FIXME var duplication
3273     s->current_picture_ptr->f.key_frame =
3274     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3275     s->current_picture_ptr->f.pict_type =
3276     s->current_picture.f.pict_type = s->pict_type;
3277
3278     if (s->current_picture.f.key_frame)
3279         s->picture_in_gop_number=0;
3280
         /* Write the picture header for the output format and record its
          * size in bits in s->header_bits. */
3281     s->last_bits= put_bits_count(&s->pb);
3282     switch(s->out_format) {
3283     case FMT_MJPEG:
3284         if (CONFIG_MJPEG_ENCODER)
3285             ff_mjpeg_encode_picture_header(s);
3286         break;
3287     case FMT_H261:
3288         if (CONFIG_H261_ENCODER)
3289             ff_h261_encode_picture_header(s, picture_number);
3290         break;
3291     case FMT_H263:
3292         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3293             ff_wmv2_encode_picture_header(s, picture_number);
3294         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3295             ff_msmpeg4_encode_picture_header(s, picture_number);
3296         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3297             ff_mpeg4_encode_picture_header(s, picture_number);
3298         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3299             ff_rv10_encode_picture_header(s, picture_number);
3300         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3301             ff_rv20_encode_picture_header(s, picture_number);
3302         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3303             ff_flv_encode_picture_header(s, picture_number);
3304         else if (CONFIG_H263_ENCODER)
3305             ff_h263_encode_picture_header(s, picture_number);
3306         break;
3307     case FMT_MPEG1:
3308         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3309             ff_mpeg1_encode_picture_header(s, picture_number);
3310         break;
3311     default:
3312         assert(0);
3313     }
3314     bits= put_bits_count(&s->pb);
3315     s->header_bits= bits - s->last_bits;
3316
         /* Update the additional slice contexts from s, run encode_thread on
          * all contexts, then merge statistics and bitstreams back into s. */
3317     for(i=1; i<context_count; i++){
3318         update_duplicate_context_after_me(s->thread_context[i], s);
3319     }
3320     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3321     for(i=1; i<context_count; i++){
3322         merge_context_after_encode(s, s->thread_context[i]);
3323     }
3324     emms_c();
3325     return 0;
3326 }
3327
3328 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3329     const int intra= s->mb_intra;
3330     int i;
3331
3332     s->dct_count[intra]++;
3333
3334     for(i=0; i<64; i++){
3335         int level= block[i];
3336
3337         if(level){
3338             if(level>0){
3339                 s->dct_error_sum[intra][i] += level;
3340                 level -= s->dct_offset[intra][i];
3341                 if(level<0) level=0;
3342             }else{
3343                 s->dct_error_sum[intra][i] -= level;
3344                 level += s->dct_offset[intra][i];
3345                 if(level>0) level=0;
3346             }
3347             block[i]= level;
3348         }
3349     }
3350 }
3351
/**
 * Forward-DCT a block and quantise it with a rate-distortion (trellis)
 * search: for every coefficient up to the last one that survives plain
 * quantisation, up to two candidate levels are tried and the run/level
 * sequence with the lowest distortion + lambda * bits score is kept.
 *
 * @param s        encoder context (fdct, quant matrices, VLC length tables,
 *                 lambda2); s->coded_score[n] is written
 * @param block    64 coefficients; transformed and overwritten in place with
 *                 the chosen quantised levels
 * @param n        block index; for intra blocks n < 4 selects y_dc_scale,
 *                 otherwise c_dc_scale (unless h263_aic is set)
 * @param qscale   quantiser, also the index into q_intra_matrix/q_inter_matrix
 * @param overflow set to non-zero when a quantised magnitude may exceed
 *                 s->max_qcoeff
 * @return scan position of the last non-zero coefficient; for non-intra
 *         blocks -1 when nothing is coded
 */
3352 static int dct_quantize_trellis_c(MpegEncContext *s,
3353                                   int16_t *block, int n,
3354                                   int qscale, int *overflow){
3355     const int *qmat;
3356     const uint8_t *scantable= s->intra_scantable.scantable;
3357     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3358     int max=0;
3359     unsigned int threshold1, threshold2;
3360     int bias=0;
3361     int run_tab[65];
3362     int level_tab[65];
3363     int score_tab[65];
3364     int survivor[65];
3365     int survivor_count;
3366     int last_run=0;
3367     int last_level=0;
3368     int last_score= 0;
3369     int last_i;
3370     int coeff[2][64];
3371     int coeff_count[64];
3372     int qmul, qadd, start_i, last_non_zero, i, dc;
3373     const int esc_length= s->ac_esc_length;
3374     uint8_t * length;
3375     uint8_t * last_length;
3376     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3377
3378     s->dsp.fdct (block);
3379
3380     if(s->dct_error_sum)
3381         s->denoise_dct(s, block);
         /* Dequantisation constants for the FMT_H263 distortion estimate
          * below; 8 times the qmul/qadd used by dct_quantize_refine(). */
3382     qmul= qscale*16;
3383     qadd= ((qscale-1)|1)*8;
3384
3385     if (s->mb_intra) {
3386         int q;
3387         if (!s->h263_aic) {
3388             if (n < 4)
3389                 q = s->y_dc_scale;
3390             else
3391                 q = s->c_dc_scale;
3392             q = q << 3;
3393         } else{
3394             /* For AIC we skip quant/dequant of INTRADC */
3395             q = 1 << 3;
3396             qadd=0;
3397         }
3398
3399         /* note: block[0] is assumed to be positive */
3400         block[0] = (block[0] + (q >> 1)) / q;
3401         start_i = 1;
3402         last_non_zero = 0;
3403         qmat = s->q_intra_matrix[qscale];
3404         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3405             bias= 1<<(QMAT_SHIFT-1);
3406         length     = s->intra_ac_vlc_length;
3407         last_length= s->intra_ac_vlc_last_length;
3408     } else {
3409         start_i = 0;
3410         last_non_zero = -1;
3411         qmat = s->q_inter_matrix[qscale];
3412         length     = s->inter_ac_vlc_length;
3413         last_length= s->inter_ac_vlc_last_length;
3414     }
3415     last_i= start_i;
3416
         /* (unsigned)(level + threshold1) > threshold2 is true exactly when
          * |level| > threshold1; the single unsigned compare covers both
          * signs. */
3417     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3418     threshold2= (threshold1<<1);
3419
         /* Scan backwards for the last coefficient above the threshold. */
3420     for(i=63; i>=start_i; i--) {
3421         const int j = scantable[i];
3422         int level = block[j] * qmat[j];
3423
3424         if(((unsigned)(level+threshold1))>threshold2){
3425             last_non_zero = i;
3426             break;
3427         }
3428     }
3429
         /* Build the candidate levels for each position: the quantised level
          * and the level one step closer to zero (only when the magnitude is
          * at least 2); coefficients below the threshold get the single
          * candidate +1 or -1 according to their sign. */
3430     for(i=start_i; i<=last_non_zero; i++) {
3431         const int j = scantable[i];
3432         int level = block[j] * qmat[j];
3433
3434 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3435 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3436         if(((unsigned)(level+threshold1))>threshold2){
3437             if(level>0){
3438                 level= (bias + level)>>QMAT_SHIFT;
3439                 coeff[0][i]= level;
3440                 coeff[1][i]= level-1;
3441 //                coeff[2][k]= level-2;
3442             }else{
3443                 level= (bias - level)>>QMAT_SHIFT;
3444                 coeff[0][i]= -level;
3445                 coeff[1][i]= -level+1;
3446 //                coeff[2][k]= -level+2;
3447             }
3448             coeff_count[i]= FFMIN(level, 2);
3449             assert(coeff_count[i]);
3450             max |=level;
3451         }else{
3452             coeff[0][i]= (level>>31)|1;
3453             coeff_count[i]= 1;
3454         }
3455     }
3456
3457     *overflow= s->max_qcoeff < max; //overflow might have happened
3458
         /* Nothing above the threshold: clear the coefficients from start_i
          * on and return. */
3459     if(last_non_zero < start_i){
3460         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3461         return last_non_zero;
3462     }
3463
         /* score_tab[i+1] receives the best score found for coding a
          * coefficient at scan position i; survivor[] lists the positions
          * after which a new run may start; run_tab/level_tab record the
          * choice made for each position. */
3464     score_tab[start_i]= 0;
3465     survivor[0]= start_i;
3466     survivor_count= 1;
3467
3468     for(i=start_i; i<=last_non_zero; i++){
3469         int level_index, j, zero_distortion;
3470         int dct_coeff= FFABS(block[ scantable[i] ]);
3471         int best_score=256*256*256*120;
3472
3473         if (s->dsp.fdct == ff_fdct_ifast)
3474             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3475         zero_distortion= dct_coeff*dct_coeff;
3476
3477         for(level_index=0; level_index < coeff_count[i]; level_index++){
3478             int distortion;
3479             int level= coeff[level_index][i];
3480             const int alevel= FFABS(level);
3481             int unquant_coeff;
3482
3483             assert(level);
3484
3485             if(s->out_format == FMT_H263){
3486                 unquant_coeff= alevel*qmul + qadd;
3487             }else{ //MPEG1
3488                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3489                 if(s->mb_intra){
3490                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3491                         unquant_coeff =   (unquant_coeff - 1) | 1;
3492                 }else{
3493                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3494                         unquant_coeff =   (unquant_coeff - 1) | 1;
3495                 }
3496                 unquant_coeff<<= 3;
3497             }
3498
                 /* Distortion relative to coding this coefficient as zero.
                  * Levels in [-64, 63] are biased by 64 and looked up in the
                  * VLC length tables; any other level is charged esc_length. */
3499             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3500             level+=64;
3501             if((level&(~127)) == 0){
3502                 for(j=survivor_count-1; j>=0; j--){
3503                     int run= i - survivor[j];
3504                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3505                     score += score_tab[i-run];
3506
3507                     if(score < best_score){
3508                         best_score= score;
3509                         run_tab[i+1]= run;
3510                         level_tab[i+1]= level-64;
3511                     }
3512                 }
3513
                     /* For FMT_H263 also track the best choice for the final
                      * coefficient, which is costed with last_length. */
3514                 if(s->out_format == FMT_H263){
3515                     for(j=survivor_count-1; j>=0; j--){
3516                         int run= i - survivor[j];
3517                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3518                         score += score_tab[i-run];
3519                         if(score < last_score){
3520                             last_score= score;
3521                             last_run= run;
3522                             last_level= level-64;
3523                             last_i= i+1;
3524                         }
3525                     }
3526                 }
3527             }else{
3528                 distortion += esc_length*lambda;
3529                 for(j=survivor_count-1; j>=0; j--){
3530                     int run= i - survivor[j];
3531                     int score= distortion + score_tab[i-run];
3532
3533                     if(score < best_score){
3534                         best_score= score;
3535                         run_tab[i+1]= run;
3536                         level_tab[i+1]= level-64;
3537                     }
3538                 }
3539
3540                 if(s->out_format == FMT_H263){
3541                   for(j=survivor_count-1; j>=0; j--){
3542                         int run= i - survivor[j];
3543                         int score= distortion + score_tab[i-run];
3544                         if(score < last_score){
3545                             last_score= score;
3546                             last_run= run;
3547                             last_level= level-64;
3548                             last_i= i+1;
3549                         }
3550                     }
3551                 }
3552             }
3553         }
3554
3555         score_tab[i+1]= best_score;
3556
             /* Drop trailing survivors whose score is worse than the new best
              * score (plus lambda when last_non_zero > 27), then add i+1. */
3557         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3558         if(last_non_zero <= 27){
3559             for(; survivor_count; survivor_count--){
3560                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3561                     break;
3562             }
3563         }else{
3564             for(; survivor_count; survivor_count--){
3565                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3566                     break;
3567             }
3568         }
3569
3570         survivor[ survivor_count++ ]= i+1;
3571     }
3572
         /* Formats other than FMT_H263 pick the end position here, from the
          * per-position scores plus lambda*2 for every position but 0. */
3573     if(s->out_format != FMT_H263){
3574         last_score= 256*256*256*120;
3575         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3576             int score= score_tab[i];
3577             if(i) score += lambda*2; //FIXME exacter?
3578
3579             if(score < last_score){
3580                 last_score= score;
3581                 last_i= i;
3582                 last_level= level_tab[i];
3583                 last_run= run_tab[i];
3584             }
3585         }
3586     }
3587
3588     s->coded_score[n] = last_score;
3589
         /* Remember |block[0]| before clearing; the chosen levels are written
          * back below. */
3590     dc= FFABS(block[0]);
3591     last_non_zero= last_i - 1;
3592     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3593
3594     if(last_non_zero < start_i)
3595         return last_non_zero;
3596
         /* Special case: only the first coefficient of a non-intra block is
          * left. Re-decide its level, including dropping it (return -1). */
3597     if(last_non_zero == 0 && start_i == 0){
3598         int best_level= 0;
3599         int best_score= dc * dc;
3600
3601         for(i=0; i<coeff_count[0]; i++){
3602             int level= coeff[i][0];
3603             int alevel= FFABS(level);
3604             int unquant_coeff, score, distortion;
3605
3606             if(s->out_format == FMT_H263){
3607                     unquant_coeff= (alevel*qmul + qadd)>>3;
3608             }else{ //MPEG1
3609                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3610                     unquant_coeff =   (unquant_coeff - 1) | 1;
3611             }
3612             unquant_coeff = (unquant_coeff + 4) >> 3;
3613             unquant_coeff<<= 3 + 3;
3614
3615             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3616             level+=64;
3617             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3618             else                    score= distortion + esc_length*lambda;
3619
3620             if(score < best_score){
3621                 best_score= score;
3622                 best_level= level - 64;
3623             }
3624         }
3625         block[0]= best_level;
3626         s->coded_score[n] = best_score - dc*dc;
3627         if(best_level == 0) return -1;
3628         else                return last_non_zero;
3629     }
3630
         /* Walk back from the chosen end position through run_tab/level_tab
          * and store the selected levels in permuted scan order. */
3631     i= last_i;
3632     assert(last_level);
3633
3634     block[ perm_scantable[last_non_zero] ]= last_level;
3635     i -= last_run + 1;
3636
3637     for(; i>start_i; i -= run_tab[i] + 1){
3638         block[ perm_scantable[i-1] ]= level_tab[i];
3639     }
3640
3641     return last_non_zero;
3642 }
3643
3644 //#define REFINE_STATS 1
/* Table of the 64 8x8 DCT basis functions scaled by 1<<BASIS_SHIFT, filled by
 * build_basis(): the first index is the permuted coefficient index, the
 * second is 8*x + y. dct_quantize_refine() builds it on first use, when
 * basis[0][0] is still 0. */
3645 static int16_t basis[64][64];
3646
3647 static void build_basis(uint8_t *perm){
3648     int i, j, x, y;
3649     emms_c();
3650     for(i=0; i<8; i++){
3651         for(j=0; j<8; j++){
3652             for(y=0; y<8; y++){
3653                 for(x=0; x<8; x++){
3654                     double s= 0.25*(1<<BASIS_SHIFT);
3655                     int index= 8*i + j;
3656                     int perm_index= perm[index];
3657                     if(i==0) s*= sqrt(0.5);
3658                     if(j==0) s*= sqrt(0.5);
3659                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3660                 }
3661             }
3662         }
3663     }
3664 }
3665
3666 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3667                         int16_t *block, int16_t *weight, int16_t *orig,
3668                         int n, int qscale){
3669     int16_t rem[64];
3670     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3671     const uint8_t *scantable= s->intra_scantable.scantable;
3672     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3673 //    unsigned int threshold1, threshold2;
3674 //    int bias=0;
3675     int run_tab[65];
3676     int prev_run=0;
3677     int prev_level=0;
3678     int qmul, qadd, start_i, last_non_zero, i, dc;
3679     uint8_t * length;
3680     uint8_t * last_length;
3681     int lambda;
3682     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3683 #ifdef REFINE_STATS
3684 static int count=0;
3685 static int after_last=0;
3686 static int to_zero=0;
3687 static int from_zero=0;
3688 static int raise=0;
3689 static int lower=0;
3690 static int messed_sign=0;
3691 #endif
3692
3693     if(basis[0][0] == 0)
3694         build_basis(s->dsp.idct_permutation);
3695
3696     qmul= qscale*2;
3697     qadd= (qscale-1)|1;
3698     if (s->mb_intra) {
3699         if (!s->h263_aic) {
3700             if (n < 4)
3701                 q = s->y_dc_scale;
3702             else
3703                 q = s->c_dc_scale;
3704         } else{
3705             /* For AIC we skip quant/dequant of INTRADC */
3706             q = 1;
3707             qadd=0;
3708         }
3709         q <<= RECON_SHIFT-3;
3710         /* note: block[0] is assumed to be positive */
3711         dc= block[0]*q;
3712 //        block[0] = (block[0] + (q >> 1)) / q;
3713         start_i = 1;
3714 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3715 //            bias= 1<<(QMAT_SHIFT-1);
3716         length     = s->intra_ac_vlc_length;
3717         last_length= s->intra_ac_vlc_last_length;
3718     } else {
3719         dc= 0;
3720         start_i = 0;
3721         length     = s->inter_ac_vlc_length;
3722         last_length= s->inter_ac_vlc_last_length;
3723     }
3724     last_non_zero = s->block_last_index[n];
3725
3726 #ifdef REFINE_STATS
3727 {START_TIMER
3728 #endif
3729     dc += (1<<(RECON_SHIFT-1));
3730     for(i=0; i<64; i++){
3731         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3732     }
3733 #ifdef REFINE_STATS
3734 STOP_TIMER("memset rem[]")}
3735 #endif
3736     sum=0;
3737     for(i=0; i<64; i++){
3738         int one= 36;
3739         int qns=4;
3740         int w;
3741
3742         w= FFABS(weight[i]) + qns*one;
3743         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3744
3745         weight[i] = w;
3746 //        w=weight[i] = (63*qns + (w/2)) / w;
3747
3748         assert(w>0);
3749         assert(w<(1<<6));
3750         sum += w*w;
3751     }
3752     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3753 #ifdef REFINE_STATS
3754 {START_TIMER
3755 #endif
3756     run=0;
3757     rle_index=0;
3758     for(i=start_i; i<=last_non_zero; i++){
3759         int j= perm_scantable[i];
3760         const int level= block[j];
3761         int coeff;
3762
3763         if(level){
3764             if(level<0) coeff= qmul*level - qadd;
3765             else        coeff= qmul*level + qadd;
3766             run_tab[rle_index++]=run;
3767             run=0;
3768
3769             s->dsp.add_8x8basis(rem, basis[j], coeff);
3770         }else{
3771             run++;
3772         }
3773     }
3774 #ifdef REFINE_STATS
3775 if(last_non_zero>0){
3776 STOP_TIMER("init rem[]")
3777 }
3778 }
3779
3780 {START_TIMER
3781 #endif
3782     for(;;){
3783         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3784         int best_coeff=0;
3785         int best_change=0;
3786         int run2, best_unquant_change=0, analyze_gradient;
3787 #ifdef REFINE_STATS
3788 {START_TIMER
3789 #endif
3790         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3791
3792         if(analyze_gradient){
3793 #ifdef REFINE_STATS
3794 {START_TIMER
3795 #endif
3796             for(i=0; i<64; i++){
3797                 int w= weight[i];
3798
3799                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3800             }
3801 #ifdef REFINE_STATS
3802 STOP_TIMER("rem*w*w")}
3803 {START_TIMER
3804 #endif
3805             s->dsp.fdct(d1);
3806 #ifdef REFINE_STATS
3807 STOP_TIMER("dct")}
3808 #endif
3809         }
3810
3811         if(start_i){
3812             const int level= block[0];
3813             int change, old_coeff;
3814
3815             assert(s->mb_intra);
3816
3817             old_coeff= q*level;
3818
3819             for(change=-1; change<=1; change+=2){
3820                 int new_level= level + change;
3821                 int score, new_coeff;
3822
3823                 new_coeff= q*new_level;
3824                 if(new_coeff >= 2048 || new_coeff < 0)
3825                     continue;
3826
3827                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3828                 if(score<best_score){
3829                     best_score= score;
3830                     best_coeff= 0;
3831                     best_change= change;
3832                     best_unquant_change= new_coeff - old_coeff;
3833                 }
3834             }
3835         }
3836
3837         run=0;
3838         rle_index=0;
3839         run2= run_tab[rle_index++];
3840         prev_level=0;
3841         prev_run=0;
3842
3843         for(i=start_i; i<64; i++){
3844             int j= perm_scantable[i];
3845             const int level= block[j];
3846             int change, old_coeff;
3847
3848             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3849                 break;
3850
3851             if(level){
3852                 if(level<0) old_coeff= qmul*level - qadd;
3853                 else        old_coeff= qmul*level + qadd;
3854                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3855             }else{
3856                 old_coeff=0;
3857                 run2--;
3858                 assert(run2>=0 || i >= last_non_zero );
3859             }
3860
3861             for(change=-1; change<=1; change+=2){
3862                 int new_level= level + change;
3863                 int score, new_coeff, unquant_change;
3864
3865                 score=0;
3866                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3867                    continue;
3868
3869                 if(new_level){
3870                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3871                     else            new_coeff= qmul*new_level + qadd;
3872                     if(new_coeff >= 2048 || new_coeff <= -2048)
3873                         continue;
3874                     //FIXME check for overflow
3875
3876                     if(level){
3877                         if(level < 63 && level > -63){
3878                             if(i < last_non_zero)
3879                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3880                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3881                             else
3882                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3883                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3884                         }
3885                     }else{
3886                         assert(FFABS(new_level)==1);
3887
3888                         if(analyze_gradient){
3889                             int g= d1[ scantable[i] ];
3890                             if(g && (g^new_level) >= 0)
3891                                 continue;
3892                         }
3893
3894                         if(i < last_non_zero){
3895                             int next_i= i + run2 + 1;
3896                             int next_level= block[ perm_scantable[next_i] ] + 64;
3897
3898                             if(next_level&(~127))
3899                                 next_level= 0;
3900
3901                             if(next_i < last_non_zero)
3902                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3903                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3904                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3905                             else
3906                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3907                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3908                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3909                         }else{
3910                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3911                             if(prev_level){
3912                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3913                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3914                             }
3915                         }
3916                     }
3917                 }else{
3918                     new_coeff=0;
3919                     assert(FFABS(level)==1);
3920
3921                     if(i < last_non_zero){
3922                         int next_i= i + run2 + 1;
3923                         int next_level= block[ perm_scantable[next_i] ] + 64;
3924
3925                         if(next_level&(~127))
3926                             next_level= 0;
3927
3928                         if(next_i < last_non_zero)
3929                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3930                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
3931                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3932                         else
3933                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3934                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3935                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3936                     }else{
3937                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
3938                         if(prev_level){
3939                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3940                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3941                         }
3942                     }
3943                 }
3944
3945                 score *= lambda;
3946
3947                 unquant_change= new_coeff - old_coeff;
3948                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
3949
3950                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
3951                 if(score<best_score){
3952                     best_score= score;
3953                     best_coeff= i;
3954                     best_change= change;
3955                     best_unquant_change= unquant_change;
3956                 }
3957             }
3958             if(level){
3959                 prev_level= level + 64;
3960                 if(prev_level&(~127))
3961                     prev_level= 0;
3962                 prev_run= run;
3963                 run=0;
3964             }else{
3965                 run++;
3966             }
3967         }
3968 #ifdef REFINE_STATS
3969 STOP_TIMER("iterative step")}
3970 #endif
3971
3972         if(best_change){
3973             int j= perm_scantable[ best_coeff ];
3974
3975             block[j] += best_change;
3976
3977             if(best_coeff > last_non_zero){
3978                 last_non_zero= best_coeff;
3979                 assert(block[j]);
3980 #ifdef REFINE_STATS
3981 after_last++;
3982 #endif
3983             }else{
3984 #ifdef REFINE_STATS
3985 if(block[j]){
3986     if(block[j] - best_change){
3987         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
3988             raise++;
3989         }else{
3990             lower++;
3991         }
3992     }else{
3993         from_zero++;
3994     }
3995 }else{
3996     to_zero++;
3997 }
3998 #endif
3999                 for(; last_non_zero>=start_i; last_non_zero--){
4000                     if(block[perm_scantable[last_non_zero]])
4001                         break;
4002                 }
4003             }
4004 #ifdef REFINE_STATS
4005 count++;
4006 if(256*256*256*64 % count == 0){
4007     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4008 }
4009 #endif
4010             run=0;
4011             rle_index=0;
4012             for(i=start_i; i<=last_non_zero; i++){
4013                 int j= perm_scantable[i];
4014                 const int level= block[j];
4015
4016                  if(level){
4017                      run_tab[rle_index++]=run;
4018                      run=0;
4019                  }else{
4020                      run++;
4021                  }
4022             }
4023
4024             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4025         }else{
4026             break;
4027         }
4028     }
4029 #ifdef REFINE_STATS
4030 if(last_non_zero>0){
4031 STOP_TIMER("iterative search")
4032 }
4033 }
4034 #endif
4035
4036     return last_non_zero;
4037 }
4038
4039 int ff_dct_quantize_c(MpegEncContext *s,
4040                         int16_t *block, int n,
4041                         int qscale, int *overflow)
4042 {
4043     int i, j, level, last_non_zero, q, start_i;
4044     const int *qmat;
4045     const uint8_t *scantable= s->intra_scantable.scantable;
4046     int bias;
4047     int max=0;
4048     unsigned int threshold1, threshold2;
4049
4050     s->dsp.fdct (block);
4051
4052     if(s->dct_error_sum)
4053         s->denoise_dct(s, block);
4054
4055     if (s->mb_intra) {
4056         if (!s->h263_aic) {
4057             if (n < 4)
4058                 q = s->y_dc_scale;
4059             else
4060                 q = s->c_dc_scale;
4061             q = q << 3;
4062         } else
4063             /* For AIC we skip quant/dequant of INTRADC */
4064             q = 1 << 3;
4065
4066         /* note: block[0] is assumed to be positive */
4067         block[0] = (block[0] + (q >> 1)) / q;
4068         start_i = 1;
4069         last_non_zero = 0;
4070         qmat = s->q_intra_matrix[qscale];
4071         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4072     } else {
4073         start_i = 0;
4074         last_non_zero = -1;
4075         qmat = s->q_inter_matrix[qscale];
4076         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4077     }
4078     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4079     threshold2= (threshold1<<1);
4080     for(i=63;i>=start_i;i--) {
4081         j = scantable[i];
4082         level = block[j] * qmat[j];
4083
4084         if(((unsigned)(level+threshold1))>threshold2){
4085             last_non_zero = i;
4086             break;
4087         }else{
4088             block[j]=0;
4089         }
4090     }
4091     for(i=start_i; i<=last_non_zero; i++) {
4092         j = scantable[i];
4093         level = block[j] * qmat[j];
4094
4095 //        if(   bias+level >= (1<<QMAT_SHIFT)
4096 //           || bias-level >= (1<<QMAT_SHIFT)){
4097         if(((unsigned)(level+threshold1))>threshold2){
4098             if(level>0){
4099                 level= (bias + level)>>QMAT_SHIFT;
4100                 block[j]= level;
4101             }else{
4102                 level= (bias - level)>>QMAT_SHIFT;
4103                 block[j]= -level;
4104             }
4105             max |=level;
4106         }else{
4107             block[j]=0;
4108         }
4109     }
4110     *overflow= s->max_qcoeff < max; //overflow might have happened
4111
4112     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4113     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4114         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4115
4116     return last_non_zero;
4117 }
4118
/* Byte offset of member x inside MpegEncContext, for the option tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Option flags shared by the tables below. The replacement list is
 * parenthesized so that VE stays a single operand wherever it is expanded. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4121 static const AVOption h263_options[] = {
4122     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4123     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4124     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4125     FF_MPV_COMMON_OPTS
4126     { NULL },
4127 };
4128
4129 static const AVClass h263_class = {
4130     .class_name = "H.263 encoder",
4131     .item_name  = av_default_item_name,
4132     .option     = h263_options,
4133     .version    = LIBAVUTIL_VERSION_INT,
4134 };
4135
4136 AVCodec ff_h263_encoder = {
4137     .name           = "h263",
4138     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4139     .type           = AVMEDIA_TYPE_VIDEO,
4140     .id             = AV_CODEC_ID_H263,
4141     .priv_data_size = sizeof(MpegEncContext),
4142     .init           = ff_MPV_encode_init,
4143     .encode2        = ff_MPV_encode_picture,
4144     .close          = ff_MPV_encode_end,
4145     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4146     .priv_class     = &h263_class,
4147 };
4148
4149 static const AVOption h263p_options[] = {
4150     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4151     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4152     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4153     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4154     FF_MPV_COMMON_OPTS
4155     { NULL },
4156 };
4157 static const AVClass h263p_class = {
4158     .class_name = "H.263p encoder",
4159     .item_name  = av_default_item_name,
4160     .option     = h263p_options,
4161     .version    = LIBAVUTIL_VERSION_INT,
4162 };
4163
4164 AVCodec ff_h263p_encoder = {
4165     .name           = "h263p",
4166     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4167     .type           = AVMEDIA_TYPE_VIDEO,
4168     .id             = AV_CODEC_ID_H263P,
4169     .priv_data_size = sizeof(MpegEncContext),
4170     .init           = ff_MPV_encode_init,
4171     .encode2        = ff_MPV_encode_picture,
4172     .close          = ff_MPV_encode_end,
4173     .capabilities   = CODEC_CAP_SLICE_THREADS,
4174     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4175     .priv_class     = &h263p_class,
4176 };
4177
4178 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4179
4180 AVCodec ff_msmpeg4v2_encoder = {
4181     .name           = "msmpeg4v2",
4182     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4183     .type           = AVMEDIA_TYPE_VIDEO,
4184     .id             = AV_CODEC_ID_MSMPEG4V2,
4185     .priv_data_size = sizeof(MpegEncContext),
4186     .init           = ff_MPV_encode_init,
4187     .encode2        = ff_MPV_encode_picture,
4188     .close          = ff_MPV_encode_end,
4189     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4190     .priv_class     = &msmpeg4v2_class,
4191 };
4192
4193 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4194
4195 AVCodec ff_msmpeg4v3_encoder = {
4196     .name           = "msmpeg4",
4197     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4198     .type           = AVMEDIA_TYPE_VIDEO,
4199     .id             = AV_CODEC_ID_MSMPEG4V3,
4200     .priv_data_size = sizeof(MpegEncContext),
4201     .init           = ff_MPV_encode_init,
4202     .encode2        = ff_MPV_encode_picture,
4203     .close          = ff_MPV_encode_end,
4204     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4205     .priv_class     = &msmpeg4v3_class,
4206 };
4207
4208 FF_MPV_GENERIC_CLASS(wmv1)
4209
4210 AVCodec ff_wmv1_encoder = {
4211     .name           = "wmv1",
4212     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4213     .type           = AVMEDIA_TYPE_VIDEO,
4214     .id             = AV_CODEC_ID_WMV1,
4215     .priv_data_size = sizeof(MpegEncContext),
4216     .init           = ff_MPV_encode_init,
4217     .encode2        = ff_MPV_encode_picture,
4218     .close          = ff_MPV_encode_end,
4219     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4220     .priv_class     = &wmv1_class,
4221 };