]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
ratecontrol: Move Xvid-related functions to the place they are actually used
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "idctdsp.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "mpegvideodata.h"
44 #include "h261.h"
45 #include "h263.h"
46 #include "h263data.h"
47 #include "mjpegenc_common.h"
48 #include "mathops.h"
49 #include "mpegutils.h"
50 #include "mjpegenc.h"
51 #include "msmpeg4.h"
52 #include "pixblockdsp.h"
53 #include "qpeldsp.h"
54 #include "faandct.h"
55 #include "thread.h"
56 #include "aandcttab.h"
57 #include "flv.h"
58 #include "mpeg4video.h"
59 #include "internal.h"
60 #include "bytestream.h"
61 #include "wmv2.h"
62 #include "rv10.h"
63 #include "libxvid.h"
64 #include <limits.h>
65
66 #define QUANT_BIAS_SHIFT 8
67
68 #define QMAT_SHIFT_MMX 16
69 #define QMAT_SHIFT 22
70
71 static int encode_picture(MpegEncContext *s, int picture_number);
72 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
73 static int sse_mb(MpegEncContext *s);
74 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
75 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
76
77 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
78 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
79
80 const AVOption ff_mpv_generic_options[] = {
81     FF_MPV_COMMON_OPTS
82     { NULL },
83 };
84
85 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
86                        uint16_t (*qmat16)[2][64],
87                        const uint16_t *quant_matrix,
88                        int bias, int qmin, int qmax, int intra)
89 {
90     FDCTDSPContext *fdsp = &s->fdsp;
91     int qscale;
92     int shift = 0;
93
94     for (qscale = qmin; qscale <= qmax; qscale++) {
95         int i;
96         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
97 #if CONFIG_FAANDCT
98             fdsp->fdct == ff_faandct            ||
99 #endif /* CONFIG_FAANDCT */
100             fdsp->fdct == ff_jpeg_fdct_islow_10) {
101             for (i = 0; i < 64; i++) {
102                 const int j = s->idsp.idct_permutation[i];
103                 int64_t den = (int64_t) qscale * quant_matrix[j];
104                 /* 16 <= qscale * quant_matrix[i] <= 7905
105                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
106                  *             19952 <=              x  <= 249205026
107                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
108                  *           3444240 >= (1 << 36) / (x) >= 275 */
109
110                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
111             }
112         } else if (fdsp->fdct == ff_fdct_ifast) {
113             for (i = 0; i < 64; i++) {
114                 const int j = s->idsp.idct_permutation[i];
115                 int64_t den = ff_aanscales[i] * (int64_t) qscale * quant_matrix[j];
116                 /* 16 <= qscale * quant_matrix[i] <= 7905
117                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
118                  *             19952 <=              x  <= 249205026
119                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
120                  *           3444240 >= (1 << 36) / (x) >= 275 */
121
122                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / den);
123             }
124         } else {
125             for (i = 0; i < 64; i++) {
126                 const int j = s->idsp.idct_permutation[i];
127                 int64_t den = (int64_t) qscale * quant_matrix[j];
128                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
129                  * Assume x = qscale * quant_matrix[i]
130                  * So             16 <=              x  <= 7905
131                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
132                  * so          32768 >= (1 << 19) / (x) >= 67 */
133                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
134                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
135                 //                    (qscale * quant_matrix[i]);
136                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / den;
137
138                 if (qmat16[qscale][0][i] == 0 ||
139                     qmat16[qscale][0][i] == 128 * 256)
140                     qmat16[qscale][0][i] = 128 * 256 - 1;
141                 qmat16[qscale][1][i] =
142                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
143                                 qmat16[qscale][0][i]);
144             }
145         }
146
147         for (i = intra; i < 64; i++) {
148             int64_t max = 8191;
149             if (fdsp->fdct == ff_fdct_ifast) {
150                 max = (8191LL * ff_aanscales[i]) >> 14;
151             }
152             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
153                 shift++;
154             }
155         }
156     }
157     if (shift) {
158         av_log(NULL, AV_LOG_INFO,
159                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
160                QMAT_SHIFT - shift);
161     }
162 }
163
164 static inline void update_qscale(MpegEncContext *s)
165 {
166     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
167                 (FF_LAMBDA_SHIFT + 7);
168     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
169
170     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
171                  FF_LAMBDA_SHIFT;
172 }
173
174 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
175 {
176     int i;
177
178     if (matrix) {
179         put_bits(pb, 1, 1);
180         for (i = 0; i < 64; i++) {
181             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
182         }
183     } else
184         put_bits(pb, 1, 0);
185 }
186
187 /**
188  * init s->current_picture.qscale_table from s->lambda_table
189  */
190 void ff_init_qscale_tab(MpegEncContext *s)
191 {
192     int8_t * const qscale_table = s->current_picture.qscale_table;
193     int i;
194
195     for (i = 0; i < s->mb_num; i++) {
196         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
197         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
198         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
199                                                   s->avctx->qmax);
200     }
201 }
202
203 static void update_duplicate_context_after_me(MpegEncContext *dst,
204                                               MpegEncContext *src)
205 {
206 #define COPY(a) dst->a= src->a
207     COPY(pict_type);
208     COPY(current_picture);
209     COPY(f_code);
210     COPY(b_code);
211     COPY(qscale);
212     COPY(lambda);
213     COPY(lambda2);
214     COPY(picture_in_gop_number);
215     COPY(gop_picture_number);
216     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
217     COPY(progressive_frame);    // FIXME don't set in encode_header
218     COPY(partitioned_frame);    // FIXME don't set in encode_header
219 #undef COPY
220 }
221
222 /**
223  * Set the given MpegEncContext to defaults for encoding.
224  * the changed fields will not depend upon the prior state of the MpegEncContext.
225  */
226 static void mpv_encode_defaults(MpegEncContext *s)
227 {
228     int i;
229     ff_mpv_common_defaults(s);
230
231     for (i = -16; i < 16; i++) {
232         default_fcode_tab[i + MAX_MV] = 1;
233     }
234     s->me.mv_penalty = default_mv_penalty;
235     s->fcode_tab     = default_fcode_tab;
236
237     s->input_picture_number  = 0;
238     s->picture_in_gop_number = 0;
239 }
240
241 /* init video encoder */
242 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
243 {
244     MpegEncContext *s = avctx->priv_data;
245     AVCPBProperties *cpb_props;
246     int i, ret, format_supported;
247
248     mpv_encode_defaults(s);
249
250     switch (avctx->codec_id) {
251     case AV_CODEC_ID_MPEG2VIDEO:
252         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
253             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
254             av_log(avctx, AV_LOG_ERROR,
255                    "only YUV420 and YUV422 are supported\n");
256             return -1;
257         }
258         break;
259     case AV_CODEC_ID_MJPEG:
260         format_supported = 0;
261         /* JPEG color space */
262         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
263             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
264             (avctx->color_range == AVCOL_RANGE_JPEG &&
265              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
266               avctx->pix_fmt == AV_PIX_FMT_YUV422P)))
267             format_supported = 1;
268         /* MPEG color space */
269         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
270                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
271                   avctx->pix_fmt == AV_PIX_FMT_YUV422P))
272             format_supported = 1;
273
274         if (!format_supported) {
275             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
276             return -1;
277         }
278         break;
279     default:
280         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
281             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
282             return -1;
283         }
284     }
285
286     switch (avctx->pix_fmt) {
287     case AV_PIX_FMT_YUVJ422P:
288     case AV_PIX_FMT_YUV422P:
289         s->chroma_format = CHROMA_422;
290         break;
291     case AV_PIX_FMT_YUVJ420P:
292     case AV_PIX_FMT_YUV420P:
293     default:
294         s->chroma_format = CHROMA_420;
295         break;
296     }
297
298 #if FF_API_PRIVATE_OPT
299 FF_DISABLE_DEPRECATION_WARNINGS
300     if (avctx->rtp_payload_size)
301         s->rtp_payload_size = avctx->rtp_payload_size;
302     if (avctx->me_penalty_compensation)
303         s->me_penalty_compensation = avctx->me_penalty_compensation;
304     if (avctx->pre_me)
305         s->me_pre = avctx->pre_me;
306 FF_ENABLE_DEPRECATION_WARNINGS
307 #endif
308
309     s->bit_rate = avctx->bit_rate;
310     s->width    = avctx->width;
311     s->height   = avctx->height;
312     if (avctx->gop_size > 600 &&
313         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
314         av_log(avctx, AV_LOG_ERROR,
315                "Warning keyframe interval too large! reducing it ...\n");
316         avctx->gop_size = 600;
317     }
318     s->gop_size     = avctx->gop_size;
319     s->avctx        = avctx;
320     if (avctx->max_b_frames > MAX_B_FRAMES) {
321         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
322                "is %d.\n", MAX_B_FRAMES);
323     }
324     s->max_b_frames = avctx->max_b_frames;
325     s->codec_id     = avctx->codec->id;
326     s->strict_std_compliance = avctx->strict_std_compliance;
327     s->quarter_sample     = (avctx->flags & AV_CODEC_FLAG_QPEL) != 0;
328     s->rtp_mode           = !!s->rtp_payload_size;
329     s->intra_dc_precision = avctx->intra_dc_precision;
330     s->user_specified_pts = AV_NOPTS_VALUE;
331
332     if (s->gop_size <= 1) {
333         s->intra_only = 1;
334         s->gop_size   = 12;
335     } else {
336         s->intra_only = 0;
337     }
338
339 #if FF_API_MOTION_EST
340 FF_DISABLE_DEPRECATION_WARNINGS
341     s->me_method = avctx->me_method;
342 FF_ENABLE_DEPRECATION_WARNINGS
343 #endif
344
345     /* Fixed QSCALE */
346     s->fixed_qscale = !!(avctx->flags & AV_CODEC_FLAG_QSCALE);
347
348 #if FF_API_MPV_OPT
349     FF_DISABLE_DEPRECATION_WARNINGS
350     if (avctx->border_masking != 0.0)
351         s->border_masking = avctx->border_masking;
352     FF_ENABLE_DEPRECATION_WARNINGS
353 #endif
354
355     s->adaptive_quant = (s->avctx->lumi_masking ||
356                          s->avctx->dark_masking ||
357                          s->avctx->temporal_cplx_masking ||
358                          s->avctx->spatial_cplx_masking  ||
359                          s->avctx->p_masking      ||
360                          s->border_masking ||
361                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
362                         !s->fixed_qscale;
363
364     s->loop_filter = !!(s->avctx->flags & AV_CODEC_FLAG_LOOP_FILTER);
365
366     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
367         av_log(avctx, AV_LOG_ERROR,
368                "a vbv buffer size is needed, "
369                "for encoding with a maximum bitrate\n");
370         return -1;
371     }
372
373     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
374         av_log(avctx, AV_LOG_INFO,
375                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
376     }
377
378     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
379         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
380         return -1;
381     }
382
383     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
384         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
385         return -1;
386     }
387
388     if (avctx->rc_max_rate &&
389         avctx->rc_max_rate == avctx->bit_rate &&
390         avctx->rc_max_rate != avctx->rc_min_rate) {
391         av_log(avctx, AV_LOG_INFO,
392                "impossible bitrate constraints, this will fail\n");
393     }
394
395     if (avctx->rc_buffer_size &&
396         avctx->bit_rate * (int64_t)avctx->time_base.num >
397             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
398         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
399         return -1;
400     }
401
402     if (!s->fixed_qscale &&
403         avctx->bit_rate * av_q2d(avctx->time_base) >
404             avctx->bit_rate_tolerance) {
405         av_log(avctx, AV_LOG_ERROR,
406                "bitrate tolerance too small for bitrate\n");
407         return -1;
408     }
409
410     if (s->avctx->rc_max_rate &&
411         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
412         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
413          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
414         90000LL * (avctx->rc_buffer_size - 1) >
415             s->avctx->rc_max_rate * 0xFFFFLL) {
416         av_log(avctx, AV_LOG_INFO,
417                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
418                "specified vbv buffer is too large for the given bitrate!\n");
419     }
420
421     if ((s->avctx->flags & AV_CODEC_FLAG_4MV) && s->codec_id != AV_CODEC_ID_MPEG4 &&
422         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
423         s->codec_id != AV_CODEC_ID_FLV1) {
424         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
425         return -1;
426     }
427
428     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
429         av_log(avctx, AV_LOG_ERROR,
430                "OBMC is only supported with simple mb decision\n");
431         return -1;
432     }
433
434     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
435         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
436         return -1;
437     }
438
439     if (s->max_b_frames                    &&
440         s->codec_id != AV_CODEC_ID_MPEG4      &&
441         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
442         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
443         av_log(avctx, AV_LOG_ERROR, "B-frames not supported by codec\n");
444         return -1;
445     }
446
447     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
448          s->codec_id == AV_CODEC_ID_H263  ||
449          s->codec_id == AV_CODEC_ID_H263P) &&
450         (avctx->sample_aspect_ratio.num > 255 ||
451          avctx->sample_aspect_ratio.den > 255)) {
452         av_log(avctx, AV_LOG_ERROR,
453                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
454                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
455         return -1;
456     }
457
458     if ((s->avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT | AV_CODEC_FLAG_INTERLACED_ME)) &&
459         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
460         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
461         return -1;
462     }
463
464 #if FF_API_PRIVATE_OPT
465     FF_DISABLE_DEPRECATION_WARNINGS
466     if (avctx->mpeg_quant)
467         s->mpeg_quant = avctx->mpeg_quant;
468     FF_ENABLE_DEPRECATION_WARNINGS
469 #endif
470
471     // FIXME mpeg2 uses that too
472     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
473         av_log(avctx, AV_LOG_ERROR,
474                "mpeg2 style quantization not supported by codec\n");
475         return -1;
476     }
477
478     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
479         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
480         return -1;
481     }
482
483     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
484         s->avctx->mb_decision != FF_MB_DECISION_RD) {
485         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
486         return -1;
487     }
488
489 #if FF_API_PRIVATE_OPT
490 FF_DISABLE_DEPRECATION_WARNINGS
491     if (avctx->scenechange_threshold)
492         s->scenechange_threshold = avctx->scenechange_threshold;
493 FF_ENABLE_DEPRECATION_WARNINGS
494 #endif
495
496     if (s->scenechange_threshold < 1000000000 &&
497         (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)) {
498         av_log(avctx, AV_LOG_ERROR,
499                "closed gop with scene change detection are not supported yet, "
500                "set threshold to 1000000000\n");
501         return -1;
502     }
503
504     if (s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY) {
505         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
506             av_log(avctx, AV_LOG_ERROR,
507                   "low delay forcing is only available for mpeg2\n");
508             return -1;
509         }
510         if (s->max_b_frames != 0) {
511             av_log(avctx, AV_LOG_ERROR,
512                    "B-frames cannot be used with low delay\n");
513             return -1;
514         }
515     }
516
517     if (s->q_scale_type == 1) {
518         if (avctx->qmax > 12) {
519             av_log(avctx, AV_LOG_ERROR,
520                    "non linear quant only supports qmax <= 12 currently\n");
521             return -1;
522         }
523     }
524
525     if (avctx->slices > 1 &&
526         (avctx->codec_id == AV_CODEC_ID_FLV1 || avctx->codec_id == AV_CODEC_ID_H261)) {
527         av_log(avctx, AV_LOG_ERROR, "Multiple slices are not supported by this codec\n");
528         return AVERROR(EINVAL);
529     }
530
531     if (s->avctx->thread_count > 1         &&
532         s->codec_id != AV_CODEC_ID_MPEG4      &&
533         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
534         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
535         (s->codec_id != AV_CODEC_ID_H263P)) {
536         av_log(avctx, AV_LOG_ERROR,
537                "multi threaded encoding not supported by codec\n");
538         return -1;
539     }
540
541     if (s->avctx->thread_count < 1) {
542         av_log(avctx, AV_LOG_ERROR,
543                "automatic thread number detection not supported by codec,"
544                "patch welcome\n");
545         return -1;
546     }
547
548     if (!avctx->time_base.den || !avctx->time_base.num) {
549         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
550         return -1;
551     }
552
553 #if FF_API_PRIVATE_OPT
554 FF_DISABLE_DEPRECATION_WARNINGS
555     if (avctx->b_frame_strategy)
556         s->b_frame_strategy = avctx->b_frame_strategy;
557     if (avctx->b_sensitivity != 40)
558         s->b_sensitivity = avctx->b_sensitivity;
559 FF_ENABLE_DEPRECATION_WARNINGS
560 #endif
561
562     if (s->b_frame_strategy && (avctx->flags & AV_CODEC_FLAG_PASS2)) {
563         av_log(avctx, AV_LOG_INFO,
564                "notice: b_frame_strategy only affects the first pass\n");
565         s->b_frame_strategy = 0;
566     }
567
568     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
569     if (i > 1) {
570         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
571         avctx->time_base.den /= i;
572         avctx->time_base.num /= i;
573         //return -1;
574     }
575
576     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
577         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
578         // (a + x * 3 / 8) / x
579         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
580         s->inter_quant_bias = 0;
581     } else {
582         s->intra_quant_bias = 0;
583         // (a - x / 4) / x
584         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
585     }
586
587 #if FF_API_QUANT_BIAS
588 FF_DISABLE_DEPRECATION_WARNINGS
589     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
590         s->intra_quant_bias = avctx->intra_quant_bias;
591     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
592         s->inter_quant_bias = avctx->inter_quant_bias;
593 FF_ENABLE_DEPRECATION_WARNINGS
594 #endif
595
596     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
597         s->avctx->time_base.den > (1 << 16) - 1) {
598         av_log(avctx, AV_LOG_ERROR,
599                "timebase %d/%d not supported by MPEG 4 standard, "
600                "the maximum admitted value for the timebase denominator "
601                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
602                (1 << 16) - 1);
603         return -1;
604     }
605     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
606
607     switch (avctx->codec->id) {
608     case AV_CODEC_ID_MPEG1VIDEO:
609         s->out_format = FMT_MPEG1;
610         s->low_delay  = !!(s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
611         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
612         break;
613     case AV_CODEC_ID_MPEG2VIDEO:
614         s->out_format = FMT_MPEG1;
615         s->low_delay  = !!(s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
616         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
617         s->rtp_mode   = 1;
618         break;
619     case AV_CODEC_ID_MJPEG:
620         s->out_format = FMT_MJPEG;
621         s->intra_only = 1; /* force intra only for jpeg */
622         if (!CONFIG_MJPEG_ENCODER ||
623             ff_mjpeg_encode_init(s) < 0)
624             return -1;
625         avctx->delay = 0;
626         s->low_delay = 1;
627         break;
628     case AV_CODEC_ID_H261:
629         if (!CONFIG_H261_ENCODER)
630             return -1;
631         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
632             av_log(avctx, AV_LOG_ERROR,
633                    "The specified picture size of %dx%d is not valid for the "
634                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
635                     s->width, s->height);
636             return -1;
637         }
638         s->out_format = FMT_H261;
639         avctx->delay  = 0;
640         s->low_delay  = 1;
641         s->rtp_mode   = 0; /* Sliced encoding not supported */
642         break;
643     case AV_CODEC_ID_H263:
644         if (!CONFIG_H263_ENCODER)
645         return -1;
646         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
647                              s->width, s->height) == 8) {
648             av_log(avctx, AV_LOG_INFO,
649                    "The specified picture size of %dx%d is not valid for "
650                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
651                    "352x288, 704x576, and 1408x1152."
652                    "Try H.263+.\n", s->width, s->height);
653             return -1;
654         }
655         s->out_format = FMT_H263;
656         avctx->delay  = 0;
657         s->low_delay  = 1;
658         break;
659     case AV_CODEC_ID_H263P:
660         s->out_format = FMT_H263;
661         s->h263_plus  = 1;
662         /* Fx */
663         s->h263_aic        = (avctx->flags & AV_CODEC_FLAG_AC_PRED) ? 1 : 0;
664         s->modified_quant  = s->h263_aic;
665         s->loop_filter     = (avctx->flags & AV_CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
666         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
667
668         /* /Fx */
669         /* These are just to be sure */
670         avctx->delay = 0;
671         s->low_delay = 1;
672         break;
673     case AV_CODEC_ID_FLV1:
674         s->out_format      = FMT_H263;
675         s->h263_flv        = 2; /* format = 1; 11-bit codes */
676         s->unrestricted_mv = 1;
677         s->rtp_mode  = 0; /* don't allow GOB */
678         avctx->delay = 0;
679         s->low_delay = 1;
680         break;
681     case AV_CODEC_ID_RV10:
682         s->out_format = FMT_H263;
683         avctx->delay  = 0;
684         s->low_delay  = 1;
685         break;
686     case AV_CODEC_ID_RV20:
687         s->out_format      = FMT_H263;
688         avctx->delay       = 0;
689         s->low_delay       = 1;
690         s->modified_quant  = 1;
691         s->h263_aic        = 1;
692         s->h263_plus       = 1;
693         s->loop_filter     = 1;
694         s->unrestricted_mv = 0;
695         break;
696     case AV_CODEC_ID_MPEG4:
697         s->out_format      = FMT_H263;
698         s->h263_pred       = 1;
699         s->unrestricted_mv = 1;
700         s->low_delay       = s->max_b_frames ? 0 : 1;
701         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
702         break;
703     case AV_CODEC_ID_MSMPEG4V2:
704         s->out_format      = FMT_H263;
705         s->h263_pred       = 1;
706         s->unrestricted_mv = 1;
707         s->msmpeg4_version = 2;
708         avctx->delay       = 0;
709         s->low_delay       = 1;
710         break;
711     case AV_CODEC_ID_MSMPEG4V3:
712         s->out_format        = FMT_H263;
713         s->h263_pred         = 1;
714         s->unrestricted_mv   = 1;
715         s->msmpeg4_version   = 3;
716         s->flipflop_rounding = 1;
717         avctx->delay         = 0;
718         s->low_delay         = 1;
719         break;
720     case AV_CODEC_ID_WMV1:
721         s->out_format        = FMT_H263;
722         s->h263_pred         = 1;
723         s->unrestricted_mv   = 1;
724         s->msmpeg4_version   = 4;
725         s->flipflop_rounding = 1;
726         avctx->delay         = 0;
727         s->low_delay         = 1;
728         break;
729     case AV_CODEC_ID_WMV2:
730         s->out_format        = FMT_H263;
731         s->h263_pred         = 1;
732         s->unrestricted_mv   = 1;
733         s->msmpeg4_version   = 5;
734         s->flipflop_rounding = 1;
735         avctx->delay         = 0;
736         s->low_delay         = 1;
737         break;
738     default:
739         return -1;
740     }
741
742 #if FF_API_PRIVATE_OPT
743     FF_DISABLE_DEPRECATION_WARNINGS
744     if (avctx->noise_reduction)
745         s->noise_reduction = avctx->noise_reduction;
746     FF_ENABLE_DEPRECATION_WARNINGS
747 #endif
748
749     avctx->has_b_frames = !s->low_delay;
750
751     s->encoding = 1;
752
753     s->progressive_frame    =
754     s->progressive_sequence = !(avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT |
755                                                 AV_CODEC_FLAG_INTERLACED_ME) ||
756                                 s->alternate_scan);
757
758     /* init */
759     ff_mpv_idct_init(s);
760     if (ff_mpv_common_init(s) < 0)
761         return -1;
762
763     if (ARCH_X86)
764         ff_mpv_encode_init_x86(s);
765
766     ff_fdctdsp_init(&s->fdsp, avctx);
767     ff_me_cmp_init(&s->mecc, avctx);
768     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
769     ff_pixblockdsp_init(&s->pdsp, avctx);
770     ff_qpeldsp_init(&s->qdsp);
771
772     if (s->msmpeg4_version) {
773         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
774                           2 * 2 * (MAX_LEVEL + 1) *
775                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
776     }
777     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
778
779     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
780     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
781     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
782     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
783     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
784                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
785     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
786                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
787
788
789     if (s->noise_reduction) {
790         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
791                           2 * 64 * sizeof(uint16_t), fail);
792     }
793
794     if (CONFIG_H263_ENCODER)
795         ff_h263dsp_init(&s->h263dsp);
796     if (!s->dct_quantize)
797         s->dct_quantize = ff_dct_quantize_c;
798     if (!s->denoise_dct)
799         s->denoise_dct  = denoise_dct_c;
800     s->fast_dct_quantize = s->dct_quantize;
801     if (avctx->trellis)
802         s->dct_quantize  = dct_quantize_trellis_c;
803
804     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
805         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
806
807     if (s->slice_context_count > 1) {
808         s->rtp_mode = 1;
809
810         if (avctx->codec_id == AV_CODEC_ID_H263 || avctx->codec_id == AV_CODEC_ID_H263P)
811             s->h263_slice_structured = 1;
812     }
813
814     s->quant_precision = 5;
815
816 #if FF_API_PRIVATE_OPT
817 FF_DISABLE_DEPRECATION_WARNINGS
818     if (avctx->frame_skip_threshold)
819         s->frame_skip_threshold = avctx->frame_skip_threshold;
820     if (avctx->frame_skip_factor)
821         s->frame_skip_factor = avctx->frame_skip_factor;
822     if (avctx->frame_skip_exp)
823         s->frame_skip_exp = avctx->frame_skip_exp;
824     if (avctx->frame_skip_cmp != FF_CMP_DCTMAX)
825         s->frame_skip_cmp = avctx->frame_skip_cmp;
826 FF_ENABLE_DEPRECATION_WARNINGS
827 #endif
828
829     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
830     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->frame_skip_cmp);
831
832     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
833         ff_h261_encode_init(s);
834     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
835         ff_h263_encode_init(s);
836     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
837         if ((ret = ff_msmpeg4_encode_init(s)) < 0)
838             return ret;
839     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
840         && s->out_format == FMT_MPEG1)
841         ff_mpeg1_encode_init(s);
842
843     /* init q matrix */
844     for (i = 0; i < 64; i++) {
845         int j = s->idsp.idct_permutation[i];
846         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
847             s->mpeg_quant) {
848             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
849             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
850         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
851             s->intra_matrix[j] =
852             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
853         } else {
854             /* MPEG-1/2 */
855             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
856             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
857         }
858         if (s->avctx->intra_matrix)
859             s->intra_matrix[j] = s->avctx->intra_matrix[i];
860         if (s->avctx->inter_matrix)
861             s->inter_matrix[j] = s->avctx->inter_matrix[i];
862     }
863
864     /* precompute matrix */
865     /* for mjpeg, we do include qscale in the matrix */
866     if (s->out_format != FMT_MJPEG) {
867         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
868                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
869                           31, 1);
870         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
871                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
872                           31, 0);
873     }
874
875 #if FF_API_RC_STRATEGY
876 FF_DISABLE_DEPRECATION_WARNINGS
877     if (!s->rc_strategy)
878         s->rc_strategy = s->avctx->rc_strategy;
879 FF_ENABLE_DEPRECATION_WARNINGS
880 #endif
881
882     if (ff_rate_control_init(s) < 0)
883         return -1;
884
885     if ((s->avctx->flags & AV_CODEC_FLAG_PASS2) && s->rc_strategy == 1) {
886 #if CONFIG_LIBXVID
887         ret = ff_xvid_rate_control_init(s);
888 #else
889         ret = AVERROR(ENOSYS);
890         av_log(s->avctx, AV_LOG_ERROR,
891                "Xvid ratecontrol requires libavcodec compiled with Xvid support.\n");
892 #endif
893         if (ret < 0)
894             return ret;
895     }
896
897 #if FF_API_ERROR_RATE
898     FF_DISABLE_DEPRECATION_WARNINGS
899     if (avctx->error_rate)
900         s->error_rate = avctx->error_rate;
901     FF_ENABLE_DEPRECATION_WARNINGS;
902 #endif
903
904 #if FF_API_NORMALIZE_AQP
905     FF_DISABLE_DEPRECATION_WARNINGS
906     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
907         s->mpv_flags |= FF_MPV_FLAG_NAQ;
908     FF_ENABLE_DEPRECATION_WARNINGS;
909 #endif
910
911 #if FF_API_MV0
912     FF_DISABLE_DEPRECATION_WARNINGS
913     if (avctx->flags & CODEC_FLAG_MV0)
914         s->mpv_flags |= FF_MPV_FLAG_MV0;
915     FF_ENABLE_DEPRECATION_WARNINGS
916 #endif
917
918 #if FF_API_MPV_OPT
919     FF_DISABLE_DEPRECATION_WARNINGS
920     if (avctx->rc_qsquish != 0.0)
921         s->rc_qsquish = avctx->rc_qsquish;
922     if (avctx->rc_qmod_amp != 0.0)
923         s->rc_qmod_amp = avctx->rc_qmod_amp;
924     if (avctx->rc_qmod_freq)
925         s->rc_qmod_freq = avctx->rc_qmod_freq;
926     if (avctx->rc_buffer_aggressivity != 1.0)
927         s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
928     if (avctx->rc_initial_cplx != 0.0)
929         s->rc_initial_cplx = avctx->rc_initial_cplx;
930     if (avctx->lmin)
931         s->lmin = avctx->lmin;
932     if (avctx->lmax)
933         s->lmax = avctx->lmax;
934
935     if (avctx->rc_eq) {
936         av_freep(&s->rc_eq);
937         s->rc_eq = av_strdup(avctx->rc_eq);
938         if (!s->rc_eq)
939             return AVERROR(ENOMEM);
940     }
941     FF_ENABLE_DEPRECATION_WARNINGS
942 #endif
943
944 #if FF_API_PRIVATE_OPT
945     FF_DISABLE_DEPRECATION_WARNINGS
946     if (avctx->brd_scale)
947         s->brd_scale = avctx->brd_scale;
948
949     if (avctx->prediction_method)
950         s->pred = avctx->prediction_method + 1;
951     FF_ENABLE_DEPRECATION_WARNINGS
952 #endif
953
954     if (s->b_frame_strategy == 2) {
955         for (i = 0; i < s->max_b_frames + 2; i++) {
956             s->tmp_frames[i] = av_frame_alloc();
957             if (!s->tmp_frames[i])
958                 return AVERROR(ENOMEM);
959
960             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
961             s->tmp_frames[i]->width  = s->width  >> s->brd_scale;
962             s->tmp_frames[i]->height = s->height >> s->brd_scale;
963
964             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
965             if (ret < 0)
966                 return ret;
967         }
968     }
969
970     cpb_props = ff_add_cpb_side_data(avctx);
971     if (!cpb_props)
972         return AVERROR(ENOMEM);
973     cpb_props->max_bitrate = avctx->rc_max_rate;
974     cpb_props->min_bitrate = avctx->rc_min_rate;
975     cpb_props->avg_bitrate = avctx->bit_rate;
976     cpb_props->buffer_size = avctx->rc_buffer_size;
977
978     return 0;
979 fail:
980     ff_mpv_encode_end(avctx);
981     return AVERROR_UNKNOWN;
982 }
983
984 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
985 {
986     MpegEncContext *s = avctx->priv_data;
987     int i;
988
989     ff_rate_control_uninit(s);
990 #if CONFIG_LIBXVID
991     if ((avctx->flags & AV_CODEC_FLAG_PASS2) && s->rc_strategy == 1)
992         ff_xvid_rate_control_uninit(s);
993 #endif
994
995     ff_mpv_common_end(s);
996     if (CONFIG_MJPEG_ENCODER &&
997         s->out_format == FMT_MJPEG)
998         ff_mjpeg_encode_close(s);
999
1000     av_freep(&avctx->extradata);
1001
1002     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
1003         av_frame_free(&s->tmp_frames[i]);
1004
1005     ff_free_picture_tables(&s->new_picture);
1006     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1007
1008     av_freep(&s->avctx->stats_out);
1009     av_freep(&s->ac_stats);
1010
1011     av_freep(&s->q_intra_matrix);
1012     av_freep(&s->q_inter_matrix);
1013     av_freep(&s->q_intra_matrix16);
1014     av_freep(&s->q_inter_matrix16);
1015     av_freep(&s->input_picture);
1016     av_freep(&s->reordered_input_picture);
1017     av_freep(&s->dct_offset);
1018
1019     return 0;
1020 }
1021
/**
 * Sum of absolute differences between a 16x16 block and a constant value.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared against
 * @param stride distance, in samples, between vertically adjacent rows
 * @return sum of |sample - ref| over all 256 samples of the block
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
1035
1036 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1037                            uint8_t *ref, int stride)
1038 {
1039     int x, y, w, h;
1040     int acc = 0;
1041
1042     w = s->width  & ~15;
1043     h = s->height & ~15;
1044
1045     for (y = 0; y < h; y += 16) {
1046         for (x = 0; x < w; x += 16) {
1047             int offset = x + y * stride;
1048             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1049                                       stride, 16);
1050             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1051             int sae  = get_sae(src + offset, mean, stride);
1052
1053             acc += sae + 500 < sad;
1054         }
1055     }
1056     return acc;
1057 }
1058
1059 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared)
1060 {
1061     return ff_alloc_picture(s->avctx, pic, &s->me, &s->sc, shared, 1,
1062                             s->chroma_x_shift, s->chroma_y_shift, s->out_format,
1063                             s->mb_stride, s->mb_height, s->b8_stride,
1064                             &s->linesize, &s->uvlinesize);
1065 }
1066
1067 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1068 {
1069     Picture *pic = NULL;
1070     int64_t pts;
1071     int i, display_picture_number = 0, ret;
1072     int encoding_delay = s->max_b_frames ? s->max_b_frames
1073                                          : (s->low_delay ? 0 : 1);
1074     int flush_offset = 1;
1075     int direct = 1;
1076
1077     if (pic_arg) {
1078         pts = pic_arg->pts;
1079         display_picture_number = s->input_picture_number++;
1080
1081         if (pts != AV_NOPTS_VALUE) {
1082             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1083                 int64_t time = pts;
1084                 int64_t last = s->user_specified_pts;
1085
1086                 if (time <= last) {
1087                     av_log(s->avctx, AV_LOG_ERROR,
1088                            "Error, Invalid timestamp=%"PRId64", "
1089                            "last=%"PRId64"\n", pts, s->user_specified_pts);
1090                     return -1;
1091                 }
1092
1093                 if (!s->low_delay && display_picture_number == 1)
1094                     s->dts_delta = time - last;
1095             }
1096             s->user_specified_pts = pts;
1097         } else {
1098             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1099                 s->user_specified_pts =
1100                 pts = s->user_specified_pts + 1;
1101                 av_log(s->avctx, AV_LOG_INFO,
1102                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1103                        pts);
1104             } else {
1105                 pts = display_picture_number;
1106             }
1107         }
1108
1109         if (!pic_arg->buf[0] ||
1110             pic_arg->linesize[0] != s->linesize ||
1111             pic_arg->linesize[1] != s->uvlinesize ||
1112             pic_arg->linesize[2] != s->uvlinesize)
1113             direct = 0;
1114         if ((s->width & 15) || (s->height & 15))
1115             direct = 0;
1116
1117         ff_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
1118                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1119
1120         i = ff_find_unused_picture(s->avctx, s->picture, direct);
1121         if (i < 0)
1122             return i;
1123
1124         pic = &s->picture[i];
1125         pic->reference = 3;
1126
1127         if (direct) {
1128             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1129                 return ret;
1130         }
1131         ret = alloc_picture(s, pic, direct);
1132         if (ret < 0)
1133             return ret;
1134
1135         if (!direct) {
1136             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1137                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1138                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1139                 // empty
1140             } else {
1141                 int h_chroma_shift, v_chroma_shift;
1142                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1143                                                  &h_chroma_shift,
1144                                                  &v_chroma_shift);
1145
1146                 for (i = 0; i < 3; i++) {
1147                     int src_stride = pic_arg->linesize[i];
1148                     int dst_stride = i ? s->uvlinesize : s->linesize;
1149                     int h_shift = i ? h_chroma_shift : 0;
1150                     int v_shift = i ? v_chroma_shift : 0;
1151                     int w = s->width  >> h_shift;
1152                     int h = s->height >> v_shift;
1153                     uint8_t *src = pic_arg->data[i];
1154                     uint8_t *dst = pic->f->data[i];
1155
1156                     if (!s->avctx->rc_buffer_size)
1157                         dst += INPLACE_OFFSET;
1158
1159                     if (src_stride == dst_stride)
1160                         memcpy(dst, src, src_stride * h);
1161                     else {
1162                         int h2 = h;
1163                         uint8_t *dst2 = dst;
1164                         while (h2--) {
1165                             memcpy(dst2, src, w);
1166                             dst2 += dst_stride;
1167                             src += src_stride;
1168                         }
1169                     }
1170                     if ((s->width & 15) || (s->height & 15)) {
1171                         s->mpvencdsp.draw_edges(dst, dst_stride,
1172                                                 w, h,
1173                                                 16 >> h_shift,
1174                                                 16 >> v_shift,
1175                                                 EDGE_BOTTOM);
1176                     }
1177                 }
1178             }
1179         }
1180         ret = av_frame_copy_props(pic->f, pic_arg);
1181         if (ret < 0)
1182             return ret;
1183
1184         pic->f->display_picture_number = display_picture_number;
1185         pic->f->pts = pts; // we set this here to avoid modifying pic_arg
1186     } else {
1187         /* Flushing: When we have not received enough input frames,
1188          * ensure s->input_picture[0] contains the first picture */
1189         for (flush_offset = 0; flush_offset < encoding_delay + 1; flush_offset++)
1190             if (s->input_picture[flush_offset])
1191                 break;
1192
1193         if (flush_offset <= 1)
1194             flush_offset = 1;
1195         else
1196             encoding_delay = encoding_delay - flush_offset + 1;
1197     }
1198
1199     /* shift buffer entries */
1200     for (i = flush_offset; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1201         s->input_picture[i - flush_offset] = s->input_picture[i];
1202
1203     s->input_picture[encoding_delay] = (Picture*) pic;
1204
1205     return 0;
1206 }
1207
1208 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1209 {
1210     int x, y, plane;
1211     int score = 0;
1212     int64_t score64 = 0;
1213
1214     for (plane = 0; plane < 3; plane++) {
1215         const int stride = p->f->linesize[plane];
1216         const int bw = plane ? 1 : 2;
1217         for (y = 0; y < s->mb_height * bw; y++) {
1218             for (x = 0; x < s->mb_width * bw; x++) {
1219                 int off = p->shared ? 0 : 16;
1220                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1221                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1222                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1223
1224                 switch (s->frame_skip_exp) {
1225                 case 0: score    =  FFMAX(score, v);          break;
1226                 case 1: score   += FFABS(v);                  break;
1227                 case 2: score   += v * v;                     break;
1228                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1229                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1230                 }
1231             }
1232         }
1233     }
1234
1235     if (score)
1236         score64 = score;
1237
1238     if (score64 < s->frame_skip_threshold)
1239         return 1;
1240     if (score64 < ((s->frame_skip_factor * (int64_t) s->lambda) >> 8))
1241         return 1;
1242     return 0;
1243 }
1244
1245 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1246 {
1247     AVPacket pkt = { 0 };
1248     int ret, got_output;
1249
1250     av_init_packet(&pkt);
1251     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1252     if (ret < 0)
1253         return ret;
1254
1255     ret = pkt.size;
1256     av_packet_unref(&pkt);
1257     return ret;
1258 }
1259
1260 static int estimate_best_b_count(MpegEncContext *s)
1261 {
1262     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1263     AVCodecContext *c = avcodec_alloc_context3(NULL);
1264     const int scale = s->brd_scale;
1265     int i, j, out_size, p_lambda, b_lambda, lambda2;
1266     int64_t best_rd  = INT64_MAX;
1267     int best_b_count = -1;
1268
1269     if (!c)
1270         return AVERROR(ENOMEM);
1271     assert(scale >= 0 && scale <= 3);
1272
1273     //emms_c();
1274     //s->next_picture_ptr->quality;
1275     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1276     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1277     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1278     if (!b_lambda) // FIXME we should do this somewhere else
1279         b_lambda = p_lambda;
1280     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1281                FF_LAMBDA_SHIFT;
1282
1283     c->width        = s->width  >> scale;
1284     c->height       = s->height >> scale;
1285     c->flags        = AV_CODEC_FLAG_QSCALE | AV_CODEC_FLAG_PSNR;
1286     c->flags       |= s->avctx->flags & AV_CODEC_FLAG_QPEL;
1287     c->mb_decision  = s->avctx->mb_decision;
1288     c->me_cmp       = s->avctx->me_cmp;
1289     c->mb_cmp       = s->avctx->mb_cmp;
1290     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1291     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1292     c->time_base    = s->avctx->time_base;
1293     c->max_b_frames = s->max_b_frames;
1294
1295     if (avcodec_open2(c, codec, NULL) < 0)
1296         return -1;
1297
1298     for (i = 0; i < s->max_b_frames + 2; i++) {
1299         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1300                                                 s->next_picture_ptr;
1301
1302         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1303             pre_input = *pre_input_ptr;
1304
1305             if (!pre_input.shared && i) {
1306                 pre_input.f->data[0] += INPLACE_OFFSET;
1307                 pre_input.f->data[1] += INPLACE_OFFSET;
1308                 pre_input.f->data[2] += INPLACE_OFFSET;
1309             }
1310
1311             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1312                                        s->tmp_frames[i]->linesize[0],
1313                                        pre_input.f->data[0],
1314                                        pre_input.f->linesize[0],
1315                                        c->width, c->height);
1316             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1317                                        s->tmp_frames[i]->linesize[1],
1318                                        pre_input.f->data[1],
1319                                        pre_input.f->linesize[1],
1320                                        c->width >> 1, c->height >> 1);
1321             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1322                                        s->tmp_frames[i]->linesize[2],
1323                                        pre_input.f->data[2],
1324                                        pre_input.f->linesize[2],
1325                                        c->width >> 1, c->height >> 1);
1326         }
1327     }
1328
1329     for (j = 0; j < s->max_b_frames + 1; j++) {
1330         int64_t rd = 0;
1331
1332         if (!s->input_picture[j])
1333             break;
1334
1335         c->error[0] = c->error[1] = c->error[2] = 0;
1336
1337         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1338         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1339
1340         out_size = encode_frame(c, s->tmp_frames[0]);
1341
1342         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1343
1344         for (i = 0; i < s->max_b_frames + 1; i++) {
1345             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1346
1347             s->tmp_frames[i + 1]->pict_type = is_p ?
1348                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1349             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1350
1351             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1352
1353             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1354         }
1355
1356         /* get the delayed frames */
1357         while (out_size) {
1358             out_size = encode_frame(c, NULL);
1359             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1360         }
1361
1362         rd += c->error[0] + c->error[1] + c->error[2];
1363
1364         if (rd < best_rd) {
1365             best_rd = rd;
1366             best_b_count = j;
1367         }
1368     }
1369
1370     avcodec_free_context(&c);
1371
1372     return best_b_count;
1373 }
1374
1375 static int select_input_picture(MpegEncContext *s)
1376 {
1377     int i, ret;
1378
1379     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1380         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1381     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1382
1383     /* set next picture type & ordering */
1384     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1385         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1386             !s->next_picture_ptr || s->intra_only) {
1387             s->reordered_input_picture[0] = s->input_picture[0];
1388             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1389             s->reordered_input_picture[0]->f->coded_picture_number =
1390                 s->coded_picture_number++;
1391         } else {
1392             int b_frames = 0;
1393
1394             if (s->frame_skip_threshold || s->frame_skip_factor) {
1395                 if (s->picture_in_gop_number < s->gop_size &&
1396                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1397                     // FIXME check that the gop check above is +-1 correct
1398                     av_frame_unref(s->input_picture[0]->f);
1399
1400                     emms_c();
1401                     ff_vbv_update(s, 0);
1402
1403                     goto no_output_pic;
1404                 }
1405             }
1406
1407             if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
1408                 for (i = 0; i < s->max_b_frames + 1; i++) {
1409                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1410
1411                     if (pict_num >= s->rc_context.num_entries)
1412                         break;
1413                     if (!s->input_picture[i]) {
1414                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1415                         break;
1416                     }
1417
1418                     s->input_picture[i]->f->pict_type =
1419                         s->rc_context.entry[pict_num].new_pict_type;
1420                 }
1421             }
1422
1423             if (s->b_frame_strategy == 0) {
1424                 b_frames = s->max_b_frames;
1425                 while (b_frames && !s->input_picture[b_frames])
1426                     b_frames--;
1427             } else if (s->b_frame_strategy == 1) {
1428                 for (i = 1; i < s->max_b_frames + 1; i++) {
1429                     if (s->input_picture[i] &&
1430                         s->input_picture[i]->b_frame_score == 0) {
1431                         s->input_picture[i]->b_frame_score =
1432                             get_intra_count(s,
1433                                             s->input_picture[i    ]->f->data[0],
1434                                             s->input_picture[i - 1]->f->data[0],
1435                                             s->linesize) + 1;
1436                     }
1437                 }
1438                 for (i = 0; i < s->max_b_frames + 1; i++) {
1439                     if (!s->input_picture[i] ||
1440                         s->input_picture[i]->b_frame_score - 1 >
1441                             s->mb_num / s->b_sensitivity)
1442                         break;
1443                 }
1444
1445                 b_frames = FFMAX(0, i - 1);
1446
1447                 /* reset scores */
1448                 for (i = 0; i < b_frames + 1; i++) {
1449                     s->input_picture[i]->b_frame_score = 0;
1450                 }
1451             } else if (s->b_frame_strategy == 2) {
1452                 b_frames = estimate_best_b_count(s);
1453             }
1454
1455             emms_c();
1456
1457             for (i = b_frames - 1; i >= 0; i--) {
1458                 int type = s->input_picture[i]->f->pict_type;
1459                 if (type && type != AV_PICTURE_TYPE_B)
1460                     b_frames = i;
1461             }
1462             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1463                 b_frames == s->max_b_frames) {
1464                 av_log(s->avctx, AV_LOG_ERROR,
1465                        "warning, too many B-frames in a row\n");
1466             }
1467
1468             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1469                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1470                     s->gop_size > s->picture_in_gop_number) {
1471                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1472                 } else {
1473                     if (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)
1474                         b_frames = 0;
1475                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1476                 }
1477             }
1478
1479             if ((s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP) && b_frames &&
1480                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1481                 b_frames--;
1482
1483             s->reordered_input_picture[0] = s->input_picture[b_frames];
1484             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1485                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1486             s->reordered_input_picture[0]->f->coded_picture_number =
1487                 s->coded_picture_number++;
1488             for (i = 0; i < b_frames; i++) {
1489                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1490                 s->reordered_input_picture[i + 1]->f->pict_type =
1491                     AV_PICTURE_TYPE_B;
1492                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1493                     s->coded_picture_number++;
1494             }
1495         }
1496     }
1497 no_output_pic:
1498     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1499
1500     if (s->reordered_input_picture[0]) {
1501         s->reordered_input_picture[0]->reference =
1502            s->reordered_input_picture[0]->f->pict_type !=
1503                AV_PICTURE_TYPE_B ? 3 : 0;
1504
1505         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->new_picture, s->reordered_input_picture[0])))
1506             return ret;
1507
1508         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1509             // input is a shared pix, so we can't modify it -> allocate a new
1510             // one & ensure that the shared one is reuseable
1511
1512             Picture *pic;
1513             int i = ff_find_unused_picture(s->avctx, s->picture, 0);
1514             if (i < 0)
1515                 return i;
1516             pic = &s->picture[i];
1517
1518             pic->reference = s->reordered_input_picture[0]->reference;
1519             if (alloc_picture(s, pic, 0) < 0) {
1520                 return -1;
1521             }
1522
1523             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1524             if (ret < 0)
1525                 return ret;
1526
1527             /* mark us unused / free shared pic */
1528             av_frame_unref(s->reordered_input_picture[0]->f);
1529             s->reordered_input_picture[0]->shared = 0;
1530
1531             s->current_picture_ptr = pic;
1532         } else {
1533             // input is not a shared pix -> reuse buffer for current_pix
1534             s->current_picture_ptr = s->reordered_input_picture[0];
1535             for (i = 0; i < 4; i++) {
1536                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1537             }
1538         }
1539         ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1540         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1541                                        s->current_picture_ptr)) < 0)
1542             return ret;
1543
1544         s->picture_number = s->new_picture.f->display_picture_number;
1545     }
1546     return 0;
1547 }
1548
1549 static void frame_end(MpegEncContext *s)
1550 {
1551     int i;
1552
1553     if (s->unrestricted_mv &&
1554         s->current_picture.reference &&
1555         !s->intra_only) {
1556         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1557         int hshift = desc->log2_chroma_w;
1558         int vshift = desc->log2_chroma_h;
1559         s->mpvencdsp.draw_edges(s->current_picture.f->data[0], s->linesize,
1560                                 s->h_edge_pos, s->v_edge_pos,
1561                                 EDGE_WIDTH, EDGE_WIDTH,
1562                                 EDGE_TOP | EDGE_BOTTOM);
1563         s->mpvencdsp.draw_edges(s->current_picture.f->data[1], s->uvlinesize,
1564                                 s->h_edge_pos >> hshift,
1565                                 s->v_edge_pos >> vshift,
1566                                 EDGE_WIDTH >> hshift,
1567                                 EDGE_WIDTH >> vshift,
1568                                 EDGE_TOP | EDGE_BOTTOM);
1569         s->mpvencdsp.draw_edges(s->current_picture.f->data[2], s->uvlinesize,
1570                                 s->h_edge_pos >> hshift,
1571                                 s->v_edge_pos >> vshift,
1572                                 EDGE_WIDTH >> hshift,
1573                                 EDGE_WIDTH >> vshift,
1574                                 EDGE_TOP | EDGE_BOTTOM);
1575     }
1576
1577     emms_c();
1578
1579     s->last_pict_type                 = s->pict_type;
1580     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1581     if (s->pict_type!= AV_PICTURE_TYPE_B)
1582         s->last_non_b_pict_type = s->pict_type;
1583
1584     if (s->encoding) {
1585         /* release non-reference frames */
1586         for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1587             if (!s->picture[i].reference)
1588                 ff_mpeg_unref_picture(s->avctx, &s->picture[i]);
1589         }
1590     }
1591
1592 #if FF_API_CODED_FRAME
1593 FF_DISABLE_DEPRECATION_WARNINGS
1594     av_frame_copy_props(s->avctx->coded_frame, s->current_picture.f);
1595 FF_ENABLE_DEPRECATION_WARNINGS
1596 #endif
1597 #if FF_API_ERROR_FRAME
1598 FF_DISABLE_DEPRECATION_WARNINGS
1599     memcpy(s->current_picture.f->error, s->current_picture.encoding_error,
1600            sizeof(s->current_picture.encoding_error));
1601 FF_ENABLE_DEPRECATION_WARNINGS
1602 #endif
1603 }
1604
1605 static void update_noise_reduction(MpegEncContext *s)
1606 {
1607     int intra, i;
1608
1609     for (intra = 0; intra < 2; intra++) {
1610         if (s->dct_count[intra] > (1 << 16)) {
1611             for (i = 0; i < 64; i++) {
1612                 s->dct_error_sum[intra][i] >>= 1;
1613             }
1614             s->dct_count[intra] >>= 1;
1615         }
1616
1617         for (i = 0; i < 64; i++) {
1618             s->dct_offset[intra][i] = (s->noise_reduction *
1619                                        s->dct_count[intra] +
1620                                        s->dct_error_sum[intra][i] / 2) /
1621                                       (s->dct_error_sum[intra][i] + 1);
1622         }
1623     }
1624 }
1625
1626 static int frame_start(MpegEncContext *s)
1627 {
1628     int ret;
1629
1630     /* mark & release old frames */
1631     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1632         s->last_picture_ptr != s->next_picture_ptr &&
1633         s->last_picture_ptr->f->buf[0]) {
1634         ff_mpeg_unref_picture(s->avctx, s->last_picture_ptr);
1635     }
1636
1637     s->current_picture_ptr->f->pict_type = s->pict_type;
1638     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1639
1640     ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1641     if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1642                                    s->current_picture_ptr)) < 0)
1643         return ret;
1644
1645     if (s->pict_type != AV_PICTURE_TYPE_B) {
1646         s->last_picture_ptr = s->next_picture_ptr;
1647         if (!s->droppable)
1648             s->next_picture_ptr = s->current_picture_ptr;
1649     }
1650
1651     if (s->last_picture_ptr) {
1652         ff_mpeg_unref_picture(s->avctx, &s->last_picture);
1653         if (s->last_picture_ptr->f->buf[0] &&
1654             (ret = ff_mpeg_ref_picture(s->avctx, &s->last_picture,
1655                                        s->last_picture_ptr)) < 0)
1656             return ret;
1657     }
1658     if (s->next_picture_ptr) {
1659         ff_mpeg_unref_picture(s->avctx, &s->next_picture);
1660         if (s->next_picture_ptr->f->buf[0] &&
1661             (ret = ff_mpeg_ref_picture(s->avctx, &s->next_picture,
1662                                        s->next_picture_ptr)) < 0)
1663             return ret;
1664     }
1665
1666     if (s->picture_structure!= PICT_FRAME) {
1667         int i;
1668         for (i = 0; i < 4; i++) {
1669             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1670                 s->current_picture.f->data[i] +=
1671                     s->current_picture.f->linesize[i];
1672             }
1673             s->current_picture.f->linesize[i] *= 2;
1674             s->last_picture.f->linesize[i]    *= 2;
1675             s->next_picture.f->linesize[i]    *= 2;
1676         }
1677     }
1678
1679     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1680         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1681         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1682     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1683         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1684         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1685     } else {
1686         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1687         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1688     }
1689
1690     if (s->dct_error_sum) {
1691         assert(s->noise_reduction && s->encoding);
1692         update_noise_reduction(s);
1693     }
1694
1695     return 0;
1696 }
1697
1698 static void write_pass1_stats(MpegEncContext *s)
1699 {
1700     snprintf(s->avctx->stats_out, 256,
1701              "in:%d out:%d type:%d q:%d itex:%d ptex:%d mv:%d misc:%d "
1702              "fcode:%d bcode:%d mc-var:%d var:%d icount:%d skipcount:%d "
1703              "hbits:%d;\n",
1704              s->current_picture_ptr->f->display_picture_number,
1705              s->current_picture_ptr->f->coded_picture_number,
1706              s->pict_type,
1707              s->current_picture.f->quality,
1708              s->i_tex_bits,
1709              s->p_tex_bits,
1710              s->mv_bits,
1711              s->misc_bits,
1712              s->f_code,
1713              s->b_code,
1714              s->current_picture.mc_mb_var_sum,
1715              s->current_picture.mb_var_sum,
1716              s->i_count, s->skip_count,
1717              s->header_bits);
1718 }
1719
1720 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1721                           const AVFrame *pic_arg, int *got_packet)
1722 {
1723     MpegEncContext *s = avctx->priv_data;
1724     int i, stuffing_count, ret;
1725     int context_count = s->slice_context_count;
1726
1727     s->picture_in_gop_number++;
1728
1729     if (load_input_picture(s, pic_arg) < 0)
1730         return -1;
1731
1732     if (select_input_picture(s) < 0) {
1733         return -1;
1734     }
1735
1736     /* output? */
1737     if (s->new_picture.f->data[0]) {
1738         uint8_t *sd;
1739         if (!pkt->data &&
1740             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1741             return ret;
1742         if (s->mb_info) {
1743             s->mb_info_ptr = av_packet_new_side_data(pkt,
1744                                  AV_PKT_DATA_H263_MB_INFO,
1745                                  s->mb_width*s->mb_height*12);
1746             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1747         }
1748
1749         for (i = 0; i < context_count; i++) {
1750             int start_y = s->thread_context[i]->start_mb_y;
1751             int   end_y = s->thread_context[i]->  end_mb_y;
1752             int h       = s->mb_height;
1753             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1754             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1755
1756             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1757         }
1758
1759         s->pict_type = s->new_picture.f->pict_type;
1760         //emms_c();
1761         ret = frame_start(s);
1762         if (ret < 0)
1763             return ret;
1764 vbv_retry:
1765         if (encode_picture(s, s->picture_number) < 0)
1766             return -1;
1767
1768 #if FF_API_STAT_BITS
1769 FF_DISABLE_DEPRECATION_WARNINGS
1770         avctx->header_bits = s->header_bits;
1771         avctx->mv_bits     = s->mv_bits;
1772         avctx->misc_bits   = s->misc_bits;
1773         avctx->i_tex_bits  = s->i_tex_bits;
1774         avctx->p_tex_bits  = s->p_tex_bits;
1775         avctx->i_count     = s->i_count;
1776         // FIXME f/b_count in avctx
1777         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1778         avctx->skip_count  = s->skip_count;
1779 FF_ENABLE_DEPRECATION_WARNINGS
1780 #endif
1781
1782         frame_end(s);
1783
1784         sd = av_packet_new_side_data(pkt, AV_PKT_DATA_QUALITY_FACTOR,
1785                                      sizeof(int));
1786         if (!sd)
1787             return AVERROR(ENOMEM);
1788         *(int *)sd = s->current_picture.f->quality;
1789
1790         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1791             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1792
1793         if (avctx->rc_buffer_size) {
1794             RateControlContext *rcc = &s->rc_context;
1795             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1796
1797             if (put_bits_count(&s->pb) > max_size &&
1798                 s->lambda < s->lmax) {
1799                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1800                                        (s->qscale + 1) / s->qscale);
1801                 if (s->adaptive_quant) {
1802                     int i;
1803                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1804                         s->lambda_table[i] =
1805                             FFMAX(s->lambda_table[i] + 1,
1806                                   s->lambda_table[i] * (s->qscale + 1) /
1807                                   s->qscale);
1808                 }
1809                 s->mb_skipped = 0;        // done in frame_start()
1810                 // done in encode_picture() so we must undo it
1811                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1812                     if (s->flipflop_rounding          ||
1813                         s->codec_id == AV_CODEC_ID_H263P ||
1814                         s->codec_id == AV_CODEC_ID_MPEG4)
1815                         s->no_rounding ^= 1;
1816                 }
1817                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1818                     s->time_base       = s->last_time_base;
1819                     s->last_non_b_time = s->time - s->pp_time;
1820                 }
1821                 for (i = 0; i < context_count; i++) {
1822                     PutBitContext *pb = &s->thread_context[i]->pb;
1823                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1824                 }
1825                 goto vbv_retry;
1826             }
1827
1828             assert(s->avctx->rc_max_rate);
1829         }
1830
1831         if (s->avctx->flags & AV_CODEC_FLAG_PASS1)
1832             write_pass1_stats(s);
1833
1834         for (i = 0; i < 4; i++) {
1835             s->current_picture_ptr->encoding_error[i] = s->current_picture.encoding_error[i];
1836             avctx->error[i] += s->current_picture_ptr->encoding_error[i];
1837         }
1838
1839         if (s->avctx->flags & AV_CODEC_FLAG_PASS1)
1840             assert(put_bits_count(&s->pb) == s->header_bits + s->mv_bits +
1841                                              s->misc_bits + s->i_tex_bits +
1842                                              s->p_tex_bits);
1843         flush_put_bits(&s->pb);
1844         s->frame_bits  = put_bits_count(&s->pb);
1845
1846         stuffing_count = ff_vbv_update(s, s->frame_bits);
1847         if (stuffing_count) {
1848             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1849                     stuffing_count + 50) {
1850                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1851                 return -1;
1852             }
1853
1854             switch (s->codec_id) {
1855             case AV_CODEC_ID_MPEG1VIDEO:
1856             case AV_CODEC_ID_MPEG2VIDEO:
1857                 while (stuffing_count--) {
1858                     put_bits(&s->pb, 8, 0);
1859                 }
1860             break;
1861             case AV_CODEC_ID_MPEG4:
1862                 put_bits(&s->pb, 16, 0);
1863                 put_bits(&s->pb, 16, 0x1C3);
1864                 stuffing_count -= 4;
1865                 while (stuffing_count--) {
1866                     put_bits(&s->pb, 8, 0xFF);
1867                 }
1868             break;
1869             default:
1870                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1871             }
1872             flush_put_bits(&s->pb);
1873             s->frame_bits  = put_bits_count(&s->pb);
1874         }
1875
1876         /* update MPEG-1/2 vbv_delay for CBR */
1877         if (s->avctx->rc_max_rate                          &&
1878             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1879             s->out_format == FMT_MPEG1                     &&
1880             90000LL * (avctx->rc_buffer_size - 1) <=
1881                 s->avctx->rc_max_rate * 0xFFFFLL) {
1882             AVCPBProperties *props;
1883             size_t props_size;
1884
1885             int vbv_delay, min_delay;
1886             double inbits  = s->avctx->rc_max_rate *
1887                              av_q2d(s->avctx->time_base);
1888             int    minbits = s->frame_bits - 8 *
1889                              (s->vbv_delay_ptr - s->pb.buf - 1);
1890             double bits    = s->rc_context.buffer_index + minbits - inbits;
1891
1892             if (bits < 0)
1893                 av_log(s->avctx, AV_LOG_ERROR,
1894                        "Internal error, negative bits\n");
1895
1896             assert(s->repeat_first_field == 0);
1897
1898             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1899             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1900                         s->avctx->rc_max_rate;
1901
1902             vbv_delay = FFMAX(vbv_delay, min_delay);
1903
1904             assert(vbv_delay < 0xFFFF);
1905
1906             s->vbv_delay_ptr[0] &= 0xF8;
1907             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1908             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1909             s->vbv_delay_ptr[2] &= 0x07;
1910             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1911
1912             props = av_cpb_properties_alloc(&props_size);
1913             if (!props)
1914                 return AVERROR(ENOMEM);
1915             props->vbv_delay = vbv_delay * 300;
1916
1917             ret = av_packet_add_side_data(pkt, AV_PKT_DATA_CPB_PROPERTIES,
1918                                           (uint8_t*)props, props_size);
1919             if (ret < 0) {
1920                 av_freep(&props);
1921                 return ret;
1922             }
1923
1924 #if FF_API_VBV_DELAY
1925 FF_DISABLE_DEPRECATION_WARNINGS
1926             avctx->vbv_delay     = vbv_delay * 300;
1927 FF_ENABLE_DEPRECATION_WARNINGS
1928 #endif
1929         }
1930         s->total_bits     += s->frame_bits;
1931 #if FF_API_STAT_BITS
1932 FF_DISABLE_DEPRECATION_WARNINGS
1933         avctx->frame_bits  = s->frame_bits;
1934 FF_ENABLE_DEPRECATION_WARNINGS
1935 #endif
1936
1937
1938         pkt->pts = s->current_picture.f->pts;
1939         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1940             if (!s->current_picture.f->coded_picture_number)
1941                 pkt->dts = pkt->pts - s->dts_delta;
1942             else
1943                 pkt->dts = s->reordered_pts;
1944             s->reordered_pts = pkt->pts;
1945         } else
1946             pkt->dts = pkt->pts;
1947         if (s->current_picture.f->key_frame)
1948             pkt->flags |= AV_PKT_FLAG_KEY;
1949         if (s->mb_info)
1950             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1951     } else {
1952         s->frame_bits = 0;
1953     }
1954     assert((s->frame_bits & 7) == 0);
1955
1956     pkt->size = s->frame_bits / 8;
1957     *got_packet = !!pkt->size;
1958     return 0;
1959 }
1960
1961 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1962                                                 int n, int threshold)
1963 {
1964     static const char tab[64] = {
1965         3, 2, 2, 1, 1, 1, 1, 1,
1966         1, 1, 1, 1, 1, 1, 1, 1,
1967         1, 1, 1, 1, 1, 1, 1, 1,
1968         0, 0, 0, 0, 0, 0, 0, 0,
1969         0, 0, 0, 0, 0, 0, 0, 0,
1970         0, 0, 0, 0, 0, 0, 0, 0,
1971         0, 0, 0, 0, 0, 0, 0, 0,
1972         0, 0, 0, 0, 0, 0, 0, 0
1973     };
1974     int score = 0;
1975     int run = 0;
1976     int i;
1977     int16_t *block = s->block[n];
1978     const int last_index = s->block_last_index[n];
1979     int skip_dc;
1980
1981     if (threshold < 0) {
1982         skip_dc = 0;
1983         threshold = -threshold;
1984     } else
1985         skip_dc = 1;
1986
1987     /* Are all we could set to zero already zero? */
1988     if (last_index <= skip_dc - 1)
1989         return;
1990
1991     for (i = 0; i <= last_index; i++) {
1992         const int j = s->intra_scantable.permutated[i];
1993         const int level = FFABS(block[j]);
1994         if (level == 1) {
1995             if (skip_dc && i == 0)
1996                 continue;
1997             score += tab[run];
1998             run = 0;
1999         } else if (level > 1) {
2000             return;
2001         } else {
2002             run++;
2003         }
2004     }
2005     if (score >= threshold)
2006         return;
2007     for (i = skip_dc; i <= last_index; i++) {
2008         const int j = s->intra_scantable.permutated[i];
2009         block[j] = 0;
2010     }
2011     if (block[0])
2012         s->block_last_index[n] = 0;
2013     else
2014         s->block_last_index[n] = -1;
2015 }
2016
2017 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
2018                                int last_index)
2019 {
2020     int i;
2021     const int maxlevel = s->max_qcoeff;
2022     const int minlevel = s->min_qcoeff;
2023     int overflow = 0;
2024
2025     if (s->mb_intra) {
2026         i = 1; // skip clipping of intra dc
2027     } else
2028         i = 0;
2029
2030     for (; i <= last_index; i++) {
2031         const int j = s->intra_scantable.permutated[i];
2032         int level = block[j];
2033
2034         if (level > maxlevel) {
2035             level = maxlevel;
2036             overflow++;
2037         } else if (level < minlevel) {
2038             level = minlevel;
2039             overflow++;
2040         }
2041
2042         block[j] = level;
2043     }
2044
2045     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
2046         av_log(s->avctx, AV_LOG_INFO,
2047                "warning, clipping %d dct coefficients to %d..%d\n",
2048                overflow, minlevel, maxlevel);
2049 }
2050
/**
 * Compute a per-pixel weight for an 8x8 block from local pixel activity.
 *
 * For every pixel the sum and the sum of squares are gathered over its 3x3
 * neighbourhood, clipped to the 8x8 block, and the weight is set to
 * 36 * ff_sqrt(count * sqr - sum * sum) / count.
 *
 * @param weight output, 64 values stored row by row (x + 8 * y)
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance between two pixel rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            int sum   = 0;
            int sqr   = 0;
            int count = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                const uint8_t *line = ptr + ny * stride;

                for (nx = left; nx < right; nx++) {
                    const int v = line[nx];

                    sum += v;
                    sqr += v * v;
                    count++;
                }
            }
            weight[col + 8 * row] =
                (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
2074
/**
 * Encode the macroblock at (s->mb_x, s->mb_y).
 *
 * The source pixels are loaded into s->block[] (intra) or differenced
 * against the motion-compensated prediction (inter), each block is
 * DCT-transformed and quantized, optional refinement / coefficient
 * elimination is applied, and the result is handed to the codec-specific
 * macroblock writer selected by s->codec_id.
 *
 * @param s               encoder context
 * @param motion_x        forwarded unchanged to the codec-specific writer
 * @param motion_y        forwarded unchanged to the codec-specific writer
 * @param mb_block_height number of chroma rows per macroblock used for
 *                        chroma addressing (encode_mb() passes 8 or 16)
 * @param mb_block_count  number of 8x8 blocks in the macroblock
 *                        (encode_mb() passes 6 or 8)
 */
2075 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2076                                                 int motion_x, int motion_y,
2077                                                 int mb_block_height,
2078                                                 int mb_block_count)
2079 {
2080     int16_t weight[8][64];
2081     int16_t orig[8][64];
2082     const int mb_x = s->mb_x;
2083     const int mb_y = s->mb_y;
2084     int i;
2085     int skip_dct[8];
2086     int dct_offset = s->linesize * 8; // default for progressive frames
2087     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2088     ptrdiff_t wrap_y, wrap_c;
2089
         /* every block starts with the global skip setting */
2090     for (i = 0; i < mb_block_count; i++)
2091         skip_dct[i] = s->skipdct;
2092
         /* per-macroblock quantizer selection */
2093     if (s->adaptive_quant) {
2094         const int last_qp = s->qscale;
2095         const int mb_xy = mb_x + mb_y * s->mb_stride;
2096
2097         s->lambda = s->lambda_table[mb_xy];
2098         update_qscale(s);
2099
2100         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2101             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2102             s->dquant = s->qscale - last_qp;
2103
2104             if (s->out_format == FMT_H263) {
                     /* limit the quantizer step between macroblocks */
2105                 s->dquant = av_clip(s->dquant, -2, 2);
2106
2107                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2108                     if (!s->mb_intra) {
                             /* B-frames: odd steps and direct mode get no
                              * quantizer change; neither do 8x8 MV blocks */
2109                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2110                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2111                                 s->dquant = 0;
2112                         }
2113                         if (s->mv_type == MV_TYPE_8X8)
2114                             s->dquant = 0;
2115                     }
2116                 }
2117             }
2118         }
2119         ff_set_qscale(s, last_qp + s->dquant);
2120     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2121         ff_set_qscale(s, s->qscale + s->dquant);
2122
         /* locate the macroblock in the source picture */
2123     wrap_y = s->linesize;
2124     wrap_c = s->uvlinesize;
2125     ptr_y  = s->new_picture.f->data[0] +
2126              (mb_y * 16 * wrap_y)              + mb_x * 16;
2127     ptr_cb = s->new_picture.f->data[1] +
2128              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
2129     ptr_cr = s->new_picture.f->data[2] +
2130              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
2131
         /* macroblock reaches past the picture border: read it through the
          * edge emulation buffer instead of the picture itself */
         /* NOTE(review): the chroma calls pass mb_y * 8 and s->height >> 1
          * even when mb_block_height is 16 -- confirm this is intended for
          * 4:2:2 input */
2132     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
2133         uint8_t *ebuf = s->sc.edge_emu_buffer + 32;
2134         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2135                                  wrap_y, wrap_y,
2136                                  16, 16, mb_x * 16, mb_y * 16,
2137                                  s->width, s->height);
2138         ptr_y = ebuf;
2139         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
2140                                  wrap_c, wrap_c,
2141                                  8, mb_block_height, mb_x * 8, mb_y * 8,
2142                                  s->width >> 1, s->height >> 1);
2143         ptr_cb = ebuf + 18 * wrap_y;
2144         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
2145                                  wrap_c, wrap_c,
2146                                  8, mb_block_height, mb_x * 8, mb_y * 8,
2147                                  s->width >> 1, s->height >> 1);
2148         ptr_cr = ebuf + 18 * wrap_y + 8;
2149     }
2150
2151     if (s->mb_intra) {
             /* intra: optionally choose field (interlaced) DCT, then load
              * the source pixels directly */
2152         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2153             int progressive_score, interlaced_score;
2154
2155             s->interlaced_dct = 0;
                 /* the -400 biases the decision towards progressive DCT */
2156             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2157                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2158                                                      NULL, wrap_y, 8) - 400;
2159
2160             if (progressive_score > 0) {
2161                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2162                                                         NULL, wrap_y * 2, 8) +
2163                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2164                                                         NULL, wrap_y * 2, 8);
2165                 if (progressive_score > interlaced_score) {
2166                     s->interlaced_dct = 1;
2167
                         /* field blocks: the lower block pair starts one
                          * line down and rows are two lines apart */
2168                     dct_offset = wrap_y;
2169                     wrap_y <<= 1;
2170                     if (s->chroma_format == CHROMA_422)
2171                         wrap_c <<= 1;
2172                 }
2173             }
2174         }
2175
2176         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2177         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2178         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2179         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2180
2181         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2182             skip_dct[4] = 1;
2183             skip_dct[5] = 1;
2184         } else {
2185             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2186             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2187             if (!s->chroma_y_shift) { /* 422 */
2188                 s->pdsp.get_pixels(s->block[6],
2189                                    ptr_cb + (dct_offset >> 1), wrap_c);
2190                 s->pdsp.get_pixels(s->block[7],
2191                                    ptr_cr + (dct_offset >> 1), wrap_c);
2192             }
2193         }
2194     } else {
             /* inter: build the prediction in s->dest[] and code the
              * difference to the source */
2195         op_pixels_func (*op_pix)[4];
2196         qpel_mc_func (*op_qpix)[16];
2197         uint8_t *dest_y, *dest_cb, *dest_cr;
2198
2199         dest_y  = s->dest[0];
2200         dest_cb = s->dest[1];
2201         dest_cr = s->dest[2];
2202
2203         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2204             op_pix  = s->hdsp.put_pixels_tab;
2205             op_qpix = s->qdsp.put_qpel_pixels_tab;
2206         } else {
2207             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2208             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2209         }
2210
2211         if (s->mv_dir & MV_DIR_FORWARD) {
2212             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2213                           s->last_picture.f->data,
2214                           op_pix, op_qpix);
                 /* a following backward prediction is averaged into the
                  * forward one */
2215             op_pix  = s->hdsp.avg_pixels_tab;
2216             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2217         }
2218         if (s->mv_dir & MV_DIR_BACKWARD) {
2219             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2220                           s->next_picture.f->data,
2221                           op_pix, op_qpix);
2222         }
2223
2224         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2225             int progressive_score, interlaced_score;
2226
2227             s->interlaced_dct = 0;
2228             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2229                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2230                                                      ptr_y + wrap_y * 8,
2231                                                      wrap_y, 8) - 400;
2232
2233             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2234                 progressive_score -= 400;
2235
2236             if (progressive_score > 0) {
2237                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2238                                                         wrap_y * 2, 8) +
2239                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2240                                                         ptr_y + wrap_y,
2241                                                         wrap_y * 2, 8);
2242
2243                 if (progressive_score > interlaced_score) {
2244                     s->interlaced_dct = 1;
2245
2246                     dct_offset = wrap_y;
2247                     wrap_y <<= 1;
2248                     if (s->chroma_format == CHROMA_422)
2249                         wrap_c <<= 1;
2250                 }
2251             }
2252         }
2253
2254         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2255         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2256         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2257                             dest_y + dct_offset, wrap_y);
2258         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2259                             dest_y + dct_offset + 8, wrap_y);
2260
2261         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2262             skip_dct[4] = 1;
2263             skip_dct[5] = 1;
2264         } else {
2265             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2266             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2267             if (!s->chroma_y_shift) { /* 422 */
2268                 s->pdsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
2269                                     dest_cb + (dct_offset >> 1), wrap_c);
2270                 s->pdsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
2271                                     dest_cr + (dct_offset >> 1), wrap_c);
2272             }
2273         }
2274         /* pre quantization */
             /* when the motion-compensated variance of this macroblock is
              * small relative to qscale, skip the DCT of every block whose
              * SAD against the prediction is below 20 * qscale */
2275         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2276                 2 * s->qscale * s->qscale) {
2277             // FIXME optimize
2278             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2279                 skip_dct[0] = 1;
2280             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2281                 skip_dct[1] = 1;
2282             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2283                                wrap_y, 8) < 20 * s->qscale)
2284                 skip_dct[2] = 1;
2285             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2286                                wrap_y, 8) < 20 * s->qscale)
2287                 skip_dct[3] = 1;
2288             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2289                 skip_dct[4] = 1;
2290             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2291                 skip_dct[5] = 1;
2292             if (!s->chroma_y_shift) { /* 422 */
2293                 if (s->mecc.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2294                                    dest_cb + (dct_offset >> 1),
2295                                    wrap_c, 8) < 20 * s->qscale)
2296                     skip_dct[6] = 1;
2297                 if (s->mecc.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2298                                    dest_cr + (dct_offset >> 1),
2299                                    wrap_c, 8) < 20 * s->qscale)
2300                     skip_dct[7] = 1;
2301             }
2302         }
2303     }
2304
         /* noise shaping needs per-pixel weights and a copy of the blocks
          * as they were before quantization */
2305     if (s->quantizer_noise_shaping) {
2306         if (!skip_dct[0])
2307             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2308         if (!skip_dct[1])
2309             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2310         if (!skip_dct[2])
2311             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2312         if (!skip_dct[3])
2313             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2314         if (!skip_dct[4])
2315             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2316         if (!skip_dct[5])
2317             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2318         if (!s->chroma_y_shift) { /* 422 */
2319             if (!skip_dct[6])
2320                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2321                                   wrap_c);
2322             if (!skip_dct[7])
2323                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2324                                   wrap_c);
2325         }
             /* one copy for all blocks; presumably relies on the entries of
              * s->block[] being stored back to back -- TODO confirm */
2326         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2327     }
2328
2329     /* DCT & quantize */
2330     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
2331     {
2332         for (i = 0; i < mb_block_count; i++) {
2333             if (!skip_dct[i]) {
2334                 int overflow;
2335                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2336                 // FIXME we could decide to change to quantizer instead of
2337                 // clipping
2338                 // JS: I don't think that would be a good idea it could lower
2339                 //     quality instead of improve it. Just INTRADC clipping
2340                 //     deserves changes in quantizer
2341                 if (overflow)
2342                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2343             } else
                     /* skipped block: marked as having no coefficients */
2344                 s->block_last_index[i] = -1;
2345         }
2346         if (s->quantizer_noise_shaping) {
2347             for (i = 0; i < mb_block_count; i++) {
2348                 if (!skip_dct[i]) {
2349                     s->block_last_index[i] =
2350                         dct_quantize_refine(s, s->block[i], weight[i],
2351                                             orig[i], i, s->qscale);
2352                 }
2353             }
2354         }
2355
             /* single coefficient elimination is only applied to inter
              * macroblocks */
2356         if (s->luma_elim_threshold && !s->mb_intra)
2357             for (i = 0; i < 4; i++)
2358                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2359         if (s->chroma_elim_threshold && !s->mb_intra)
2360             for (i = 4; i < mb_block_count; i++)
2361                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2362
2363         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2364             for (i = 0; i < mb_block_count; i++) {
2365                 if (s->block_last_index[i] == -1)
2366                     s->coded_score[i] = INT_MAX / 256;
2367             }
2368         }
2369     }
2370
         /* gray-only intra macroblocks: both chroma blocks get a single
          * constant DC coefficient (1024 divided by c_dc_scale, rounded) */
2371     if ((s->avctx->flags & AV_CODEC_FLAG_GRAY) && s->mb_intra) {
2372         s->block_last_index[4] =
2373         s->block_last_index[5] = 0;
2374         s->block[4][0] =
2375         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2376     }
2377
2378     // non c quantize code returns incorrect block_last_index FIXME
         /* recompute the last index by scanning back from position 63 for
          * the first non-zero coefficient in scan order */
2379     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2380         for (i = 0; i < mb_block_count; i++) {
2381             int j;
2382             if (s->block_last_index[i] > 0) {
2383                 for (j = 63; j > 0; j--) {
2384                     if (s->block[i][s->intra_scantable.permutated[j]])
2385                         break;
2386                 }
2387                 s->block_last_index[i] = j;
2388             }
2389         }
2390     }
2391
2392     /* huffman encode */
2393     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2394     case AV_CODEC_ID_MPEG1VIDEO:
2395     case AV_CODEC_ID_MPEG2VIDEO:
2396         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2397             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2398         break;
2399     case AV_CODEC_ID_MPEG4:
2400         if (CONFIG_MPEG4_ENCODER)
2401             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2402         break;
2403     case AV_CODEC_ID_MSMPEG4V2:
2404     case AV_CODEC_ID_MSMPEG4V3:
2405     case AV_CODEC_ID_WMV1:
2406         if (CONFIG_MSMPEG4_ENCODER)
2407             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2408         break;
2409     case AV_CODEC_ID_WMV2:
2410         if (CONFIG_WMV2_ENCODER)
2411             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2412         break;
2413     case AV_CODEC_ID_H261:
2414         if (CONFIG_H261_ENCODER)
2415             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2416         break;
2417     case AV_CODEC_ID_H263:
2418     case AV_CODEC_ID_H263P:
2419     case AV_CODEC_ID_FLV1:
2420     case AV_CODEC_ID_RV10:
2421     case AV_CODEC_ID_RV20:
2422         if (CONFIG_H263_ENCODER)
2423             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2424         break;
2425     case AV_CODEC_ID_MJPEG:
2426         if (CONFIG_MJPEG_ENCODER)
2427             ff_mjpeg_encode_mb(s, s->block);
2428         break;
2429     default:
2430         assert(0);
2431     }
2432 }
2433
2434 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2435 {
2436     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2437     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2438 }
2439
2440 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2441     int i;
2442
2443     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2444
2445     /* MPEG-1 */
2446     d->mb_skip_run= s->mb_skip_run;
2447     for(i=0; i<3; i++)
2448         d->last_dc[i] = s->last_dc[i];
2449
2450     /* statistics */
2451     d->mv_bits= s->mv_bits;
2452     d->i_tex_bits= s->i_tex_bits;
2453     d->p_tex_bits= s->p_tex_bits;
2454     d->i_count= s->i_count;
2455     d->f_count= s->f_count;
2456     d->b_count= s->b_count;
2457     d->skip_count= s->skip_count;
2458     d->misc_bits= s->misc_bits;
2459     d->last_bits= 0;
2460
2461     d->mb_skipped= 0;
2462     d->qscale= s->qscale;
2463     d->dquant= s->dquant;
2464
2465     d->esc3_level_length= s->esc3_level_length;
2466 }
2467
2468 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2469     int i;
2470
2471     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2472     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2473
2474     /* MPEG-1 */
2475     d->mb_skip_run= s->mb_skip_run;
2476     for(i=0; i<3; i++)
2477         d->last_dc[i] = s->last_dc[i];
2478
2479     /* statistics */
2480     d->mv_bits= s->mv_bits;
2481     d->i_tex_bits= s->i_tex_bits;
2482     d->p_tex_bits= s->p_tex_bits;
2483     d->i_count= s->i_count;
2484     d->f_count= s->f_count;
2485     d->b_count= s->b_count;
2486     d->skip_count= s->skip_count;
2487     d->misc_bits= s->misc_bits;
2488
2489     d->mb_intra= s->mb_intra;
2490     d->mb_skipped= s->mb_skipped;
2491     d->mv_type= s->mv_type;
2492     d->mv_dir= s->mv_dir;
2493     d->pb= s->pb;
2494     if(s->data_partitioning){
2495         d->pb2= s->pb2;
2496         d->tex_pb= s->tex_pb;
2497     }
2498     d->block= s->block;
2499     for(i=0; i<8; i++)
2500         d->block_last_index[i]= s->block_last_index[i];
2501     d->interlaced_dct= s->interlaced_dct;
2502     d->qscale= s->qscale;
2503
2504     d->esc3_level_length= s->esc3_level_length;
2505 }
2506
2507 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2508                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2509                            int *dmin, int *next_block, int motion_x, int motion_y)
2510 {
2511     int score;
2512     uint8_t *dest_backup[3];
2513
2514     copy_context_before_encode(s, backup, type);
2515
2516     s->block= s->blocks[*next_block];
2517     s->pb= pb[*next_block];
2518     if(s->data_partitioning){
2519         s->pb2   = pb2   [*next_block];
2520         s->tex_pb= tex_pb[*next_block];
2521     }
2522
2523     if(*next_block){
2524         memcpy(dest_backup, s->dest, sizeof(s->dest));
2525         s->dest[0] = s->sc.rd_scratchpad;
2526         s->dest[1] = s->sc.rd_scratchpad + 16*s->linesize;
2527         s->dest[2] = s->sc.rd_scratchpad + 16*s->linesize + 8;
2528         assert(s->linesize >= 32); //FIXME
2529     }
2530
2531     encode_mb(s, motion_x, motion_y);
2532
2533     score= put_bits_count(&s->pb);
2534     if(s->data_partitioning){
2535         score+= put_bits_count(&s->pb2);
2536         score+= put_bits_count(&s->tex_pb);
2537     }
2538
2539     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2540         ff_mpv_decode_mb(s, s->block);
2541
2542         score *= s->lambda2;
2543         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2544     }
2545
2546     if(*next_block){
2547         memcpy(s->dest, dest_backup, sizeof(s->dest));
2548     }
2549
2550     if(score<*dmin){
2551         *dmin= score;
2552         *next_block^=1;
2553
2554         copy_context_after_encode(best, s, type);
2555     }
2556 }
2557
2558 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2559     uint32_t *sq = ff_square_tab + 256;
2560     int acc=0;
2561     int x,y;
2562
2563     if(w==16 && h==16)
2564         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2565     else if(w==8 && h==8)
2566         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2567
2568     for(y=0; y<h; y++){
2569         for(x=0; x<w; x++){
2570             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2571         }
2572     }
2573
2574     assert(acc>=0);
2575
2576     return acc;
2577 }
2578
2579 static int sse_mb(MpegEncContext *s){
2580     int w= 16;
2581     int h= 16;
2582
2583     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2584     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2585
2586     if(w==16 && h==16)
2587       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2588         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2589                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2590                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2591       }else{
2592         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2593                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2594                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2595       }
2596     else
2597         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2598                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2599                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2600 }
2601
2602 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2603     MpegEncContext *s= *(void**)arg;
2604
2605
2606     s->me.pre_pass=1;
2607     s->me.dia_size= s->avctx->pre_dia_size;
2608     s->first_slice_line=1;
2609     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2610         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2611             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2612         }
2613         s->first_slice_line=0;
2614     }
2615
2616     s->me.pre_pass=0;
2617
2618     return 0;
2619 }
2620
2621 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2622     MpegEncContext *s= *(void**)arg;
2623
2624     s->me.dia_size= s->avctx->dia_size;
2625     s->first_slice_line=1;
2626     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2627         s->mb_x=0; //for block init below
2628         ff_init_block_index(s);
2629         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2630             s->block_index[0]+=2;
2631             s->block_index[1]+=2;
2632             s->block_index[2]+=2;
2633             s->block_index[3]+=2;
2634
2635             /* compute motion vector & mb_type and store in context */
2636             if(s->pict_type==AV_PICTURE_TYPE_B)
2637                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2638             else
2639                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2640         }
2641         s->first_slice_line=0;
2642     }
2643     return 0;
2644 }
2645
2646 static int mb_var_thread(AVCodecContext *c, void *arg){
2647     MpegEncContext *s= *(void**)arg;
2648     int mb_x, mb_y;
2649
2650     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2651         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2652             int xx = mb_x * 16;
2653             int yy = mb_y * 16;
2654             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2655             int varc;
2656             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2657
2658             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2659                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2660
2661             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2662             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2663             s->me.mb_var_sum_temp    += varc;
2664         }
2665     }
2666     return 0;
2667 }
2668
2669 static void write_slice_end(MpegEncContext *s){
2670     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2671         if(s->partitioned_frame){
2672             ff_mpeg4_merge_partitions(s);
2673         }
2674
2675         ff_mpeg4_stuffing(&s->pb);
2676     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2677         ff_mjpeg_encode_stuffing(&s->pb);
2678     }
2679
2680     avpriv_align_put_bits(&s->pb);
2681     flush_put_bits(&s->pb);
2682
2683     if ((s->avctx->flags & AV_CODEC_FLAG_PASS1) && !s->partitioned_frame)
2684         s->misc_bits+= get_bits_diff(s);
2685 }
2686
2687 static void write_mb_info(MpegEncContext *s)
2688 {
2689     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2690     int offset = put_bits_count(&s->pb);
2691     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2692     int gobn = s->mb_y / s->gob_index;
2693     int pred_x, pred_y;
2694     if (CONFIG_H263_ENCODER)
2695         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2696     bytestream_put_le32(&ptr, offset);
2697     bytestream_put_byte(&ptr, s->qscale);
2698     bytestream_put_byte(&ptr, gobn);
2699     bytestream_put_le16(&ptr, mba);
2700     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2701     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2702     /* 4MV not implemented */
2703     bytestream_put_byte(&ptr, 0); /* hmv2 */
2704     bytestream_put_byte(&ptr, 0); /* vmv2 */
2705 }
2706
2707 static void update_mb_info(MpegEncContext *s, int startcode)
2708 {
2709     if (!s->mb_info)
2710         return;
2711     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2712         s->mb_info_size += 12;
2713         s->prev_mb_info = s->last_mb_info;
2714     }
2715     if (startcode) {
2716         s->prev_mb_info = put_bits_count(&s->pb)/8;
2717         /* This might have incremented mb_info_size above, and we return without
2718          * actually writing any info into that slot yet. But in that case,
2719          * this will be called again at the start of the after writing the
2720          * start code, actually writing the mb info. */
2721         return;
2722     }
2723
2724     s->last_mb_info = put_bits_count(&s->pb)/8;
2725     if (!s->mb_info_size)
2726         s->mb_info_size += 12;
2727     write_mb_info(s);
2728 }
2729
2730 static int encode_thread(AVCodecContext *c, void *arg){
2731     MpegEncContext *s= *(void**)arg;
2732     int mb_x, mb_y;
2733     int chr_h= 16>>s->chroma_y_shift;
2734     int i, j;
2735     MpegEncContext best_s = { 0 }, backup_s;
2736     uint8_t bit_buf[2][MAX_MB_BYTES];
2737     uint8_t bit_buf2[2][MAX_MB_BYTES];
2738     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2739     PutBitContext pb[2], pb2[2], tex_pb[2];
2740
2741     for(i=0; i<2; i++){
2742         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2743         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2744         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2745     }
2746
2747     s->last_bits= put_bits_count(&s->pb);
2748     s->mv_bits=0;
2749     s->misc_bits=0;
2750     s->i_tex_bits=0;
2751     s->p_tex_bits=0;
2752     s->i_count=0;
2753     s->f_count=0;
2754     s->b_count=0;
2755     s->skip_count=0;
2756
2757     for(i=0; i<3; i++){
2758         /* init last dc values */
2759         /* note: quant matrix value (8) is implied here */
2760         s->last_dc[i] = 128 << s->intra_dc_precision;
2761
2762         s->current_picture.encoding_error[i] = 0;
2763     }
2764     s->mb_skip_run = 0;
2765     memset(s->last_mv, 0, sizeof(s->last_mv));
2766
2767     s->last_mv_dir = 0;
2768
2769     switch(s->codec_id){
2770     case AV_CODEC_ID_H263:
2771     case AV_CODEC_ID_H263P:
2772     case AV_CODEC_ID_FLV1:
2773         if (CONFIG_H263_ENCODER)
2774             s->gob_index = H263_GOB_HEIGHT(s->height);
2775         break;
2776     case AV_CODEC_ID_MPEG4:
2777         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2778             ff_mpeg4_init_partitions(s);
2779         break;
2780     }
2781
2782     s->resync_mb_x=0;
2783     s->resync_mb_y=0;
2784     s->first_slice_line = 1;
2785     s->ptr_lastgob = s->pb.buf;
2786     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2787         s->mb_x=0;
2788         s->mb_y= mb_y;
2789
2790         ff_set_qscale(s, s->qscale);
2791         ff_init_block_index(s);
2792
2793         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2794             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2795             int mb_type= s->mb_type[xy];
2796 //            int d;
2797             int dmin= INT_MAX;
2798             int dir;
2799
2800             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2801                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2802                 return -1;
2803             }
2804             if(s->data_partitioning){
2805                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2806                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2807                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2808                     return -1;
2809                 }
2810             }
2811
2812             s->mb_x = mb_x;
2813             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2814             ff_update_block_index(s);
2815
2816             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2817                 ff_h261_reorder_mb_index(s);
2818                 xy= s->mb_y*s->mb_stride + s->mb_x;
2819                 mb_type= s->mb_type[xy];
2820             }
2821
2822             /* write gob / video packet header  */
2823             if(s->rtp_mode){
2824                 int current_packet_size, is_gob_start;
2825
2826                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2827
2828                 is_gob_start = s->rtp_payload_size &&
2829                                current_packet_size >= s->rtp_payload_size &&
2830                                mb_y + mb_x > 0;
2831
2832                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2833
2834                 switch(s->codec_id){
2835                 case AV_CODEC_ID_H263:
2836                 case AV_CODEC_ID_H263P:
2837                     if(!s->h263_slice_structured)
2838                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2839                     break;
2840                 case AV_CODEC_ID_MPEG2VIDEO:
2841                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2842                 case AV_CODEC_ID_MPEG1VIDEO:
2843                     if(s->mb_skip_run) is_gob_start=0;
2844                     break;
2845                 }
2846
2847                 if(is_gob_start){
2848                     if(s->start_mb_y != mb_y || mb_x!=0){
2849                         write_slice_end(s);
2850
2851                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2852                             ff_mpeg4_init_partitions(s);
2853                         }
2854                     }
2855
2856                     assert((put_bits_count(&s->pb)&7) == 0);
2857                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2858
2859                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2860                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2861                         int d = 100 / s->error_rate;
2862                         if(r % d == 0){
2863                             current_packet_size=0;
2864                             s->pb.buf_ptr= s->ptr_lastgob;
2865                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2866                         }
2867                     }
2868
2869 #if FF_API_RTP_CALLBACK
2870 FF_DISABLE_DEPRECATION_WARNINGS
2871                     if (s->avctx->rtp_callback){
2872                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2873                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2874                     }
2875 FF_ENABLE_DEPRECATION_WARNINGS
2876 #endif
2877                     update_mb_info(s, 1);
2878
2879                     switch(s->codec_id){
2880                     case AV_CODEC_ID_MPEG4:
2881                         if (CONFIG_MPEG4_ENCODER) {
2882                             ff_mpeg4_encode_video_packet_header(s);
2883                             ff_mpeg4_clean_buffers(s);
2884                         }
2885                     break;
2886                     case AV_CODEC_ID_MPEG1VIDEO:
2887                     case AV_CODEC_ID_MPEG2VIDEO:
2888                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2889                             ff_mpeg1_encode_slice_header(s);
2890                             ff_mpeg1_clean_buffers(s);
2891                         }
2892                     break;
2893                     case AV_CODEC_ID_H263:
2894                     case AV_CODEC_ID_H263P:
2895                         if (CONFIG_H263_ENCODER)
2896                             ff_h263_encode_gob_header(s, mb_y);
2897                     break;
2898                     }
2899
2900                     if (s->avctx->flags & AV_CODEC_FLAG_PASS1) {
2901                         int bits= put_bits_count(&s->pb);
2902                         s->misc_bits+= bits - s->last_bits;
2903                         s->last_bits= bits;
2904                     }
2905
2906                     s->ptr_lastgob += current_packet_size;
2907                     s->first_slice_line=1;
2908                     s->resync_mb_x=mb_x;
2909                     s->resync_mb_y=mb_y;
2910                 }
2911             }
2912
2913             if(  (s->resync_mb_x   == s->mb_x)
2914                && s->resync_mb_y+1 == s->mb_y){
2915                 s->first_slice_line=0;
2916             }
2917
2918             s->mb_skipped=0;
2919             s->dquant=0; //only for QP_RD
2920
2921             update_mb_info(s, 0);
2922
2923             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2924                 int next_block=0;
2925                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2926
2927                 copy_context_before_encode(&backup_s, s, -1);
2928                 backup_s.pb= s->pb;
2929                 best_s.data_partitioning= s->data_partitioning;
2930                 best_s.partitioned_frame= s->partitioned_frame;
2931                 if(s->data_partitioning){
2932                     backup_s.pb2= s->pb2;
2933                     backup_s.tex_pb= s->tex_pb;
2934                 }
2935
2936                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2937                     s->mv_dir = MV_DIR_FORWARD;
2938                     s->mv_type = MV_TYPE_16X16;
2939                     s->mb_intra= 0;
2940                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2941                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2942                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2943                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2944                 }
2945                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2946                     s->mv_dir = MV_DIR_FORWARD;
2947                     s->mv_type = MV_TYPE_FIELD;
2948                     s->mb_intra= 0;
2949                     for(i=0; i<2; i++){
2950                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2951                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2952                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2953                     }
2954                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2955                                  &dmin, &next_block, 0, 0);
2956                 }
2957                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2958                     s->mv_dir = MV_DIR_FORWARD;
2959                     s->mv_type = MV_TYPE_16X16;
2960                     s->mb_intra= 0;
2961                     s->mv[0][0][0] = 0;
2962                     s->mv[0][0][1] = 0;
2963                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2964                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2965                 }
2966                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2967                     s->mv_dir = MV_DIR_FORWARD;
2968                     s->mv_type = MV_TYPE_8X8;
2969                     s->mb_intra= 0;
2970                     for(i=0; i<4; i++){
2971                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2972                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2973                     }
2974                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2975                                  &dmin, &next_block, 0, 0);
2976                 }
2977                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2978                     s->mv_dir = MV_DIR_FORWARD;
2979                     s->mv_type = MV_TYPE_16X16;
2980                     s->mb_intra= 0;
2981                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2982                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2983                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2984                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2985                 }
2986                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2987                     s->mv_dir = MV_DIR_BACKWARD;
2988                     s->mv_type = MV_TYPE_16X16;
2989                     s->mb_intra= 0;
2990                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2991                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2992                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2993                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2994                 }
2995                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2996                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2997                     s->mv_type = MV_TYPE_16X16;
2998                     s->mb_intra= 0;
2999                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3000                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3001                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3002                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3003                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
3004                                  &dmin, &next_block, 0, 0);
3005                 }
3006                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
3007                     s->mv_dir = MV_DIR_FORWARD;
3008                     s->mv_type = MV_TYPE_FIELD;
3009                     s->mb_intra= 0;
3010                     for(i=0; i<2; i++){
3011                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3012                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3013                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3014                     }
3015                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
3016                                  &dmin, &next_block, 0, 0);
3017                 }
3018                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
3019                     s->mv_dir = MV_DIR_BACKWARD;
3020                     s->mv_type = MV_TYPE_FIELD;
3021                     s->mb_intra= 0;
3022                     for(i=0; i<2; i++){
3023                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3024                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3025                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3026                     }
3027                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
3028                                  &dmin, &next_block, 0, 0);
3029                 }
3030                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
3031                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3032                     s->mv_type = MV_TYPE_FIELD;
3033                     s->mb_intra= 0;
3034                     for(dir=0; dir<2; dir++){
3035                         for(i=0; i<2; i++){
3036                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3037                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3038                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3039                         }
3040                     }
3041                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
3042                                  &dmin, &next_block, 0, 0);
3043                 }
3044                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3045                     s->mv_dir = 0;
3046                     s->mv_type = MV_TYPE_16X16;
3047                     s->mb_intra= 1;
3048                     s->mv[0][0][0] = 0;
3049                     s->mv[0][0][1] = 0;
3050                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3051                                  &dmin, &next_block, 0, 0);
3052                     if(s->h263_pred || s->h263_aic){
3053                         if(best_s.mb_intra)
3054                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3055                         else
3056                             ff_clean_intra_table_entries(s); //old mode?
3057                     }
3058                 }
3059
3060                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3061                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3062                         const int last_qp= backup_s.qscale;
3063                         int qpi, qp, dc[6];
3064                         int16_t ac[6][16];
3065                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3066                         static const int dquant_tab[4]={-1,1,-2,2};
3067
3068                         assert(backup_s.dquant == 0);
3069
3070                         //FIXME intra
3071                         s->mv_dir= best_s.mv_dir;
3072                         s->mv_type = MV_TYPE_16X16;
3073                         s->mb_intra= best_s.mb_intra;
3074                         s->mv[0][0][0] = best_s.mv[0][0][0];
3075                         s->mv[0][0][1] = best_s.mv[0][0][1];
3076                         s->mv[1][0][0] = best_s.mv[1][0][0];
3077                         s->mv[1][0][1] = best_s.mv[1][0][1];
3078
3079                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3080                         for(; qpi<4; qpi++){
3081                             int dquant= dquant_tab[qpi];
3082                             qp= last_qp + dquant;
3083                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3084                                 continue;
3085                             backup_s.dquant= dquant;
3086                             if(s->mb_intra && s->dc_val[0]){
3087                                 for(i=0; i<6; i++){
3088                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3089                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3090                                 }
3091                             }
3092
3093                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3094                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3095                             if(best_s.qscale != qp){
3096                                 if(s->mb_intra && s->dc_val[0]){
3097                                     for(i=0; i<6; i++){
3098                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3099                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3100                                     }
3101                                 }
3102                             }
3103                         }
3104                     }
3105                 }
3106                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3107                     int mx= s->b_direct_mv_table[xy][0];
3108                     int my= s->b_direct_mv_table[xy][1];
3109
3110                     backup_s.dquant = 0;
3111                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3112                     s->mb_intra= 0;
3113                     ff_mpeg4_set_direct_mv(s, mx, my);
3114                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3115                                  &dmin, &next_block, mx, my);
3116                 }
3117                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3118                     backup_s.dquant = 0;
3119                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3120                     s->mb_intra= 0;
3121                     ff_mpeg4_set_direct_mv(s, 0, 0);
3122                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3123                                  &dmin, &next_block, 0, 0);
3124                 }
3125                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3126                     int coded=0;
3127                     for(i=0; i<6; i++)
3128                         coded |= s->block_last_index[i];
3129                     if(coded){
3130                         int mx,my;
3131                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3132                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3133                             mx=my=0; //FIXME find the one we actually used
3134                             ff_mpeg4_set_direct_mv(s, mx, my);
3135                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3136                             mx= s->mv[1][0][0];
3137                             my= s->mv[1][0][1];
3138                         }else{
3139                             mx= s->mv[0][0][0];
3140                             my= s->mv[0][0][1];
3141                         }
3142
3143                         s->mv_dir= best_s.mv_dir;
3144                         s->mv_type = best_s.mv_type;
3145                         s->mb_intra= 0;
3146 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3147                         s->mv[0][0][1] = best_s.mv[0][0][1];
3148                         s->mv[1][0][0] = best_s.mv[1][0][0];
3149                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3150                         backup_s.dquant= 0;
3151                         s->skipdct=1;
3152                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3153                                         &dmin, &next_block, mx, my);
3154                         s->skipdct=0;
3155                     }
3156                 }
3157
3158                 s->current_picture.qscale_table[xy] = best_s.qscale;
3159
3160                 copy_context_after_encode(s, &best_s, -1);
3161
3162                 pb_bits_count= put_bits_count(&s->pb);
3163                 flush_put_bits(&s->pb);
3164                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3165                 s->pb= backup_s.pb;
3166
3167                 if(s->data_partitioning){
3168                     pb2_bits_count= put_bits_count(&s->pb2);
3169                     flush_put_bits(&s->pb2);
3170                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3171                     s->pb2= backup_s.pb2;
3172
3173                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3174                     flush_put_bits(&s->tex_pb);
3175                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3176                     s->tex_pb= backup_s.tex_pb;
3177                 }
3178                 s->last_bits= put_bits_count(&s->pb);
3179
3180                 if (CONFIG_H263_ENCODER &&
3181                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3182                     ff_h263_update_motion_val(s);
3183
3184                 if(next_block==0){ //FIXME 16 vs linesize16
3185                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->sc.rd_scratchpad                     , s->linesize  ,16);
3186                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->sc.rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3187                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->sc.rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3188                 }
3189
3190                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3191                     ff_mpv_decode_mb(s, s->block);
3192             } else {
3193                 int motion_x = 0, motion_y = 0;
3194                 s->mv_type=MV_TYPE_16X16;
3195                 // only one MB-Type possible
3196
3197                 switch(mb_type){
3198                 case CANDIDATE_MB_TYPE_INTRA:
3199                     s->mv_dir = 0;
3200                     s->mb_intra= 1;
3201                     motion_x= s->mv[0][0][0] = 0;
3202                     motion_y= s->mv[0][0][1] = 0;
3203                     break;
3204                 case CANDIDATE_MB_TYPE_INTER:
3205                     s->mv_dir = MV_DIR_FORWARD;
3206                     s->mb_intra= 0;
3207                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3208                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3209                     break;
3210                 case CANDIDATE_MB_TYPE_INTER_I:
3211                     s->mv_dir = MV_DIR_FORWARD;
3212                     s->mv_type = MV_TYPE_FIELD;
3213                     s->mb_intra= 0;
3214                     for(i=0; i<2; i++){
3215                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3216                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3217                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3218                     }
3219                     break;
3220                 case CANDIDATE_MB_TYPE_INTER4V:
3221                     s->mv_dir = MV_DIR_FORWARD;
3222                     s->mv_type = MV_TYPE_8X8;
3223                     s->mb_intra= 0;
3224                     for(i=0; i<4; i++){
3225                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3226                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3227                     }
3228                     break;
3229                 case CANDIDATE_MB_TYPE_DIRECT:
3230                     if (CONFIG_MPEG4_ENCODER) {
3231                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3232                         s->mb_intra= 0;
3233                         motion_x=s->b_direct_mv_table[xy][0];
3234                         motion_y=s->b_direct_mv_table[xy][1];
3235                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3236                     }
3237                     break;
3238                 case CANDIDATE_MB_TYPE_DIRECT0:
3239                     if (CONFIG_MPEG4_ENCODER) {
3240                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3241                         s->mb_intra= 0;
3242                         ff_mpeg4_set_direct_mv(s, 0, 0);
3243                     }
3244                     break;
3245                 case CANDIDATE_MB_TYPE_BIDIR:
3246                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3247                     s->mb_intra= 0;
3248                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3249                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3250                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3251                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3252                     break;
3253                 case CANDIDATE_MB_TYPE_BACKWARD:
3254                     s->mv_dir = MV_DIR_BACKWARD;
3255                     s->mb_intra= 0;
3256                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3257                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3258                     break;
3259                 case CANDIDATE_MB_TYPE_FORWARD:
3260                     s->mv_dir = MV_DIR_FORWARD;
3261                     s->mb_intra= 0;
3262                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3263                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3264                     break;
3265                 case CANDIDATE_MB_TYPE_FORWARD_I:
3266                     s->mv_dir = MV_DIR_FORWARD;
3267                     s->mv_type = MV_TYPE_FIELD;
3268                     s->mb_intra= 0;
3269                     for(i=0; i<2; i++){
3270                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3271                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3272                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3273                     }
3274                     break;
3275                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3276                     s->mv_dir = MV_DIR_BACKWARD;
3277                     s->mv_type = MV_TYPE_FIELD;
3278                     s->mb_intra= 0;
3279                     for(i=0; i<2; i++){
3280                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3281                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3282                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3283                     }
3284                     break;
3285                 case CANDIDATE_MB_TYPE_BIDIR_I:
3286                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3287                     s->mv_type = MV_TYPE_FIELD;
3288                     s->mb_intra= 0;
3289                     for(dir=0; dir<2; dir++){
3290                         for(i=0; i<2; i++){
3291                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3292                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3293                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3294                         }
3295                     }
3296                     break;
3297                 default:
3298                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3299                 }
3300
3301                 encode_mb(s, motion_x, motion_y);
3302
3303                 // RAL: Update last macroblock type
3304                 s->last_mv_dir = s->mv_dir;
3305
3306                 if (CONFIG_H263_ENCODER &&
3307                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3308                     ff_h263_update_motion_val(s);
3309
3310                 ff_mpv_decode_mb(s, s->block);
3311             }
3312
3313             /* clean the MV table in IPS frames for direct mode in B-frames */
3314             if(s->mb_intra /* && I,P,S_TYPE */){
3315                 s->p_mv_table[xy][0]=0;
3316                 s->p_mv_table[xy][1]=0;
3317             }
3318
3319             if (s->avctx->flags & AV_CODEC_FLAG_PSNR) {
3320                 int w= 16;
3321                 int h= 16;
3322
3323                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3324                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3325
3326                 s->current_picture.encoding_error[0] += sse(
3327                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3328                     s->dest[0], w, h, s->linesize);
3329                 s->current_picture.encoding_error[1] += sse(
3330                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3331                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3332                 s->current_picture.encoding_error[2] += sse(
3333                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3334                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3335             }
3336             if(s->loop_filter){
3337                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3338                     ff_h263_loop_filter(s);
3339             }
3340             ff_dlog(s->avctx, "MB %d %d bits\n",
3341                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3342         }
3343     }
3344
3345     //not beautiful here but we must write it before flushing so it has to be here
3346     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3347         ff_msmpeg4_encode_ext_header(s);
3348
3349     write_slice_end(s);
3350
3351 #if FF_API_RTP_CALLBACK
3352 FF_DISABLE_DEPRECATION_WARNINGS
3353     /* Send the last GOB if RTP */
3354     if (s->avctx->rtp_callback) {
3355         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3356         int pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3357         /* Call the RTP callback to send the last GOB */
3358         emms_c();
3359         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3360     }
3361 FF_ENABLE_DEPRECATION_WARNINGS
3362 #endif
3363
3364     return 0;
3365 }
3366
/*
 * Add src->field to dst->field, then reset src->field to zero.
 * Expects pointers named `dst` and `src` to be in scope where it is used.
 * The do { } while (0) wrapper makes the expansion a single statement, so it
 * stays correct inside unbraced if/else bodies.
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
3368 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3369     MERGE(me.scene_change_score);
3370     MERGE(me.mc_mb_var_sum_temp);
3371     MERGE(me.mb_var_sum_temp);
3372 }
3373
3374 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3375     int i;
3376
3377     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3378     MERGE(dct_count[1]);
3379     MERGE(mv_bits);
3380     MERGE(i_tex_bits);
3381     MERGE(p_tex_bits);
3382     MERGE(i_count);
3383     MERGE(f_count);
3384     MERGE(b_count);
3385     MERGE(skip_count);
3386     MERGE(misc_bits);
3387     MERGE(er.error_count);
3388     MERGE(padding_bug_score);
3389     MERGE(current_picture.encoding_error[0]);
3390     MERGE(current_picture.encoding_error[1]);
3391     MERGE(current_picture.encoding_error[2]);
3392
3393     if (dst->noise_reduction){
3394         for(i=0; i<64; i++){
3395             MERGE(dct_error_sum[0][i]);
3396             MERGE(dct_error_sum[1][i]);
3397         }
3398     }
3399
3400     assert(put_bits_count(&src->pb) % 8 ==0);
3401     assert(put_bits_count(&dst->pb) % 8 ==0);
3402     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3403     flush_put_bits(&dst->pb);
3404 }
3405
3406 static int estimate_qp(MpegEncContext *s, int dry_run){
3407     if (s->next_lambda){
3408         s->current_picture_ptr->f->quality =
3409         s->current_picture.f->quality = s->next_lambda;
3410         if(!dry_run) s->next_lambda= 0;
3411     } else if (!s->fixed_qscale) {
3412         int quality;
3413 #if CONFIG_LIBXVID
3414         if ((s->avctx->flags & AV_CODEC_FLAG_PASS2) && s->rc_strategy == 1)
3415             quality = ff_xvid_rate_estimate_qscale(s, dry_run);
3416         else
3417 #endif
3418         quality = ff_rate_estimate_qscale(s, dry_run);
3419         s->current_picture_ptr->f->quality =
3420         s->current_picture.f->quality = quality;
3421         if (s->current_picture.f->quality < 0)
3422             return -1;
3423     }
3424
3425     if(s->adaptive_quant){
3426         switch(s->codec_id){
3427         case AV_CODEC_ID_MPEG4:
3428             if (CONFIG_MPEG4_ENCODER)
3429                 ff_clean_mpeg4_qscales(s);
3430             break;
3431         case AV_CODEC_ID_H263:
3432         case AV_CODEC_ID_H263P:
3433         case AV_CODEC_ID_FLV1:
3434             if (CONFIG_H263_ENCODER)
3435                 ff_clean_h263_qscales(s);
3436             break;
3437         default:
3438             ff_init_qscale_tab(s);
3439         }
3440
3441         s->lambda= s->lambda_table[0];
3442         //FIXME broken
3443     }else
3444         s->lambda = s->current_picture.f->quality;
3445     update_qscale(s);
3446     return 0;
3447 }
3448
3449 /* must be called before writing the header */
3450 static void set_frame_distances(MpegEncContext * s){
3451     assert(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3452     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3453
3454     if(s->pict_type==AV_PICTURE_TYPE_B){
3455         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3456         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3457     }else{
3458         s->pp_time= s->time - s->last_non_b_time;
3459         s->last_non_b_time= s->time;
3460         assert(s->picture_number==0 || s->pp_time > 0);
3461     }
3462 }
3463
3464 static int encode_picture(MpegEncContext *s, int picture_number)
3465 {
3466     int i, ret;
3467     int bits;
3468     int context_count = s->slice_context_count;
3469
3470     s->picture_number = picture_number;
3471
3472     /* Reset the average MB variance */
3473     s->me.mb_var_sum_temp    =
3474     s->me.mc_mb_var_sum_temp = 0;
3475
3476     /* we need to initialize some time vars before we can encode B-frames */
3477     // RAL: Condition added for MPEG1VIDEO
3478     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3479         set_frame_distances(s);
3480     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3481         ff_set_mpeg4_time(s);
3482
3483     s->me.scene_change_score=0;
3484
3485 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3486
3487     if(s->pict_type==AV_PICTURE_TYPE_I){
3488         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3489         else                        s->no_rounding=0;
3490     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3491         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3492             s->no_rounding ^= 1;
3493     }
3494
3495     if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
3496         if (estimate_qp(s,1) < 0)
3497             return -1;
3498         ff_get_2pass_fcode(s);
3499     } else if (!(s->avctx->flags & AV_CODEC_FLAG_QSCALE)) {
3500         if(s->pict_type==AV_PICTURE_TYPE_B)
3501             s->lambda= s->last_lambda_for[s->pict_type];
3502         else
3503             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3504         update_qscale(s);
3505     }
3506
3507     s->mb_intra=0; //for the rate distortion & bit compare functions
3508     for(i=1; i<context_count; i++){
3509         ret = ff_update_duplicate_context(s->thread_context[i], s);
3510         if (ret < 0)
3511             return ret;
3512     }
3513
3514     if(ff_init_me(s)<0)
3515         return -1;
3516
3517     /* Estimate motion for every MB */
3518     if(s->pict_type != AV_PICTURE_TYPE_I){
3519         s->lambda  = (s->lambda  * s->me_penalty_compensation + 128) >> 8;
3520         s->lambda2 = (s->lambda2 * (int64_t) s->me_penalty_compensation + 128) >> 8;
3521         if (s->pict_type != AV_PICTURE_TYPE_B) {
3522             if ((s->me_pre && s->last_non_b_pict_type == AV_PICTURE_TYPE_I) ||
3523                 s->me_pre == 2) {
3524                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3525             }
3526         }
3527
3528         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3529     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3530         /* I-Frame */
3531         for(i=0; i<s->mb_stride*s->mb_height; i++)
3532             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3533
3534         if(!s->fixed_qscale){
3535             /* finding spatial complexity for I-frame rate control */
3536             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3537         }
3538     }
3539     for(i=1; i<context_count; i++){
3540         merge_context_after_me(s, s->thread_context[i]);
3541     }
3542     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3543     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3544     emms_c();
3545
3546     if (s->me.scene_change_score > s->scenechange_threshold &&
3547         s->pict_type == AV_PICTURE_TYPE_P) {
3548         s->pict_type= AV_PICTURE_TYPE_I;
3549         for(i=0; i<s->mb_stride*s->mb_height; i++)
3550             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3551         ff_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3552                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3553     }
3554
3555     if(!s->umvplus){
3556         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3557             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3558
3559             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3560                 int a,b;
3561                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3562                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3563                 s->f_code= FFMAX3(s->f_code, a, b);
3564             }
3565
3566             ff_fix_long_p_mvs(s);
3567             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3568             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3569                 int j;
3570                 for(i=0; i<2; i++){
3571                     for(j=0; j<2; j++)
3572                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3573                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3574                 }
3575             }
3576         }
3577
3578         if(s->pict_type==AV_PICTURE_TYPE_B){
3579             int a, b;
3580
3581             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3582             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3583             s->f_code = FFMAX(a, b);
3584
3585             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3586             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3587             s->b_code = FFMAX(a, b);
3588
3589             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3590             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3591             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3592             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3593             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3594                 int dir, j;
3595                 for(dir=0; dir<2; dir++){
3596                     for(i=0; i<2; i++){
3597                         for(j=0; j<2; j++){
3598                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3599                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3600                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3601                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3602                         }
3603                     }
3604                 }
3605             }
3606         }
3607     }
3608
3609     if (estimate_qp(s, 0) < 0)
3610         return -1;
3611
3612     if (s->qscale < 3 && s->max_qcoeff <= 128 &&
3613         s->pict_type == AV_PICTURE_TYPE_I &&
3614         !(s->avctx->flags & AV_CODEC_FLAG_QSCALE))
3615         s->qscale= 3; //reduce clipping problems
3616
3617     if (s->out_format == FMT_MJPEG) {
3618         /* for mjpeg, we do include qscale in the matrix */
3619         for(i=1;i<64;i++){
3620             int j = s->idsp.idct_permutation[i];
3621
3622             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3623         }
3624         s->y_dc_scale_table=
3625         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3626         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3627         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3628                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3629         s->qscale= 8;
3630     }
3631
3632     //FIXME var duplication
3633     s->current_picture_ptr->f->key_frame =
3634     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3635     s->current_picture_ptr->f->pict_type =
3636     s->current_picture.f->pict_type = s->pict_type;
3637
3638     if (s->current_picture.f->key_frame)
3639         s->picture_in_gop_number=0;
3640
3641     s->last_bits= put_bits_count(&s->pb);
3642     switch(s->out_format) {
3643     case FMT_MJPEG:
3644         if (CONFIG_MJPEG_ENCODER)
3645             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3646                                            s->pred, s->intra_matrix);
3647         break;
3648     case FMT_H261:
3649         if (CONFIG_H261_ENCODER)
3650             ff_h261_encode_picture_header(s, picture_number);
3651         break;
3652     case FMT_H263:
3653         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3654             ff_wmv2_encode_picture_header(s, picture_number);
3655         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3656             ff_msmpeg4_encode_picture_header(s, picture_number);
3657         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3658             ff_mpeg4_encode_picture_header(s, picture_number);
3659         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10) {
3660             ret = ff_rv10_encode_picture_header(s, picture_number);
3661             if (ret < 0)
3662                 return ret;
3663         }
3664         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3665             ff_rv20_encode_picture_header(s, picture_number);
3666         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3667             ff_flv_encode_picture_header(s, picture_number);
3668         else if (CONFIG_H263_ENCODER)
3669             ff_h263_encode_picture_header(s, picture_number);
3670         break;
3671     case FMT_MPEG1:
3672         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3673             ff_mpeg1_encode_picture_header(s, picture_number);
3674         break;
3675     default:
3676         assert(0);
3677     }
3678     bits= put_bits_count(&s->pb);
3679     s->header_bits= bits - s->last_bits;
3680
3681     for(i=1; i<context_count; i++){
3682         update_duplicate_context_after_me(s->thread_context[i], s);
3683     }
3684     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3685     for(i=1; i<context_count; i++){
3686         merge_context_after_encode(s, s->thread_context[i]);
3687     }
3688     emms_c();
3689     return 0;
3690 }
3691
3692 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3693     const int intra= s->mb_intra;
3694     int i;
3695
3696     s->dct_count[intra]++;
3697
3698     for(i=0; i<64; i++){
3699         int level= block[i];
3700
3701         if(level){
3702             if(level>0){
3703                 s->dct_error_sum[intra][i] += level;
3704                 level -= s->dct_offset[intra][i];
3705                 if(level<0) level=0;
3706             }else{
3707                 s->dct_error_sum[intra][i] -= level;
3708                 level += s->dct_offset[intra][i];
3709                 if(level>0) level=0;
3710             }
3711             block[i]= level;
3712         }
3713     }
3714 }
3715
3716 static int dct_quantize_trellis_c(MpegEncContext *s,
3717                                   int16_t *block, int n,
3718                                   int qscale, int *overflow){
3719     const int *qmat;
3720     const uint8_t *scantable= s->intra_scantable.scantable;
3721     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3722     int max=0;
3723     unsigned int threshold1, threshold2;
3724     int bias=0;
3725     int run_tab[65];
3726     int level_tab[65];
3727     int score_tab[65];
3728     int survivor[65];
3729     int survivor_count;
3730     int last_run=0;
3731     int last_level=0;
3732     int last_score= 0;
3733     int last_i;
3734     int coeff[2][64];
3735     int coeff_count[64];
3736     int qmul, qadd, start_i, last_non_zero, i, dc;
3737     const int esc_length= s->ac_esc_length;
3738     uint8_t * length;
3739     uint8_t * last_length;
3740     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3741
3742     s->fdsp.fdct(block);
3743
3744     if(s->dct_error_sum)
3745         s->denoise_dct(s, block);
3746     qmul= qscale*16;
3747     qadd= ((qscale-1)|1)*8;
3748
3749     if (s->mb_intra) {
3750         int q;
3751         if (!s->h263_aic) {
3752             if (n < 4)
3753                 q = s->y_dc_scale;
3754             else
3755                 q = s->c_dc_scale;
3756             q = q << 3;
3757         } else{
3758             /* For AIC we skip quant/dequant of INTRADC */
3759             q = 1 << 3;
3760             qadd=0;
3761         }
3762
3763         /* note: block[0] is assumed to be positive */
3764         block[0] = (block[0] + (q >> 1)) / q;
3765         start_i = 1;
3766         last_non_zero = 0;
3767         qmat = s->q_intra_matrix[qscale];
3768         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3769             bias= 1<<(QMAT_SHIFT-1);
3770         length     = s->intra_ac_vlc_length;
3771         last_length= s->intra_ac_vlc_last_length;
3772     } else {
3773         start_i = 0;
3774         last_non_zero = -1;
3775         qmat = s->q_inter_matrix[qscale];
3776         length     = s->inter_ac_vlc_length;
3777         last_length= s->inter_ac_vlc_last_length;
3778     }
3779     last_i= start_i;
3780
3781     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3782     threshold2= (threshold1<<1);
3783
3784     for(i=63; i>=start_i; i--) {
3785         const int j = scantable[i];
3786         int level = block[j] * qmat[j];
3787
3788         if(((unsigned)(level+threshold1))>threshold2){
3789             last_non_zero = i;
3790             break;
3791         }
3792     }
3793
3794     for(i=start_i; i<=last_non_zero; i++) {
3795         const int j = scantable[i];
3796         int level = block[j] * qmat[j];
3797
3798 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3799 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3800         if(((unsigned)(level+threshold1))>threshold2){
3801             if(level>0){
3802                 level= (bias + level)>>QMAT_SHIFT;
3803                 coeff[0][i]= level;
3804                 coeff[1][i]= level-1;
3805 //                coeff[2][k]= level-2;
3806             }else{
3807                 level= (bias - level)>>QMAT_SHIFT;
3808                 coeff[0][i]= -level;
3809                 coeff[1][i]= -level+1;
3810 //                coeff[2][k]= -level+2;
3811             }
3812             coeff_count[i]= FFMIN(level, 2);
3813             assert(coeff_count[i]);
3814             max |=level;
3815         }else{
3816             coeff[0][i]= (level>>31)|1;
3817             coeff_count[i]= 1;
3818         }
3819     }
3820
3821     *overflow= s->max_qcoeff < max; //overflow might have happened
3822
3823     if(last_non_zero < start_i){
3824         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3825         return last_non_zero;
3826     }
3827
3828     score_tab[start_i]= 0;
3829     survivor[0]= start_i;
3830     survivor_count= 1;
3831
3832     for(i=start_i; i<=last_non_zero; i++){
3833         int level_index, j, zero_distortion;
3834         int dct_coeff= FFABS(block[ scantable[i] ]);
3835         int best_score=256*256*256*120;
3836
3837         if (s->fdsp.fdct == ff_fdct_ifast)
3838             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3839         zero_distortion= dct_coeff*dct_coeff;
3840
3841         for(level_index=0; level_index < coeff_count[i]; level_index++){
3842             int distortion;
3843             int level= coeff[level_index][i];
3844             const int alevel= FFABS(level);
3845             int unquant_coeff;
3846
3847             assert(level);
3848
3849             if(s->out_format == FMT_H263){
3850                 unquant_coeff= alevel*qmul + qadd;
3851             } else { // MPEG-1
3852                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3853                 if(s->mb_intra){
3854                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3855                         unquant_coeff =   (unquant_coeff - 1) | 1;
3856                 }else{
3857                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3858                         unquant_coeff =   (unquant_coeff - 1) | 1;
3859                 }
3860                 unquant_coeff<<= 3;
3861             }
3862
3863             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3864             level+=64;
3865             if((level&(~127)) == 0){
3866                 for(j=survivor_count-1; j>=0; j--){
3867                     int run= i - survivor[j];
3868                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3869                     score += score_tab[i-run];
3870
3871                     if(score < best_score){
3872                         best_score= score;
3873                         run_tab[i+1]= run;
3874                         level_tab[i+1]= level-64;
3875                     }
3876                 }
3877
3878                 if(s->out_format == FMT_H263){
3879                     for(j=survivor_count-1; j>=0; j--){
3880                         int run= i - survivor[j];
3881                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3882                         score += score_tab[i-run];
3883                         if(score < last_score){
3884                             last_score= score;
3885                             last_run= run;
3886                             last_level= level-64;
3887                             last_i= i+1;
3888                         }
3889                     }
3890                 }
3891             }else{
3892                 distortion += esc_length*lambda;
3893                 for(j=survivor_count-1; j>=0; j--){
3894                     int run= i - survivor[j];
3895                     int score= distortion + score_tab[i-run];
3896
3897                     if(score < best_score){
3898                         best_score= score;
3899                         run_tab[i+1]= run;
3900                         level_tab[i+1]= level-64;
3901                     }
3902                 }
3903
3904                 if(s->out_format == FMT_H263){
3905                   for(j=survivor_count-1; j>=0; j--){
3906                         int run= i - survivor[j];
3907                         int score= distortion + score_tab[i-run];
3908                         if(score < last_score){
3909                             last_score= score;
3910                             last_run= run;
3911                             last_level= level-64;
3912                             last_i= i+1;
3913                         }
3914                     }
3915                 }
3916             }
3917         }
3918
3919         score_tab[i+1]= best_score;
3920
3921         // Note: there is a vlc code in MPEG-4 which is 1 bit shorter then another one with a shorter run and the same level
3922         if(last_non_zero <= 27){
3923             for(; survivor_count; survivor_count--){
3924                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3925                     break;
3926             }
3927         }else{
3928             for(; survivor_count; survivor_count--){
3929                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3930                     break;
3931             }
3932         }
3933
3934         survivor[ survivor_count++ ]= i+1;
3935     }
3936
3937     if(s->out_format != FMT_H263){
3938         last_score= 256*256*256*120;
3939         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3940             int score= score_tab[i];
3941             if (i)
3942                 score += lambda * 2; // FIXME more exact?
3943
3944             if(score < last_score){
3945                 last_score= score;
3946                 last_i= i;
3947                 last_level= level_tab[i];
3948                 last_run= run_tab[i];
3949             }
3950         }
3951     }
3952
3953     s->coded_score[n] = last_score;
3954
3955     dc= FFABS(block[0]);
3956     last_non_zero= last_i - 1;
3957     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3958
3959     if(last_non_zero < start_i)
3960         return last_non_zero;
3961
3962     if(last_non_zero == 0 && start_i == 0){
3963         int best_level= 0;
3964         int best_score= dc * dc;
3965
3966         for(i=0; i<coeff_count[0]; i++){
3967             int level= coeff[i][0];
3968             int alevel= FFABS(level);
3969             int unquant_coeff, score, distortion;
3970
3971             if(s->out_format == FMT_H263){
3972                     unquant_coeff= (alevel*qmul + qadd)>>3;
3973             } else { // MPEG-1
3974                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3975                     unquant_coeff =   (unquant_coeff - 1) | 1;
3976             }
3977             unquant_coeff = (unquant_coeff + 4) >> 3;
3978             unquant_coeff<<= 3 + 3;
3979
3980             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3981             level+=64;
3982             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3983             else                    score= distortion + esc_length*lambda;
3984
3985             if(score < best_score){
3986                 best_score= score;
3987                 best_level= level - 64;
3988             }
3989         }
3990         block[0]= best_level;
3991         s->coded_score[n] = best_score - dc*dc;
3992         if(best_level == 0) return -1;
3993         else                return last_non_zero;
3994     }
3995
3996     i= last_i;
3997     assert(last_level);
3998
3999     block[ perm_scantable[last_non_zero] ]= last_level;
4000     i -= last_run + 1;
4001
4002     for(; i>start_i; i -= run_tab[i] + 1){
4003         block[ perm_scantable[i-1] ]= level_tab[i];
4004     }
4005
4006     return last_non_zero;
4007 }
4008
4009 //#define REFINE_STATS 1
/* Table written by build_basis(): row perm[8*i + j] holds, at index 8*x + y,
 * the rounded and scaled cosine product for frequency pair (i, j).
 * dct_quantize_refine() treats basis[0][0] == 0 as "table not built yet". */
4010 static int16_t basis[64][64];
4011
4012 static void build_basis(uint8_t *perm){
4013     int i, j, x, y;
4014     emms_c();
4015     for(i=0; i<8; i++){
4016         for(j=0; j<8; j++){
4017             for(y=0; y<8; y++){
4018                 for(x=0; x<8; x++){
4019                     double s= 0.25*(1<<BASIS_SHIFT);
4020                     int index= 8*i + j;
4021                     int perm_index= perm[index];
4022                     if(i==0) s*= sqrt(0.5);
4023                     if(j==0) s*= sqrt(0.5);
4024                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4025                 }
4026             }
4027         }
4028     }
4029 }
4030
/**
 * Iteratively refine the quantized levels of one 8x8 block.
 *
 * Starting from the levels already stored in block[], every pass of the main
 * loop tries changing the intra DC level (when start_i is 1) and the levels
 * from start_i onward in scan order by -1 and by +1.  A candidate is scored
 * as the value returned by mpvencdsp.try_8x8basis() for the resulting change
 * of the dequantized coefficient, plus lambda times the change in VLC length
 * taken from the length[] / last_length[] tables.  The best candidate that
 * scores below the unchanged block is applied, rem[] is updated through
 * mpvencdsp.add_8x8basis(), and the search restarts.  The loop ends as soon
 * as no candidate improves the score.
 *
 * @param s      encoder context; supplies the scantables, VLC length tables,
 *               DC scales, lambda2, quantizer_noise_shaping and
 *               block_last_index[n]
 * @param block  quantized levels, modified in place
 * @param weight per-sample weights; overwritten in place with remapped
 *               values (16 .. 63) before they are used
 * @param orig   reference samples; only read (scaled by 1 << RECON_SHIFT
 *               when rem[] is initialised)
 * @param n      block number; indexes block_last_index[] and, for intra
 *               blocks without h263_aic, selects y_dc_scale (n < 4) or
 *               c_dc_scale
 * @param qscale quantizer scale; dequantization here uses
 *               qmul = 2 * qscale and qadd = (qscale - 1) | 1
 * @return scan-order index of the last non-zero level after refinement
 */
4031 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4032                         int16_t *block, int16_t *weight, int16_t *orig,
4033                         int n, int qscale){
4034     int16_t rem[64];
4035     LOCAL_ALIGNED_16(int16_t, d1, [64]);
4036     const uint8_t *scantable= s->intra_scantable.scantable;
4037     const uint8_t *perm_scantable= s->intra_scantable.permutated;
4038 //    unsigned int threshold1, threshold2;
4039 //    int bias=0;
4040     int run_tab[65];
4041     int prev_run=0;
4042     int prev_level=0;
4043     int qmul, qadd, start_i, last_non_zero, i, dc;
4044     uint8_t * length;
4045     uint8_t * last_length;
4046     int lambda;
4047     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4048 #ifdef REFINE_STATS
4049 static int count=0;
4050 static int after_last=0;
4051 static int to_zero=0;
4052 static int from_zero=0;
4053 static int raise=0;
4054 static int lower=0;
4055 static int messed_sign=0;
4056 #endif
4057
         /* Build the basis table on first use; a zero in basis[0][0] is taken
          * to mean that build_basis() has not run yet. */
4058     if(basis[0][0] == 0)
4059         build_basis(s->idsp.idct_permutation);
4060
4061     qmul= qscale*2;
4062     qadd= (qscale-1)|1;
4063     if (s->mb_intra) {
4064         if (!s->h263_aic) {
4065             if (n < 4)
4066                 q = s->y_dc_scale;
4067             else
4068                 q = s->c_dc_scale;
4069         } else{
4070             /* For AIC we skip quant/dequant of INTRADC */
4071             q = 1;
4072             qadd=0;
4073         }
4074         q <<= RECON_SHIFT-3;
4075         /* note: block[0] is assumed to be positive */
4076         dc= block[0]*q;
4077 //        block[0] = (block[0] + (q >> 1)) / q;
4078         start_i = 1;
4079 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4080 //            bias= 1<<(QMAT_SHIFT-1);
4081         length     = s->intra_ac_vlc_length;
4082         last_length= s->intra_ac_vlc_last_length;
4083     } else {
4084         dc= 0;
4085         start_i = 0;
4086         length     = s->inter_ac_vlc_length;
4087         last_length= s->inter_ac_vlc_last_length;
4088     }
4089     last_non_zero = s->block_last_index[n];
4090
4091 #ifdef REFINE_STATS
4092 {START_TIMER
4093 #endif
         /* rem[] starts as (rounding term + scaled intra DC) minus the scaled
          * reference samples; the coded coefficients are added further down. */
4094     dc += (1<<(RECON_SHIFT-1));
4095     for(i=0; i<64; i++){
4096         rem[i] = dc - (orig[i] << RECON_SHIFT); // FIXME use orig directly instead of copying to rem[]
4097     }
4098 #ifdef REFINE_STATS
4099 STOP_TIMER("memset rem[]")}
4100 #endif
         /* Remap weight[] in place and accumulate the sum of squared weights,
          * which scales lambda below. */
4101     sum=0;
4102     for(i=0; i<64; i++){
4103         int one= 36;
4104         int qns=4;
4105         int w;
4106
4107         w= FFABS(weight[i]) + qns*one;
4108         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4109
4110         weight[i] = w;
4111 //        w=weight[i] = (63*qns + (w/2)) / w;
4112
4113         assert(w>0);
4114         assert(w<(1<<6));
4115         sum += w*w;
4116     }
4117     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4118 #ifdef REFINE_STATS
4119 {START_TIMER
4120 #endif
         /* Add every coded coefficient's dequantized value to rem[] and store
          * the zero-run preceding each non-zero level in run_tab[]. */
4121     run=0;
4122     rle_index=0;
4123     for(i=start_i; i<=last_non_zero; i++){
4124         int j= perm_scantable[i];
4125         const int level= block[j];
4126         int coeff;
4127
4128         if(level){
4129             if(level<0) coeff= qmul*level - qadd;
4130             else        coeff= qmul*level + qadd;
4131             run_tab[rle_index++]=run;
4132             run=0;
4133
4134             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4135         }else{
4136             run++;
4137         }
4138     }
4139 #ifdef REFINE_STATS
4140 if(last_non_zero>0){
4141 STOP_TIMER("init rem[]")
4142 }
4143 }
4144
4145 {START_TIMER
4146 #endif
         /* Each iteration looks for the single best +-1 change; the loop is
          * left through the break at the bottom when nothing improves. */
4147     for(;;){
             /* Baseline: score of the current block (change of 0). */
4148         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4149         int best_coeff=0;
4150         int best_change=0;
4151         int run2, best_unquant_change=0, analyze_gradient;
4152 #ifdef REFINE_STATS
4153 {START_TIMER
4154 #endif
4155         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4156
             /* d1[] = forward DCT of the weighted rem[]; used below to skip
              * 0 -> +-1 candidates whose sign equals the sign of d1. */
4157         if(analyze_gradient){
4158 #ifdef REFINE_STATS
4159 {START_TIMER
4160 #endif
4161             for(i=0; i<64; i++){
4162                 int w= weight[i];
4163
4164                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4165             }
4166 #ifdef REFINE_STATS
4167 STOP_TIMER("rem*w*w")}
4168 {START_TIMER
4169 #endif
4170             s->fdsp.fdct(d1);
4171 #ifdef REFINE_STATS
4172 STOP_TIMER("dct")}
4173 #endif
4174         }
4175
             /* Intra DC: try level - 1 and level + 1, keeping the scaled
              * value inside [0, 2048); no VLC length term is added here. */
4176         if(start_i){
4177             const int level= block[0];
4178             int change, old_coeff;
4179
4180             assert(s->mb_intra);
4181
4182             old_coeff= q*level;
4183
4184             for(change=-1; change<=1; change+=2){
4185                 int new_level= level + change;
4186                 int score, new_coeff;
4187
4188                 new_coeff= q*new_level;
4189                 if(new_coeff >= 2048 || new_coeff < 0)
4190                     continue;
4191
4192                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4193                                                   new_coeff - old_coeff);
4194                 if(score<best_score){
4195                     best_score= score;
4196                     best_coeff= 0;
4197                     best_change= change;
4198                     best_unquant_change= new_coeff - old_coeff;
4199                 }
4200             }
4201         }
4202
             /* run  = zeros seen since the previous non-zero level,
              * run2 = zeros remaining until the next non-zero level
              *        (taken from run_tab[]). */
4203         run=0;
4204         rle_index=0;
4205         run2= run_tab[rle_index++];
4206         prev_level=0;
4207         prev_run=0;
4208
4209         for(i=start_i; i<64; i++){
4210             int j= perm_scantable[i];
4211             const int level= block[j];
4212             int change, old_coeff;
4213
                 /* Below noise-shaping level 3 only one position past the
                  * current last coefficient is examined. */
4214             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4215                 break;
4216
4217             if(level){
4218                 if(level<0) old_coeff= qmul*level - qadd;
4219                 else        old_coeff= qmul*level + qadd;
4220                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4221             }else{
4222                 old_coeff=0;
4223                 run2--;
4224                 assert(run2>=0 || i >= last_non_zero );
4225             }
4226
4227             for(change=-1; change<=1; change+=2){
4228                 int new_level= level + change;
4229                 int score, new_coeff, unquant_change;
4230
4231                 score=0;
                     /* Below noise-shaping level 2 a change may not increase
                      * the magnitude of the level. */
4232                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4233                    continue;
4234
                     /* score first accumulates the VLC length difference for
                      * three cases: non-zero -> non-zero, zero -> +-1 (a run
                      * is split) and +-1 -> zero (two runs merge). */
4235                 if(new_level){
4236                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4237                     else            new_coeff= qmul*new_level + qadd;
4238                     if(new_coeff >= 2048 || new_coeff <= -2048)
4239                         continue;
4240                     //FIXME check for overflow
4241
4242                     if(level){
4243                         if(level < 63 && level > -63){
4244                             if(i < last_non_zero)
4245                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4246                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4247                             else
4248                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4249                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4250                         }
4251                     }else{
4252                         assert(FFABS(new_level)==1);
4253
4254                         if(analyze_gradient){
4255                             int g= d1[ scantable[i] ];
4256                             if(g && (g^new_level) >= 0)
4257                                 continue;
4258                         }
4259
4260                         if(i < last_non_zero){
4261                             int next_i= i + run2 + 1;
4262                             int next_level= block[ perm_scantable[next_i] ] + 64;
4263
4264                             if(next_level&(~127))
4265                                 next_level= 0;
4266
4267                             if(next_i < last_non_zero)
4268                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4269                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4270                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4271                             else
4272                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4273                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4274                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4275                         }else{
4276                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4277                             if(prev_level){
4278                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4279                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4280                             }
4281                         }
4282                     }
4283                 }else{
4284                     new_coeff=0;
4285                     assert(FFABS(level)==1);
4286
4287                     if(i < last_non_zero){
4288                         int next_i= i + run2 + 1;
4289                         int next_level= block[ perm_scantable[next_i] ] + 64;
4290
4291                         if(next_level&(~127))
4292                             next_level= 0;
4293
4294                         if(next_i < last_non_zero)
4295                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4296                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4297                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4298                         else
4299                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4300                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4301                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4302                     }else{
4303                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4304                         if(prev_level){
4305                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4306                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4307                         }
4308                     }
4309                 }
4310
                     /* Convert the length difference to a cost, then add the
                      * try_8x8basis() result for the coefficient change. */
4311                 score *= lambda;
4312
4313                 unquant_change= new_coeff - old_coeff;
4314                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
4315
4316                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4317                                                    unquant_change);
4318                 if(score<best_score){
4319                     best_score= score;
4320                     best_coeff= i;
4321                     best_change= change;
4322                     best_unquant_change= unquant_change;
4323                 }
4324             }
                 /* Remember the previous non-zero level (offset by 64, or 0
                  * when outside 0..127) and its run for the "last" cases. */
4325             if(level){
4326                 prev_level= level + 64;
4327                 if(prev_level&(~127))
4328                     prev_level= 0;
4329                 prev_run= run;
4330                 run=0;
4331             }else{
4332                 run++;
4333             }
4334         }
4335 #ifdef REFINE_STATS
4336 STOP_TIMER("iterative step")}
4337 #endif
4338
             /* Apply the winning change, fix up last_non_zero, rebuild
              * run_tab[] and fold the change into rem[]. */
4339         if(best_change){
4340             int j= perm_scantable[ best_coeff ];
4341
4342             block[j] += best_change;
4343
4344             if(best_coeff > last_non_zero){
4345                 last_non_zero= best_coeff;
4346                 assert(block[j]);
4347 #ifdef REFINE_STATS
4348 after_last++;
4349 #endif
4350             }else{
4351 #ifdef REFINE_STATS
4352 if(block[j]){
4353     if(block[j] - best_change){
4354         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4355             raise++;
4356         }else{
4357             lower++;
4358         }
4359     }else{
4360         from_zero++;
4361     }
4362 }else{
4363     to_zero++;
4364 }
4365 #endif
                     /* The changed level may have become zero: walk back to
                      * the new last non-zero position. */
4366                 for(; last_non_zero>=start_i; last_non_zero--){
4367                     if(block[perm_scantable[last_non_zero]])
4368                         break;
4369                 }
4370             }
4371 #ifdef REFINE_STATS
4372 count++;
4373 if(256*256*256*64 % count == 0){
4374     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4375 }
4376 #endif
4377             run=0;
4378             rle_index=0;
4379             for(i=start_i; i<=last_non_zero; i++){
4380                 int j= perm_scantable[i];
4381                 const int level= block[j];
4382
4383                  if(level){
4384                      run_tab[rle_index++]=run;
4385                      run=0;
4386                  }else{
4387                      run++;
4388                  }
4389             }
4390
                 /* j here is the outer j (position of best_coeff); the loop
                  * above declares its own j in an inner scope. */
4391             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4392         }else{
4393             break;
4394         }
4395     }
4396 #ifdef REFINE_STATS
4397 if(last_non_zero>0){
4398 STOP_TIMER("iterative search")
4399 }
4400 }
4401 #endif
4402
4403     return last_non_zero;
4404 }
4405
/**
 * Permute the leading coefficients of an 8x8 block in place.
 *
 * Only the positions scantable[0] .. scantable[last] are touched: each of
 * them is first saved and cleared, then its saved value is stored at
 * permutation[position].  Positions not reached by that scan prefix keep
 * their value unless a permuted coefficient lands on them.
 *
 * @param block       64 coefficients, reordered in place
 * @param permutation 64-entry table giving the destination index of every
 *                    source index; only read
 * @param scantable   scan order; used solely to limit the work to the first
 *                    last + 1 scanned positions, the block is not
 *                    (inverse) permuted into scan order
 * @param last        index in scan order of the last coefficient to move;
 *                    nothing is done when last <= 0
 */
static void block_permute(int16_t *block, const uint8_t *permutation,
                          const uint8_t *scantable, int last)
{
    int16_t saved[64];
    int i;

    if (last <= 0)
        return;

    /* Pass 1: lift the scanned coefficients out and clear their slots so
     * that a destination never holds a stale value. */
    for (i = 0; i <= last; i++) {
        const int pos = scantable[i];

        saved[pos] = block[pos];
        block[pos] = 0;
    }

    /* Pass 2: drop every saved coefficient at its permuted position. */
    for (i = 0; i <= last; i++) {
        const int pos = scantable[i];

        block[permutation[pos]] = saved[pos];
    }
}
4441
4442 int ff_dct_quantize_c(MpegEncContext *s,
4443                         int16_t *block, int n,
4444                         int qscale, int *overflow)
4445 {
4446     int i, j, level, last_non_zero, q, start_i;
4447     const int *qmat;
4448     const uint8_t *scantable= s->intra_scantable.scantable;
4449     int bias;
4450     int max=0;
4451     unsigned int threshold1, threshold2;
4452
4453     s->fdsp.fdct(block);
4454
4455     if(s->dct_error_sum)
4456         s->denoise_dct(s, block);
4457
4458     if (s->mb_intra) {
4459         if (!s->h263_aic) {
4460             if (n < 4)
4461                 q = s->y_dc_scale;
4462             else
4463                 q = s->c_dc_scale;
4464             q = q << 3;
4465         } else
4466             /* For AIC we skip quant/dequant of INTRADC */
4467             q = 1 << 3;
4468
4469         /* note: block[0] is assumed to be positive */
4470         block[0] = (block[0] + (q >> 1)) / q;
4471         start_i = 1;
4472         last_non_zero = 0;
4473         qmat = s->q_intra_matrix[qscale];
4474         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4475     } else {
4476         start_i = 0;
4477         last_non_zero = -1;
4478         qmat = s->q_inter_matrix[qscale];
4479         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4480     }
4481     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4482     threshold2= (threshold1<<1);
4483     for(i=63;i>=start_i;i--) {
4484         j = scantable[i];
4485         level = block[j] * qmat[j];
4486
4487         if(((unsigned)(level+threshold1))>threshold2){
4488             last_non_zero = i;
4489             break;
4490         }else{
4491             block[j]=0;
4492         }
4493     }
4494     for(i=start_i; i<=last_non_zero; i++) {
4495         j = scantable[i];
4496         level = block[j] * qmat[j];
4497
4498 //        if(   bias+level >= (1<<QMAT_SHIFT)
4499 //           || bias-level >= (1<<QMAT_SHIFT)){
4500         if(((unsigned)(level+threshold1))>threshold2){
4501             if(level>0){
4502                 level= (bias + level)>>QMAT_SHIFT;
4503                 block[j]= level;
4504             }else{
4505                 level= (bias - level)>>QMAT_SHIFT;
4506                 block[j]= -level;
4507             }
4508             max |=level;
4509         }else{
4510             block[j]=0;
4511         }
4512     }
4513     *overflow= s->max_qcoeff < max; //overflow might have happened
4514
4515     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4516     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4517         block_permute(block, s->idsp.idct_permutation,
4518                       scantable, last_non_zero);
4519
4520     return last_non_zero;
4521 }
4522
/* OFFSET(x): byte offset of field x inside MpegEncContext.
 * VE: option flags marking an entry as a video encoding parameter. */
4523 #define OFFSET(x) offsetof(MpegEncContext, x)
4524 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
/* Private options of the H.263 encoder; each entry binds an option name to
 * an MpegEncContext field through OFFSET().  FF_MPV_COMMON_OPTS is defined
 * elsewhere -- presumably the options shared by the mpegvideo encoders
 * (TODO confirm).  The { NULL } entry terminates the list. */
4525 static const AVOption h263_options[] = {
4526     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4527     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4528     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4529     FF_MPV_COMMON_OPTS
4530     { NULL },
4531 };
4532
/* AVClass of the H.263 encoder; exposes h263_options as its option table. */
4533 static const AVClass h263_class = {
4534     .class_name = "H.263 encoder",
4535     .item_name  = av_default_item_name,
4536     .option     = h263_options,
4537     .version    = LIBAVUTIL_VERSION_INT,
4538 };
4539
/* Codec entry for the "h263" encoder: MpegEncContext is the private context,
 * init/encode/close are the ff_mpv_encode_* functions, and YUV420P is the
 * only listed pixel format. */
4540 AVCodec ff_h263_encoder = {
4541     .name           = "h263",
4542     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4543     .type           = AVMEDIA_TYPE_VIDEO,
4544     .id             = AV_CODEC_ID_H263,
4545     .priv_data_size = sizeof(MpegEncContext),
4546     .init           = ff_mpv_encode_init,
4547     .encode2        = ff_mpv_encode_picture,
4548     .close          = ff_mpv_encode_end,
4549     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4550     .priv_class     = &h263_class,
4551 };
4552
/* Private options of the H.263+ encoder: four 0/1 switches stored in
 * MpegEncContext fields, followed by FF_MPV_COMMON_OPTS (defined elsewhere)
 * and the terminating { NULL } entry. */
4553 static const AVOption h263p_options[] = {
4554     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4555     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4556     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4557     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4558     FF_MPV_COMMON_OPTS
4559     { NULL },
4560 };
/* AVClass of the H.263+ encoder; exposes h263p_options as its option table. */
4561 static const AVClass h263p_class = {
4562     .class_name = "H.263p encoder",
4563     .item_name  = av_default_item_name,
4564     .option     = h263p_options,
4565     .version    = LIBAVUTIL_VERSION_INT,
4566 };
4567
/* Codec entry for the "h263p" encoder.  Same ff_mpv_encode_* callbacks and
 * YUV420P-only format list as the other encoders in this file; it is the
 * one entry here that also sets AV_CODEC_CAP_SLICE_THREADS. */
4568 AVCodec ff_h263p_encoder = {
4569     .name           = "h263p",
4570     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4571     .type           = AVMEDIA_TYPE_VIDEO,
4572     .id             = AV_CODEC_ID_H263P,
4573     .priv_data_size = sizeof(MpegEncContext),
4574     .init           = ff_mpv_encode_init,
4575     .encode2        = ff_mpv_encode_picture,
4576     .close          = ff_mpv_encode_end,
4577     .capabilities   = AV_CODEC_CAP_SLICE_THREADS,
4578     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4579     .priv_class     = &h263p_class,
4580 };
4581
/* AVClass of the msmpeg4v2 encoder; it has no codec-specific options and
 * uses the shared ff_mpv_generic_options table. */
4582 static const AVClass msmpeg4v2_class = {
4583     .class_name = "msmpeg4v2 encoder",
4584     .item_name  = av_default_item_name,
4585     .option     = ff_mpv_generic_options,
4586     .version    = LIBAVUTIL_VERSION_INT,
4587 };
4588
/* Codec entry for the "msmpeg4v2" encoder (AV_CODEC_ID_MSMPEG4V2), built on
 * the ff_mpv_encode_* callbacks; YUV420P is the only listed pixel format. */
4589 AVCodec ff_msmpeg4v2_encoder = {
4590     .name           = "msmpeg4v2",
4591     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4592     .type           = AVMEDIA_TYPE_VIDEO,
4593     .id             = AV_CODEC_ID_MSMPEG4V2,
4594     .priv_data_size = sizeof(MpegEncContext),
4595     .init           = ff_mpv_encode_init,
4596     .encode2        = ff_mpv_encode_picture,
4597     .close          = ff_mpv_encode_end,
4598     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4599     .priv_class     = &msmpeg4v2_class,
4600 };
4601
/* AVClass of the msmpeg4v3 encoder; uses the shared ff_mpv_generic_options
 * table. */
4602 static const AVClass msmpeg4v3_class = {
4603     .class_name = "msmpeg4v3 encoder",
4604     .item_name  = av_default_item_name,
4605     .option     = ff_mpv_generic_options,
4606     .version    = LIBAVUTIL_VERSION_INT,
4607 };
4608
/* Codec entry for AV_CODEC_ID_MSMPEG4V3.  Note that the registered name is
 * "msmpeg4", without a version suffix, unlike the variable and class names. */
4609 AVCodec ff_msmpeg4v3_encoder = {
4610     .name           = "msmpeg4",
4611     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4612     .type           = AVMEDIA_TYPE_VIDEO,
4613     .id             = AV_CODEC_ID_MSMPEG4V3,
4614     .priv_data_size = sizeof(MpegEncContext),
4615     .init           = ff_mpv_encode_init,
4616     .encode2        = ff_mpv_encode_picture,
4617     .close          = ff_mpv_encode_end,
4618     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4619     .priv_class     = &msmpeg4v3_class,
4620 };
4621
/* AVClass of the wmv1 encoder; uses the shared ff_mpv_generic_options
 * table. */
4622 static const AVClass wmv1_class = {
4623     .class_name = "wmv1 encoder",
4624     .item_name  = av_default_item_name,
4625     .option     = ff_mpv_generic_options,
4626     .version    = LIBAVUTIL_VERSION_INT,
4627 };
4628
/* Codec entry for the "wmv1" encoder (AV_CODEC_ID_WMV1), built on the
 * ff_mpv_encode_* callbacks; YUV420P is the only listed pixel format. */
4629 AVCodec ff_wmv1_encoder = {
4630     .name           = "wmv1",
4631     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4632     .type           = AVMEDIA_TYPE_VIDEO,
4633     .id             = AV_CODEC_ID_WMV1,
4634     .priv_data_size = sizeof(MpegEncContext),
4635     .init           = ff_mpv_encode_init,
4636     .encode2        = ff_mpv_encode_picture,
4637     .close          = ff_mpv_encode_end,
4638     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4639     .priv_class     = &wmv1_class,
4640 };