]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
Merge commit '5e129ed655bff5b6d90355c0b713d7aaba3898ec'
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "idctdsp.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "mpegvideodata.h"
44 #include "h261.h"
45 #include "h263.h"
46 #include "h263data.h"
47 #include "mjpegenc_common.h"
48 #include "mathops.h"
49 #include "mpegutils.h"
50 #include "mjpegenc.h"
51 #include "msmpeg4.h"
52 #include "pixblockdsp.h"
53 #include "qpeldsp.h"
54 #include "faandct.h"
55 #include "thread.h"
56 #include "aandcttab.h"
57 #include "flv.h"
58 #include "mpeg4video.h"
59 #include "internal.h"
60 #include "bytestream.h"
61 #include "wmv2.h"
62 #include "rv10.h"
63 #include <limits.h>
64 #include "sp5x.h"
65
66 #define QUANT_BIAS_SHIFT 8
67
68 #define QMAT_SHIFT_MMX 16
69 #define QMAT_SHIFT 21
70
71 static int encode_picture(MpegEncContext *s, int picture_number);
72 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
73 static int sse_mb(MpegEncContext *s);
74 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
75 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
76
77 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
78 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
79
80 const AVOption ff_mpv_generic_options[] = {
81     FF_MPV_COMMON_OPTS
82     { NULL },
83 };
84
85 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
86                        uint16_t (*qmat16)[2][64],
87                        const uint16_t *quant_matrix,
88                        int bias, int qmin, int qmax, int intra)
89 {
90     FDCTDSPContext *fdsp = &s->fdsp;
91     int qscale;
92     int shift = 0;
93
94     for (qscale = qmin; qscale <= qmax; qscale++) {
95         int i;
96         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
97 #if CONFIG_FAANDCT
98             fdsp->fdct == ff_faandct            ||
99 #endif /* CONFIG_FAANDCT */
100             fdsp->fdct == ff_jpeg_fdct_islow_10) {
101             for (i = 0; i < 64; i++) {
102                 const int j = s->idsp.idct_permutation[i];
103                 int64_t den = (int64_t) qscale * quant_matrix[j];
104                 /* 16 <= qscale * quant_matrix[i] <= 7905
105                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
106                  *             19952 <=              x  <= 249205026
107                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
108                  *           3444240 >= (1 << 36) / (x) >= 275 */
109
110                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
111             }
112         } else if (fdsp->fdct == ff_fdct_ifast) {
113             for (i = 0; i < 64; i++) {
114                 const int j = s->idsp.idct_permutation[i];
115                 int64_t den = ff_aanscales[i] * (int64_t) qscale * quant_matrix[j];
116                 /* 16 <= qscale * quant_matrix[i] <= 7905
117                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
118                  *             19952 <=              x  <= 249205026
119                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
120                  *           3444240 >= (1 << 36) / (x) >= 275 */
121
122                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / den);
123             }
124         } else {
125             for (i = 0; i < 64; i++) {
126                 const int j = s->idsp.idct_permutation[i];
127                 int64_t den = (int64_t) qscale * quant_matrix[j];
128                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
129                  * Assume x = qscale * quant_matrix[i]
130                  * So             16 <=              x  <= 7905
131                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
132                  * so          32768 >= (1 << 19) / (x) >= 67 */
133                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
134                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
135                 //                    (qscale * quant_matrix[i]);
136                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / den;
137
138                 if (qmat16[qscale][0][i] == 0 ||
139                     qmat16[qscale][0][i] == 128 * 256)
140                     qmat16[qscale][0][i] = 128 * 256 - 1;
141                 qmat16[qscale][1][i] =
142                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
143                                 qmat16[qscale][0][i]);
144             }
145         }
146
147         for (i = intra; i < 64; i++) {
148             int64_t max = 8191;
149             if (fdsp->fdct == ff_fdct_ifast) {
150                 max = (8191LL * ff_aanscales[i]) >> 14;
151             }
152             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
153                 shift++;
154             }
155         }
156     }
157     if (shift) {
158         av_log(NULL, AV_LOG_INFO,
159                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
160                QMAT_SHIFT - shift);
161     }
162 }
163
164 static inline void update_qscale(MpegEncContext *s)
165 {
166     if (s->q_scale_type == 1) {
167         int i;
168         int bestdiff=INT_MAX;
169         int best = 1;
170         static const uint8_t non_linear_qscale[] = {
171             1,2,3,4,5,6,7,8,9,10,11,12,14,16,18,20,24,26,28
172         };
173
174         for (i = 0 ; i<FF_ARRAY_ELEMS(non_linear_qscale); i++) {
175             int diff = FFABS((non_linear_qscale[i]<<(FF_LAMBDA_SHIFT + 7)) - (int)s->lambda * 139);
176             if (non_linear_qscale[i] < s->avctx->qmin ||
177                 (non_linear_qscale[i] > s->avctx->qmax && !s->vbv_ignore_qmax))
178                 continue;
179             if (diff < bestdiff) {
180                 bestdiff = diff;
181                 best = non_linear_qscale[i];
182             }
183         }
184         s->qscale = best;
185     } else {
186         s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
187                     (FF_LAMBDA_SHIFT + 7);
188         s->qscale = av_clip(s->qscale, s->avctx->qmin, s->vbv_ignore_qmax ? 31 : s->avctx->qmax);
189     }
190
191     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
192                  FF_LAMBDA_SHIFT;
193 }
194
195 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
196 {
197     int i;
198
199     if (matrix) {
200         put_bits(pb, 1, 1);
201         for (i = 0; i < 64; i++) {
202             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
203         }
204     } else
205         put_bits(pb, 1, 0);
206 }
207
208 /**
209  * init s->current_picture.qscale_table from s->lambda_table
210  */
211 void ff_init_qscale_tab(MpegEncContext *s)
212 {
213     int8_t * const qscale_table = s->current_picture.qscale_table;
214     int i;
215
216     for (i = 0; i < s->mb_num; i++) {
217         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
218         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
219         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
220                                                   s->avctx->qmax);
221     }
222 }
223
224 static void update_duplicate_context_after_me(MpegEncContext *dst,
225                                               MpegEncContext *src)
226 {
227 #define COPY(a) dst->a= src->a
228     COPY(pict_type);
229     COPY(current_picture);
230     COPY(f_code);
231     COPY(b_code);
232     COPY(qscale);
233     COPY(lambda);
234     COPY(lambda2);
235     COPY(picture_in_gop_number);
236     COPY(gop_picture_number);
237     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
238     COPY(progressive_frame);    // FIXME don't set in encode_header
239     COPY(partitioned_frame);    // FIXME don't set in encode_header
240 #undef COPY
241 }
242
243 /**
244  * Set the given MpegEncContext to defaults for encoding.
245  * the changed fields will not depend upon the prior state of the MpegEncContext.
246  */
247 static void mpv_encode_defaults(MpegEncContext *s)
248 {
249     int i;
250     ff_mpv_common_defaults(s);
251
252     for (i = -16; i < 16; i++) {
253         default_fcode_tab[i + MAX_MV] = 1;
254     }
255     s->me.mv_penalty = default_mv_penalty;
256     s->fcode_tab     = default_fcode_tab;
257
258     s->input_picture_number  = 0;
259     s->picture_in_gop_number = 0;
260 }
261
262 av_cold int ff_dct_encode_init(MpegEncContext *s) {
263     if (ARCH_X86)
264         ff_dct_encode_init_x86(s);
265
266     if (CONFIG_H263_ENCODER)
267         ff_h263dsp_init(&s->h263dsp);
268     if (!s->dct_quantize)
269         s->dct_quantize = ff_dct_quantize_c;
270     if (!s->denoise_dct)
271         s->denoise_dct  = denoise_dct_c;
272     s->fast_dct_quantize = s->dct_quantize;
273     if (s->avctx->trellis)
274         s->dct_quantize  = dct_quantize_trellis_c;
275
276     return 0;
277 }
278
279 /* init video encoder */
280 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
281 {
282     MpegEncContext *s = avctx->priv_data;
283     int i, ret, format_supported;
284
285     mpv_encode_defaults(s);
286
287     switch (avctx->codec_id) {
288     case AV_CODEC_ID_MPEG2VIDEO:
289         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
290             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
291             av_log(avctx, AV_LOG_ERROR,
292                    "only YUV420 and YUV422 are supported\n");
293             return -1;
294         }
295         break;
296     case AV_CODEC_ID_MJPEG:
297     case AV_CODEC_ID_AMV:
298         format_supported = 0;
299         /* JPEG color space */
300         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
301             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
302             avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
303             (avctx->color_range == AVCOL_RANGE_JPEG &&
304              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
305               avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
306               avctx->pix_fmt == AV_PIX_FMT_YUV444P)))
307             format_supported = 1;
308         /* MPEG color space */
309         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
310                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
311                   avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
312                   avctx->pix_fmt == AV_PIX_FMT_YUV444P))
313             format_supported = 1;
314
315         if (!format_supported) {
316             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
317             return -1;
318         }
319         break;
320     default:
321         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
322             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
323             return -1;
324         }
325     }
326
327     switch (avctx->pix_fmt) {
328     case AV_PIX_FMT_YUVJ444P:
329     case AV_PIX_FMT_YUV444P:
330         s->chroma_format = CHROMA_444;
331         break;
332     case AV_PIX_FMT_YUVJ422P:
333     case AV_PIX_FMT_YUV422P:
334         s->chroma_format = CHROMA_422;
335         break;
336     case AV_PIX_FMT_YUVJ420P:
337     case AV_PIX_FMT_YUV420P:
338     default:
339         s->chroma_format = CHROMA_420;
340         break;
341     }
342
343     s->bit_rate = avctx->bit_rate;
344     s->width    = avctx->width;
345     s->height   = avctx->height;
346     if (avctx->gop_size > 600 &&
347         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
348         av_log(avctx, AV_LOG_WARNING,
349                "keyframe interval too large!, reducing it from %d to %d\n",
350                avctx->gop_size, 600);
351         avctx->gop_size = 600;
352     }
353     s->gop_size     = avctx->gop_size;
354     s->avctx        = avctx;
355     if (avctx->max_b_frames > MAX_B_FRAMES) {
356         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
357                "is %d.\n", MAX_B_FRAMES);
358         avctx->max_b_frames = MAX_B_FRAMES;
359     }
360     s->max_b_frames = avctx->max_b_frames;
361     s->codec_id     = avctx->codec->id;
362     s->strict_std_compliance = avctx->strict_std_compliance;
363     s->quarter_sample     = (avctx->flags & AV_CODEC_FLAG_QPEL) != 0;
364     s->mpeg_quant         = avctx->mpeg_quant;
365     s->rtp_mode           = !!avctx->rtp_payload_size;
366     s->intra_dc_precision = avctx->intra_dc_precision;
367
368     // workaround some differences between how applications specify dc precision
369     if (s->intra_dc_precision < 0) {
370         s->intra_dc_precision += 8;
371     } else if (s->intra_dc_precision >= 8)
372         s->intra_dc_precision -= 8;
373
374     if (s->intra_dc_precision < 0) {
375         av_log(avctx, AV_LOG_ERROR,
376                 "intra dc precision must be positive, note some applications use"
377                 " 0 and some 8 as base meaning 8bit, the value must not be smaller than that\n");
378         return AVERROR(EINVAL);
379     }
380
381     if (s->intra_dc_precision > (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO ? 3 : 0)) {
382         av_log(avctx, AV_LOG_ERROR, "intra dc precision too large\n");
383         return AVERROR(EINVAL);
384     }
385     s->user_specified_pts = AV_NOPTS_VALUE;
386
387     if (s->gop_size <= 1) {
388         s->intra_only = 1;
389         s->gop_size   = 12;
390     } else {
391         s->intra_only = 0;
392     }
393
394 #if FF_API_MOTION_EST
395 FF_DISABLE_DEPRECATION_WARNINGS
396     s->me_method = avctx->me_method;
397 FF_ENABLE_DEPRECATION_WARNINGS
398 #endif
399
400     /* Fixed QSCALE */
401     s->fixed_qscale = !!(avctx->flags & AV_CODEC_FLAG_QSCALE);
402
403 #if FF_API_MPV_OPT
404     FF_DISABLE_DEPRECATION_WARNINGS
405     if (avctx->border_masking != 0.0)
406         s->border_masking = avctx->border_masking;
407     FF_ENABLE_DEPRECATION_WARNINGS
408 #endif
409
410     s->adaptive_quant = (s->avctx->lumi_masking ||
411                          s->avctx->dark_masking ||
412                          s->avctx->temporal_cplx_masking ||
413                          s->avctx->spatial_cplx_masking  ||
414                          s->avctx->p_masking      ||
415                          s->border_masking ||
416                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
417                         !s->fixed_qscale;
418
419     s->loop_filter = !!(s->avctx->flags & AV_CODEC_FLAG_LOOP_FILTER);
420
421     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
422         switch(avctx->codec_id) {
423         case AV_CODEC_ID_MPEG1VIDEO:
424         case AV_CODEC_ID_MPEG2VIDEO:
425             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112LL / 15000000 * 16384;
426             break;
427         case AV_CODEC_ID_MPEG4:
428         case AV_CODEC_ID_MSMPEG4V1:
429         case AV_CODEC_ID_MSMPEG4V2:
430         case AV_CODEC_ID_MSMPEG4V3:
431             if       (avctx->rc_max_rate >= 15000000) {
432                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000LL) * (760-320) / (38400000 - 15000000);
433             } else if(avctx->rc_max_rate >=  2000000) {
434                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000LL) * (320- 80) / (15000000 -  2000000);
435             } else if(avctx->rc_max_rate >=   384000) {
436                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000LL) * ( 80- 40) / ( 2000000 -   384000);
437             } else
438                 avctx->rc_buffer_size = 40;
439             avctx->rc_buffer_size *= 16384;
440             break;
441         }
442         if (avctx->rc_buffer_size) {
443             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
444         }
445     }
446
447     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
448         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
449         return -1;
450     }
451
452     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
453         av_log(avctx, AV_LOG_INFO,
454                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
455     }
456
457     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
458         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
459         return -1;
460     }
461
462     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
463         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
464         return -1;
465     }
466
467     if (avctx->rc_max_rate &&
468         avctx->rc_max_rate == avctx->bit_rate &&
469         avctx->rc_max_rate != avctx->rc_min_rate) {
470         av_log(avctx, AV_LOG_INFO,
471                "impossible bitrate constraints, this will fail\n");
472     }
473
474     if (avctx->rc_buffer_size &&
475         avctx->bit_rate * (int64_t)avctx->time_base.num >
476             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
477         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
478         return -1;
479     }
480
481     if (!s->fixed_qscale &&
482         avctx->bit_rate * av_q2d(avctx->time_base) >
483             avctx->bit_rate_tolerance) {
484         av_log(avctx, AV_LOG_WARNING,
485                "bitrate tolerance %d too small for bitrate %d, overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
486         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
487     }
488
489     if (s->avctx->rc_max_rate &&
490         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
491         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
492          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
493         90000LL * (avctx->rc_buffer_size - 1) >
494             s->avctx->rc_max_rate * 0xFFFFLL) {
495         av_log(avctx, AV_LOG_INFO,
496                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
497                "specified vbv buffer is too large for the given bitrate!\n");
498     }
499
500     if ((s->avctx->flags & AV_CODEC_FLAG_4MV) && s->codec_id != AV_CODEC_ID_MPEG4 &&
501         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
502         s->codec_id != AV_CODEC_ID_FLV1) {
503         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
504         return -1;
505     }
506
507     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
508         av_log(avctx, AV_LOG_ERROR,
509                "OBMC is only supported with simple mb decision\n");
510         return -1;
511     }
512
513     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
514         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
515         return -1;
516     }
517
518     if (s->max_b_frames                    &&
519         s->codec_id != AV_CODEC_ID_MPEG4      &&
520         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
521         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
522         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
523         return -1;
524     }
525     if (s->max_b_frames < 0) {
526         av_log(avctx, AV_LOG_ERROR,
527                "max b frames must be 0 or positive for mpegvideo based encoders\n");
528         return -1;
529     }
530
531     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
532          s->codec_id == AV_CODEC_ID_H263  ||
533          s->codec_id == AV_CODEC_ID_H263P) &&
534         (avctx->sample_aspect_ratio.num > 255 ||
535          avctx->sample_aspect_ratio.den > 255)) {
536         av_log(avctx, AV_LOG_WARNING,
537                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
538                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
539         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
540                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
541     }
542
543     if ((s->codec_id == AV_CODEC_ID_H263  ||
544          s->codec_id == AV_CODEC_ID_H263P) &&
545         (avctx->width  > 2048 ||
546          avctx->height > 1152 )) {
547         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
548         return -1;
549     }
550     if ((s->codec_id == AV_CODEC_ID_H263  ||
551          s->codec_id == AV_CODEC_ID_H263P) &&
552         ((avctx->width &3) ||
553          (avctx->height&3) )) {
554         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
555         return -1;
556     }
557
558     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
559         (avctx->width  > 4095 ||
560          avctx->height > 4095 )) {
561         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
562         return -1;
563     }
564
565     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
566         (avctx->width  > 16383 ||
567          avctx->height > 16383 )) {
568         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
569         return -1;
570     }
571
572     if (s->codec_id == AV_CODEC_ID_RV10 &&
573         (avctx->width &15 ||
574          avctx->height&15 )) {
575         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
576         return AVERROR(EINVAL);
577     }
578
579     if (s->codec_id == AV_CODEC_ID_RV20 &&
580         (avctx->width &3 ||
581          avctx->height&3 )) {
582         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
583         return AVERROR(EINVAL);
584     }
585
586     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
587          s->codec_id == AV_CODEC_ID_WMV2) &&
588          avctx->width & 1) {
589          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
590          return -1;
591     }
592
593     if ((s->avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT | AV_CODEC_FLAG_INTERLACED_ME)) &&
594         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
595         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
596         return -1;
597     }
598
599     // FIXME mpeg2 uses that too
600     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
601                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
602         av_log(avctx, AV_LOG_ERROR,
603                "mpeg2 style quantization not supported by codec\n");
604         return -1;
605     }
606
607     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
608         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
609         return -1;
610     }
611
612     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
613         s->avctx->mb_decision != FF_MB_DECISION_RD) {
614         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
615         return -1;
616     }
617
618     if (s->avctx->scenechange_threshold < 1000000000 &&
619         (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)) {
620         av_log(avctx, AV_LOG_ERROR,
621                "closed gop with scene change detection are not supported yet, "
622                "set threshold to 1000000000\n");
623         return -1;
624     }
625
626     if (s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY) {
627         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
628             av_log(avctx, AV_LOG_ERROR,
629                   "low delay forcing is only available for mpeg2\n");
630             return -1;
631         }
632         if (s->max_b_frames != 0) {
633             av_log(avctx, AV_LOG_ERROR,
634                    "b frames cannot be used with low delay\n");
635             return -1;
636         }
637     }
638
639     if (s->q_scale_type == 1) {
640         if (avctx->qmax > 28) {
641             av_log(avctx, AV_LOG_ERROR,
642                    "non linear quant only supports qmax <= 28 currently\n");
643             return -1;
644         }
645     }
646
647     if (s->avctx->thread_count > 1         &&
648         s->codec_id != AV_CODEC_ID_MPEG4      &&
649         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
650         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
651         s->codec_id != AV_CODEC_ID_MJPEG      &&
652         (s->codec_id != AV_CODEC_ID_H263P)) {
653         av_log(avctx, AV_LOG_ERROR,
654                "multi threaded encoding not supported by codec\n");
655         return -1;
656     }
657
658     if (s->avctx->thread_count < 1) {
659         av_log(avctx, AV_LOG_ERROR,
660                "automatic thread number detection not supported by codec, "
661                "patch welcome\n");
662         return -1;
663     }
664
665     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
666         s->rtp_mode = 1;
667
668     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
669         s->h263_slice_structured = 1;
670
671     if (!avctx->time_base.den || !avctx->time_base.num) {
672         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
673         return -1;
674     }
675
676     if (avctx->b_frame_strategy && (avctx->flags & AV_CODEC_FLAG_PASS2)) {
677         av_log(avctx, AV_LOG_INFO,
678                "notice: b_frame_strategy only affects the first pass\n");
679         avctx->b_frame_strategy = 0;
680     }
681
682     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
683     if (i > 1) {
684         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
685         avctx->time_base.den /= i;
686         avctx->time_base.num /= i;
687         //return -1;
688     }
689
690     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
691         // (a + x * 3 / 8) / x
692         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
693         s->inter_quant_bias = 0;
694     } else {
695         s->intra_quant_bias = 0;
696         // (a - x / 4) / x
697         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
698     }
699
700     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
701         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
702         return AVERROR(EINVAL);
703     }
704
705 #if FF_API_QUANT_BIAS
706 FF_DISABLE_DEPRECATION_WARNINGS
707     if (s->intra_quant_bias == FF_DEFAULT_QUANT_BIAS &&
708         avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
709         s->intra_quant_bias = avctx->intra_quant_bias;
710     if (s->inter_quant_bias == FF_DEFAULT_QUANT_BIAS &&
711         avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
712         s->inter_quant_bias = avctx->inter_quant_bias;
713 FF_ENABLE_DEPRECATION_WARNINGS
714 #endif
715
716     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
717
718     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
719         s->avctx->time_base.den > (1 << 16) - 1) {
720         av_log(avctx, AV_LOG_ERROR,
721                "timebase %d/%d not supported by MPEG 4 standard, "
722                "the maximum admitted value for the timebase denominator "
723                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
724                (1 << 16) - 1);
725         return -1;
726     }
727     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
728
729     switch (avctx->codec->id) {
730     case AV_CODEC_ID_MPEG1VIDEO:
731         s->out_format = FMT_MPEG1;
732         s->low_delay  = !!(s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
733         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
734         break;
735     case AV_CODEC_ID_MPEG2VIDEO:
736         s->out_format = FMT_MPEG1;
737         s->low_delay  = !!(s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
738         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
739         s->rtp_mode   = 1;
740         break;
741     case AV_CODEC_ID_MJPEG:
742     case AV_CODEC_ID_AMV:
743         s->out_format = FMT_MJPEG;
744         s->intra_only = 1; /* force intra only for jpeg */
745         if (!CONFIG_MJPEG_ENCODER ||
746             ff_mjpeg_encode_init(s) < 0)
747             return -1;
748         avctx->delay = 0;
749         s->low_delay = 1;
750         break;
751     case AV_CODEC_ID_H261:
752         if (!CONFIG_H261_ENCODER)
753             return -1;
754         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
755             av_log(avctx, AV_LOG_ERROR,
756                    "The specified picture size of %dx%d is not valid for the "
757                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
758                     s->width, s->height);
759             return -1;
760         }
761         s->out_format = FMT_H261;
762         avctx->delay  = 0;
763         s->low_delay  = 1;
764         s->rtp_mode   = 0; /* Sliced encoding not supported */
765         break;
766     case AV_CODEC_ID_H263:
767         if (!CONFIG_H263_ENCODER)
768             return -1;
769         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
770                              s->width, s->height) == 8) {
771             av_log(avctx, AV_LOG_ERROR,
772                    "The specified picture size of %dx%d is not valid for "
773                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
774                    "352x288, 704x576, and 1408x1152. "
775                    "Try H.263+.\n", s->width, s->height);
776             return -1;
777         }
778         s->out_format = FMT_H263;
779         avctx->delay  = 0;
780         s->low_delay  = 1;
781         break;
782     case AV_CODEC_ID_H263P:
783         s->out_format = FMT_H263;
784         s->h263_plus  = 1;
785         /* Fx */
786         s->h263_aic        = (avctx->flags & AV_CODEC_FLAG_AC_PRED) ? 1 : 0;
787         s->modified_quant  = s->h263_aic;
788         s->loop_filter     = (avctx->flags & AV_CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
789         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
790
791         /* /Fx */
792         /* These are just to be sure */
793         avctx->delay = 0;
794         s->low_delay = 1;
795         break;
796     case AV_CODEC_ID_FLV1:
797         s->out_format      = FMT_H263;
798         s->h263_flv        = 2; /* format = 1; 11-bit codes */
799         s->unrestricted_mv = 1;
800         s->rtp_mode  = 0; /* don't allow GOB */
801         avctx->delay = 0;
802         s->low_delay = 1;
803         break;
804     case AV_CODEC_ID_RV10:
805         s->out_format = FMT_H263;
806         avctx->delay  = 0;
807         s->low_delay  = 1;
808         break;
809     case AV_CODEC_ID_RV20:
810         s->out_format      = FMT_H263;
811         avctx->delay       = 0;
812         s->low_delay       = 1;
813         s->modified_quant  = 1;
814         s->h263_aic        = 1;
815         s->h263_plus       = 1;
816         s->loop_filter     = 1;
817         s->unrestricted_mv = 0;
818         break;
819     case AV_CODEC_ID_MPEG4:
820         s->out_format      = FMT_H263;
821         s->h263_pred       = 1;
822         s->unrestricted_mv = 1;
823         s->low_delay       = s->max_b_frames ? 0 : 1;
824         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
825         break;
826     case AV_CODEC_ID_MSMPEG4V2:
827         s->out_format      = FMT_H263;
828         s->h263_pred       = 1;
829         s->unrestricted_mv = 1;
830         s->msmpeg4_version = 2;
831         avctx->delay       = 0;
832         s->low_delay       = 1;
833         break;
834     case AV_CODEC_ID_MSMPEG4V3:
835         s->out_format        = FMT_H263;
836         s->h263_pred         = 1;
837         s->unrestricted_mv   = 1;
838         s->msmpeg4_version   = 3;
839         s->flipflop_rounding = 1;
840         avctx->delay         = 0;
841         s->low_delay         = 1;
842         break;
843     case AV_CODEC_ID_WMV1:
844         s->out_format        = FMT_H263;
845         s->h263_pred         = 1;
846         s->unrestricted_mv   = 1;
847         s->msmpeg4_version   = 4;
848         s->flipflop_rounding = 1;
849         avctx->delay         = 0;
850         s->low_delay         = 1;
851         break;
852     case AV_CODEC_ID_WMV2:
853         s->out_format        = FMT_H263;
854         s->h263_pred         = 1;
855         s->unrestricted_mv   = 1;
856         s->msmpeg4_version   = 5;
857         s->flipflop_rounding = 1;
858         avctx->delay         = 0;
859         s->low_delay         = 1;
860         break;
861     default:
862         return -1;
863     }
864
865     avctx->has_b_frames = !s->low_delay;
866
867     s->encoding = 1;
868
869     s->progressive_frame    =
870     s->progressive_sequence = !(avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT |
871                                                 AV_CODEC_FLAG_INTERLACED_ME) ||
872                                 s->alternate_scan);
873
874     /* init */
875     ff_mpv_idct_init(s);
876     if (ff_mpv_common_init(s) < 0)
877         return -1;
878
879     ff_fdctdsp_init(&s->fdsp, avctx);
880     ff_me_cmp_init(&s->mecc, avctx);
881     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
882     ff_pixblockdsp_init(&s->pdsp, avctx);
883     ff_qpeldsp_init(&s->qdsp);
884
885     if (s->msmpeg4_version) {
886         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
887                           2 * 2 * (MAX_LEVEL + 1) *
888                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
889     }
890     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
891
892     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
893     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
894     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
895     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
896     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
897     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
898     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
899                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
900     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
901                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
902
903     if (s->avctx->noise_reduction) {
904         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
905                           2 * 64 * sizeof(uint16_t), fail);
906     }
907
908     ff_dct_encode_init(s);
909
910     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
911         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
912
913     s->quant_precision = 5;
914
915     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
916     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
917
918     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
919         ff_h261_encode_init(s);
920     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
921         ff_h263_encode_init(s);
922     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
923         if ((ret = ff_msmpeg4_encode_init(s)) < 0)
924             return ret;
925     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
926         && s->out_format == FMT_MPEG1)
927         ff_mpeg1_encode_init(s);
928
929     /* init q matrix */
930     for (i = 0; i < 64; i++) {
931         int j = s->idsp.idct_permutation[i];
932         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
933             s->mpeg_quant) {
934             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
935             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
936         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
937             s->intra_matrix[j] =
938             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
939         } else {
940             /* mpeg1/2 */
941             s->chroma_intra_matrix[j] =
942             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
943             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
944         }
945         if (s->avctx->intra_matrix)
946             s->intra_matrix[j] = s->avctx->intra_matrix[i];
947         if (s->avctx->inter_matrix)
948             s->inter_matrix[j] = s->avctx->inter_matrix[i];
949     }
950
951     /* precompute matrix */
952     /* for mjpeg, we do include qscale in the matrix */
953     if (s->out_format != FMT_MJPEG) {
954         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
955                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
956                           31, 1);
957         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
958                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
959                           31, 0);
960     }
961
962     if (ff_rate_control_init(s) < 0)
963         return -1;
964
965 #if FF_API_ERROR_RATE
966     FF_DISABLE_DEPRECATION_WARNINGS
967     if (avctx->error_rate)
968         s->error_rate = avctx->error_rate;
969     FF_ENABLE_DEPRECATION_WARNINGS;
970 #endif
971
972 #if FF_API_NORMALIZE_AQP
973     FF_DISABLE_DEPRECATION_WARNINGS
974     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
975         s->mpv_flags |= FF_MPV_FLAG_NAQ;
976     FF_ENABLE_DEPRECATION_WARNINGS;
977 #endif
978
979 #if FF_API_MV0
980     FF_DISABLE_DEPRECATION_WARNINGS
981     if (avctx->flags & CODEC_FLAG_MV0)
982         s->mpv_flags |= FF_MPV_FLAG_MV0;
983     FF_ENABLE_DEPRECATION_WARNINGS
984 #endif
985
986 #if FF_API_MPV_OPT
987     FF_DISABLE_DEPRECATION_WARNINGS
988     if (avctx->rc_qsquish != 0.0)
989         s->rc_qsquish = avctx->rc_qsquish;
990     if (avctx->rc_qmod_amp != 0.0)
991         s->rc_qmod_amp = avctx->rc_qmod_amp;
992     if (avctx->rc_qmod_freq)
993         s->rc_qmod_freq = avctx->rc_qmod_freq;
994     if (avctx->rc_buffer_aggressivity != 1.0)
995         s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
996     if (avctx->rc_initial_cplx != 0.0)
997         s->rc_initial_cplx = avctx->rc_initial_cplx;
998     if (avctx->lmin)
999         s->lmin = avctx->lmin;
1000     if (avctx->lmax)
1001         s->lmax = avctx->lmax;
1002
1003     if (avctx->rc_eq) {
1004         av_freep(&s->rc_eq);
1005         s->rc_eq = av_strdup(avctx->rc_eq);
1006         if (!s->rc_eq)
1007             return AVERROR(ENOMEM);
1008     }
1009     FF_ENABLE_DEPRECATION_WARNINGS
1010 #endif
1011
1012     if (avctx->b_frame_strategy == 2) {
1013         for (i = 0; i < s->max_b_frames + 2; i++) {
1014             s->tmp_frames[i] = av_frame_alloc();
1015             if (!s->tmp_frames[i])
1016                 return AVERROR(ENOMEM);
1017
1018             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
1019             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
1020             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
1021
1022             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
1023             if (ret < 0)
1024                 return ret;
1025         }
1026     }
1027
1028     return 0;
1029 fail:
1030     ff_mpv_encode_end(avctx);
1031     return AVERROR_UNKNOWN;
1032 }
1033
1034 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
1035 {
1036     MpegEncContext *s = avctx->priv_data;
1037     int i;
1038
1039     ff_rate_control_uninit(s);
1040
1041     ff_mpv_common_end(s);
1042     if (CONFIG_MJPEG_ENCODER &&
1043         s->out_format == FMT_MJPEG)
1044         ff_mjpeg_encode_close(s);
1045
1046     av_freep(&avctx->extradata);
1047
1048     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
1049         av_frame_free(&s->tmp_frames[i]);
1050
1051     ff_free_picture_tables(&s->new_picture);
1052     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1053
1054     av_freep(&s->avctx->stats_out);
1055     av_freep(&s->ac_stats);
1056
1057     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
1058     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
1059     s->q_chroma_intra_matrix=   NULL;
1060     s->q_chroma_intra_matrix16= NULL;
1061     av_freep(&s->q_intra_matrix);
1062     av_freep(&s->q_inter_matrix);
1063     av_freep(&s->q_intra_matrix16);
1064     av_freep(&s->q_inter_matrix16);
1065     av_freep(&s->input_picture);
1066     av_freep(&s->reordered_input_picture);
1067     av_freep(&s->dct_offset);
1068
1069     return 0;
1070 }
1071
/**
 * Sum of absolute differences between a 16x16 block of samples and a
 * single reference value.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared against
 * @param stride distance between the starts of two consecutive rows
 * @return the accumulated absolute difference over all 256 samples
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
1085
1086 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1087                            uint8_t *ref, int stride)
1088 {
1089     int x, y, w, h;
1090     int acc = 0;
1091
1092     w = s->width  & ~15;
1093     h = s->height & ~15;
1094
1095     for (y = 0; y < h; y += 16) {
1096         for (x = 0; x < w; x += 16) {
1097             int offset = x + y * stride;
1098             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1099                                       stride, 16);
1100             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1101             int sae  = get_sae(src + offset, mean, stride);
1102
1103             acc += sae + 500 < sad;
1104         }
1105     }
1106     return acc;
1107 }
1108
1109 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared)
1110 {
1111     return ff_alloc_picture(s->avctx, pic, &s->me, &s->sc, shared, 1,
1112                             s->chroma_x_shift, s->chroma_y_shift, s->out_format,
1113                             s->mb_stride, s->mb_width, s->mb_height, s->b8_stride,
1114                             &s->linesize, &s->uvlinesize);
1115 }
1116
1117 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1118 {
1119     Picture *pic = NULL;
1120     int64_t pts;
1121     int i, display_picture_number = 0, ret;
1122     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
1123                                                  (s->low_delay ? 0 : 1);
1124     int direct = 1;
1125
1126     if (pic_arg) {
1127         pts = pic_arg->pts;
1128         display_picture_number = s->input_picture_number++;
1129
1130         if (pts != AV_NOPTS_VALUE) {
1131             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1132                 int64_t last = s->user_specified_pts;
1133
1134                 if (pts <= last) {
1135                     av_log(s->avctx, AV_LOG_ERROR,
1136                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1137                            pts, last);
1138                     return AVERROR(EINVAL);
1139                 }
1140
1141                 if (!s->low_delay && display_picture_number == 1)
1142                     s->dts_delta = pts - last;
1143             }
1144             s->user_specified_pts = pts;
1145         } else {
1146             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1147                 s->user_specified_pts =
1148                 pts = s->user_specified_pts + 1;
1149                 av_log(s->avctx, AV_LOG_INFO,
1150                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1151                        pts);
1152             } else {
1153                 pts = display_picture_number;
1154             }
1155         }
1156     }
1157
1158     if (pic_arg) {
1159         if (!pic_arg->buf[0] ||
1160             pic_arg->linesize[0] != s->linesize ||
1161             pic_arg->linesize[1] != s->uvlinesize ||
1162             pic_arg->linesize[2] != s->uvlinesize)
1163             direct = 0;
1164         if ((s->width & 15) || (s->height & 15))
1165             direct = 0;
1166         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1167             direct = 0;
1168         if (s->linesize & (STRIDE_ALIGN-1))
1169             direct = 0;
1170
1171         ff_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1172                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1173
1174         i = ff_find_unused_picture(s->avctx, s->picture, direct);
1175         if (i < 0)
1176             return i;
1177
1178         pic = &s->picture[i];
1179         pic->reference = 3;
1180
1181         if (direct) {
1182             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1183                 return ret;
1184         }
1185         ret = alloc_picture(s, pic, direct);
1186         if (ret < 0)
1187             return ret;
1188
1189         if (!direct) {
1190             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1191                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1192                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1193                 // empty
1194             } else {
1195                 int h_chroma_shift, v_chroma_shift;
1196                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1197                                                  &h_chroma_shift,
1198                                                  &v_chroma_shift);
1199
1200                 for (i = 0; i < 3; i++) {
1201                     int src_stride = pic_arg->linesize[i];
1202                     int dst_stride = i ? s->uvlinesize : s->linesize;
1203                     int h_shift = i ? h_chroma_shift : 0;
1204                     int v_shift = i ? v_chroma_shift : 0;
1205                     int w = s->width  >> h_shift;
1206                     int h = s->height >> v_shift;
1207                     uint8_t *src = pic_arg->data[i];
1208                     uint8_t *dst = pic->f->data[i];
1209                     int vpad = 16;
1210
1211                     if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1212                         && !s->progressive_sequence
1213                         && FFALIGN(s->height, 32) - s->height > 16)
1214                         vpad = 32;
1215
1216                     if (!s->avctx->rc_buffer_size)
1217                         dst += INPLACE_OFFSET;
1218
1219                     if (src_stride == dst_stride)
1220                         memcpy(dst, src, src_stride * h);
1221                     else {
1222                         int h2 = h;
1223                         uint8_t *dst2 = dst;
1224                         while (h2--) {
1225                             memcpy(dst2, src, w);
1226                             dst2 += dst_stride;
1227                             src += src_stride;
1228                         }
1229                     }
1230                     if ((s->width & 15) || (s->height & (vpad-1))) {
1231                         s->mpvencdsp.draw_edges(dst, dst_stride,
1232                                                 w, h,
1233                                                 16 >> h_shift,
1234                                                 vpad >> v_shift,
1235                                                 EDGE_BOTTOM);
1236                     }
1237                 }
1238             }
1239         }
1240         ret = av_frame_copy_props(pic->f, pic_arg);
1241         if (ret < 0)
1242             return ret;
1243
1244         pic->f->display_picture_number = display_picture_number;
1245         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1246     }
1247
1248     /* shift buffer entries */
1249     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1250         s->input_picture[i - 1] = s->input_picture[i];
1251
1252     s->input_picture[encoding_delay] = (Picture*) pic;
1253
1254     return 0;
1255 }
1256
1257 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1258 {
1259     int x, y, plane;
1260     int score = 0;
1261     int64_t score64 = 0;
1262
1263     for (plane = 0; plane < 3; plane++) {
1264         const int stride = p->f->linesize[plane];
1265         const int bw = plane ? 1 : 2;
1266         for (y = 0; y < s->mb_height * bw; y++) {
1267             for (x = 0; x < s->mb_width * bw; x++) {
1268                 int off = p->shared ? 0 : 16;
1269                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1270                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1271                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1272
1273                 switch (FFABS(s->avctx->frame_skip_exp)) {
1274                 case 0: score    =  FFMAX(score, v);          break;
1275                 case 1: score   += FFABS(v);                  break;
1276                 case 2: score64 += v * (int64_t)v;                       break;
1277                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1278                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1279                 }
1280             }
1281         }
1282     }
1283     emms_c();
1284
1285     if (score)
1286         score64 = score;
1287     if (s->avctx->frame_skip_exp < 0)
1288         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1289                       -1.0/s->avctx->frame_skip_exp);
1290
1291     if (score64 < s->avctx->frame_skip_threshold)
1292         return 1;
1293     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1294         return 1;
1295     return 0;
1296 }
1297
1298 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1299 {
1300     AVPacket pkt = { 0 };
1301     int ret, got_output;
1302
1303     av_init_packet(&pkt);
1304     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1305     if (ret < 0)
1306         return ret;
1307
1308     ret = pkt.size;
1309     av_free_packet(&pkt);
1310     return ret;
1311 }
1312
1313 static int estimate_best_b_count(MpegEncContext *s)
1314 {
1315     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1316     AVCodecContext *c = avcodec_alloc_context3(NULL);
1317     const int scale = s->avctx->brd_scale;
1318     int i, j, out_size, p_lambda, b_lambda, lambda2;
1319     int64_t best_rd  = INT64_MAX;
1320     int best_b_count = -1;
1321
1322     if (!c)
1323         return AVERROR(ENOMEM);
1324     av_assert0(scale >= 0 && scale <= 3);
1325
1326     //emms_c();
1327     //s->next_picture_ptr->quality;
1328     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1329     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1330     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1331     if (!b_lambda) // FIXME we should do this somewhere else
1332         b_lambda = p_lambda;
1333     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1334                FF_LAMBDA_SHIFT;
1335
1336     c->width        = s->width  >> scale;
1337     c->height       = s->height >> scale;
1338     c->flags        = AV_CODEC_FLAG_QSCALE | AV_CODEC_FLAG_PSNR;
1339     c->flags       |= s->avctx->flags & AV_CODEC_FLAG_QPEL;
1340     c->mb_decision  = s->avctx->mb_decision;
1341     c->me_cmp       = s->avctx->me_cmp;
1342     c->mb_cmp       = s->avctx->mb_cmp;
1343     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1344     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1345     c->time_base    = s->avctx->time_base;
1346     c->max_b_frames = s->max_b_frames;
1347
1348     if (avcodec_open2(c, codec, NULL) < 0)
1349         return -1;
1350
1351     for (i = 0; i < s->max_b_frames + 2; i++) {
1352         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1353                                                 s->next_picture_ptr;
1354         uint8_t *data[4];
1355
1356         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1357             pre_input = *pre_input_ptr;
1358             memcpy(data, pre_input_ptr->f->data, sizeof(data));
1359
1360             if (!pre_input.shared && i) {
1361                 data[0] += INPLACE_OFFSET;
1362                 data[1] += INPLACE_OFFSET;
1363                 data[2] += INPLACE_OFFSET;
1364             }
1365
1366             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1367                                        s->tmp_frames[i]->linesize[0],
1368                                        data[0],
1369                                        pre_input.f->linesize[0],
1370                                        c->width, c->height);
1371             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1372                                        s->tmp_frames[i]->linesize[1],
1373                                        data[1],
1374                                        pre_input.f->linesize[1],
1375                                        c->width >> 1, c->height >> 1);
1376             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1377                                        s->tmp_frames[i]->linesize[2],
1378                                        data[2],
1379                                        pre_input.f->linesize[2],
1380                                        c->width >> 1, c->height >> 1);
1381         }
1382     }
1383
1384     for (j = 0; j < s->max_b_frames + 1; j++) {
1385         int64_t rd = 0;
1386
1387         if (!s->input_picture[j])
1388             break;
1389
1390         c->error[0] = c->error[1] = c->error[2] = 0;
1391
1392         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1393         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1394
1395         out_size = encode_frame(c, s->tmp_frames[0]);
1396
1397         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1398
1399         for (i = 0; i < s->max_b_frames + 1; i++) {
1400             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1401
1402             s->tmp_frames[i + 1]->pict_type = is_p ?
1403                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1404             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1405
1406             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1407
1408             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1409         }
1410
1411         /* get the delayed frames */
1412         while (out_size) {
1413             out_size = encode_frame(c, NULL);
1414             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1415         }
1416
1417         rd += c->error[0] + c->error[1] + c->error[2];
1418
1419         if (rd < best_rd) {
1420             best_rd = rd;
1421             best_b_count = j;
1422         }
1423     }
1424
1425     avcodec_close(c);
1426     av_freep(&c);
1427
1428     return best_b_count;
1429 }
1430
1431 static int select_input_picture(MpegEncContext *s)
1432 {
1433     int i, ret;
1434
1435     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1436         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1437     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1438
1439     /* set next picture type & ordering */
1440     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1441         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1442             if (s->picture_in_gop_number < s->gop_size &&
1443                 s->next_picture_ptr &&
1444                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1445                 // FIXME check that te gop check above is +-1 correct
1446                 av_frame_unref(s->input_picture[0]->f);
1447
1448                 ff_vbv_update(s, 0);
1449
1450                 goto no_output_pic;
1451             }
1452         }
1453
1454         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1455             !s->next_picture_ptr || s->intra_only) {
1456             s->reordered_input_picture[0] = s->input_picture[0];
1457             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1458             s->reordered_input_picture[0]->f->coded_picture_number =
1459                 s->coded_picture_number++;
1460         } else {
1461             int b_frames;
1462
1463             if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
1464                 for (i = 0; i < s->max_b_frames + 1; i++) {
1465                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1466
1467                     if (pict_num >= s->rc_context.num_entries)
1468                         break;
1469                     if (!s->input_picture[i]) {
1470                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1471                         break;
1472                     }
1473
1474                     s->input_picture[i]->f->pict_type =
1475                         s->rc_context.entry[pict_num].new_pict_type;
1476                 }
1477             }
1478
1479             if (s->avctx->b_frame_strategy == 0) {
1480                 b_frames = s->max_b_frames;
1481                 while (b_frames && !s->input_picture[b_frames])
1482                     b_frames--;
1483             } else if (s->avctx->b_frame_strategy == 1) {
1484                 for (i = 1; i < s->max_b_frames + 1; i++) {
1485                     if (s->input_picture[i] &&
1486                         s->input_picture[i]->b_frame_score == 0) {
1487                         s->input_picture[i]->b_frame_score =
1488                             get_intra_count(s,
1489                                             s->input_picture[i    ]->f->data[0],
1490                                             s->input_picture[i - 1]->f->data[0],
1491                                             s->linesize) + 1;
1492                     }
1493                 }
1494                 for (i = 0; i < s->max_b_frames + 1; i++) {
1495                     if (!s->input_picture[i] ||
1496                         s->input_picture[i]->b_frame_score - 1 >
1497                             s->mb_num / s->avctx->b_sensitivity)
1498                         break;
1499                 }
1500
1501                 b_frames = FFMAX(0, i - 1);
1502
1503                 /* reset scores */
1504                 for (i = 0; i < b_frames + 1; i++) {
1505                     s->input_picture[i]->b_frame_score = 0;
1506                 }
1507             } else if (s->avctx->b_frame_strategy == 2) {
1508                 b_frames = estimate_best_b_count(s);
1509             } else {
1510                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1511                 b_frames = 0;
1512             }
1513
1514             emms_c();
1515
1516             for (i = b_frames - 1; i >= 0; i--) {
1517                 int type = s->input_picture[i]->f->pict_type;
1518                 if (type && type != AV_PICTURE_TYPE_B)
1519                     b_frames = i;
1520             }
1521             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1522                 b_frames == s->max_b_frames) {
1523                 av_log(s->avctx, AV_LOG_ERROR,
1524                        "warning, too many b frames in a row\n");
1525             }
1526
1527             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1528                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1529                     s->gop_size > s->picture_in_gop_number) {
1530                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1531                 } else {
1532                     if (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)
1533                         b_frames = 0;
1534                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1535                 }
1536             }
1537
1538             if ((s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP) && b_frames &&
1539                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1540                 b_frames--;
1541
1542             s->reordered_input_picture[0] = s->input_picture[b_frames];
1543             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1544                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1545             s->reordered_input_picture[0]->f->coded_picture_number =
1546                 s->coded_picture_number++;
1547             for (i = 0; i < b_frames; i++) {
1548                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1549                 s->reordered_input_picture[i + 1]->f->pict_type =
1550                     AV_PICTURE_TYPE_B;
1551                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1552                     s->coded_picture_number++;
1553             }
1554         }
1555     }
1556 no_output_pic:
1557     if (s->reordered_input_picture[0]) {
1558         s->reordered_input_picture[0]->reference =
1559            s->reordered_input_picture[0]->f->pict_type !=
1560                AV_PICTURE_TYPE_B ? 3 : 0;
1561
1562         ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1563         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->new_picture, s->reordered_input_picture[0])))
1564             return ret;
1565
1566         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1567             // input is a shared pix, so we can't modifiy it -> alloc a new
1568             // one & ensure that the shared one is reuseable
1569
1570             Picture *pic;
1571             int i = ff_find_unused_picture(s->avctx, s->picture, 0);
1572             if (i < 0)
1573                 return i;
1574             pic = &s->picture[i];
1575
1576             pic->reference = s->reordered_input_picture[0]->reference;
1577             if (alloc_picture(s, pic, 0) < 0) {
1578                 return -1;
1579             }
1580
1581             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1582             if (ret < 0)
1583                 return ret;
1584
1585             /* mark us unused / free shared pic */
1586             av_frame_unref(s->reordered_input_picture[0]->f);
1587             s->reordered_input_picture[0]->shared = 0;
1588
1589             s->current_picture_ptr = pic;
1590         } else {
1591             // input is not a shared pix -> reuse buffer for current_pix
1592             s->current_picture_ptr = s->reordered_input_picture[0];
1593             for (i = 0; i < 4; i++) {
1594                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1595             }
1596         }
1597         ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1598         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1599                                        s->current_picture_ptr)) < 0)
1600             return ret;
1601
1602         s->picture_number = s->new_picture.f->display_picture_number;
1603     } else {
1604         ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1605     }
1606     return 0;
1607 }
1608
1609 static void frame_end(MpegEncContext *s)
1610 {
1611     if (s->unrestricted_mv &&
1612         s->current_picture.reference &&
1613         !s->intra_only) {
1614         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1615         int hshift = desc->log2_chroma_w;
1616         int vshift = desc->log2_chroma_h;
1617         s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
1618                                 s->current_picture.f->linesize[0],
1619                                 s->h_edge_pos, s->v_edge_pos,
1620                                 EDGE_WIDTH, EDGE_WIDTH,
1621                                 EDGE_TOP | EDGE_BOTTOM);
1622         s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
1623                                 s->current_picture.f->linesize[1],
1624                                 s->h_edge_pos >> hshift,
1625                                 s->v_edge_pos >> vshift,
1626                                 EDGE_WIDTH >> hshift,
1627                                 EDGE_WIDTH >> vshift,
1628                                 EDGE_TOP | EDGE_BOTTOM);
1629         s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
1630                                 s->current_picture.f->linesize[2],
1631                                 s->h_edge_pos >> hshift,
1632                                 s->v_edge_pos >> vshift,
1633                                 EDGE_WIDTH >> hshift,
1634                                 EDGE_WIDTH >> vshift,
1635                                 EDGE_TOP | EDGE_BOTTOM);
1636     }
1637
1638     emms_c();
1639
1640     s->last_pict_type                 = s->pict_type;
1641     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1642     if (s->pict_type!= AV_PICTURE_TYPE_B)
1643         s->last_non_b_pict_type = s->pict_type;
1644
1645 #if FF_API_CODED_FRAME
1646 FF_DISABLE_DEPRECATION_WARNINGS
1647     av_frame_copy_props(s->avctx->coded_frame, s->current_picture.f);
1648 FF_ENABLE_DEPRECATION_WARNINGS
1649 #endif
1650 }
1651
1652 static void update_noise_reduction(MpegEncContext *s)
1653 {
1654     int intra, i;
1655
1656     for (intra = 0; intra < 2; intra++) {
1657         if (s->dct_count[intra] > (1 << 16)) {
1658             for (i = 0; i < 64; i++) {
1659                 s->dct_error_sum[intra][i] >>= 1;
1660             }
1661             s->dct_count[intra] >>= 1;
1662         }
1663
1664         for (i = 0; i < 64; i++) {
1665             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1666                                        s->dct_count[intra] +
1667                                        s->dct_error_sum[intra][i] / 2) /
1668                                       (s->dct_error_sum[intra][i] + 1);
1669         }
1670     }
1671 }
1672
1673 static int frame_start(MpegEncContext *s)
1674 {
1675     int ret;
1676
1677     /* mark & release old frames */
1678     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1679         s->last_picture_ptr != s->next_picture_ptr &&
1680         s->last_picture_ptr->f->buf[0]) {
1681         ff_mpeg_unref_picture(s->avctx, s->last_picture_ptr);
1682     }
1683
1684     s->current_picture_ptr->f->pict_type = s->pict_type;
1685     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1686
1687     ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1688     if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1689                                    s->current_picture_ptr)) < 0)
1690         return ret;
1691
1692     if (s->pict_type != AV_PICTURE_TYPE_B) {
1693         s->last_picture_ptr = s->next_picture_ptr;
1694         if (!s->droppable)
1695             s->next_picture_ptr = s->current_picture_ptr;
1696     }
1697
1698     if (s->last_picture_ptr) {
1699         ff_mpeg_unref_picture(s->avctx, &s->last_picture);
1700         if (s->last_picture_ptr->f->buf[0] &&
1701             (ret = ff_mpeg_ref_picture(s->avctx, &s->last_picture,
1702                                        s->last_picture_ptr)) < 0)
1703             return ret;
1704     }
1705     if (s->next_picture_ptr) {
1706         ff_mpeg_unref_picture(s->avctx, &s->next_picture);
1707         if (s->next_picture_ptr->f->buf[0] &&
1708             (ret = ff_mpeg_ref_picture(s->avctx, &s->next_picture,
1709                                        s->next_picture_ptr)) < 0)
1710             return ret;
1711     }
1712
1713     if (s->picture_structure!= PICT_FRAME) {
1714         int i;
1715         for (i = 0; i < 4; i++) {
1716             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1717                 s->current_picture.f->data[i] +=
1718                     s->current_picture.f->linesize[i];
1719             }
1720             s->current_picture.f->linesize[i] *= 2;
1721             s->last_picture.f->linesize[i]    *= 2;
1722             s->next_picture.f->linesize[i]    *= 2;
1723         }
1724     }
1725
1726     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1727         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1728         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1729     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1730         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1731         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1732     } else {
1733         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1734         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1735     }
1736
1737     if (s->dct_error_sum) {
1738         av_assert2(s->avctx->noise_reduction && s->encoding);
1739         update_noise_reduction(s);
1740     }
1741
1742     return 0;
1743 }
1744
1745 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1746                           const AVFrame *pic_arg, int *got_packet)
1747 {
1748     MpegEncContext *s = avctx->priv_data;
1749     int i, stuffing_count, ret;
1750     int context_count = s->slice_context_count;
1751
1752     s->vbv_ignore_qmax = 0;
1753
1754     s->picture_in_gop_number++;
1755
1756     if (load_input_picture(s, pic_arg) < 0)
1757         return -1;
1758
1759     if (select_input_picture(s) < 0) {
1760         return -1;
1761     }
1762
1763     /* output? */
1764     if (s->new_picture.f->data[0]) {
1765         int growing_buffer = context_count == 1 && !pkt->data && !s->data_partitioning;
1766         int pkt_size = growing_buffer ? FFMAX(s->mb_width*s->mb_height*64+10000, avctx->internal->byte_buffer_size) - AV_INPUT_BUFFER_PADDING_SIZE
1767                                               :
1768                                               s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000;
1769         if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size, 0)) < 0)
1770             return ret;
1771         if (s->mb_info) {
1772             s->mb_info_ptr = av_packet_new_side_data(pkt,
1773                                  AV_PKT_DATA_H263_MB_INFO,
1774                                  s->mb_width*s->mb_height*12);
1775             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1776         }
1777
1778         for (i = 0; i < context_count; i++) {
1779             int start_y = s->thread_context[i]->start_mb_y;
1780             int   end_y = s->thread_context[i]->  end_mb_y;
1781             int h       = s->mb_height;
1782             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1783             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1784
1785             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1786         }
1787
1788         s->pict_type = s->new_picture.f->pict_type;
1789         //emms_c();
1790         ret = frame_start(s);
1791         if (ret < 0)
1792             return ret;
1793 vbv_retry:
1794         ret = encode_picture(s, s->picture_number);
1795         if (growing_buffer) {
1796             av_assert0(s->pb.buf == avctx->internal->byte_buffer);
1797             pkt->data = s->pb.buf;
1798             pkt->size = avctx->internal->byte_buffer_size;
1799         }
1800         if (ret < 0)
1801             return -1;
1802
1803         avctx->header_bits = s->header_bits;
1804         avctx->mv_bits     = s->mv_bits;
1805         avctx->misc_bits   = s->misc_bits;
1806         avctx->i_tex_bits  = s->i_tex_bits;
1807         avctx->p_tex_bits  = s->p_tex_bits;
1808         avctx->i_count     = s->i_count;
1809         // FIXME f/b_count in avctx
1810         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1811         avctx->skip_count  = s->skip_count;
1812
1813         frame_end(s);
1814
1815         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1816             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1817
1818         if (avctx->rc_buffer_size) {
1819             RateControlContext *rcc = &s->rc_context;
1820             int max_size = FFMAX(rcc->buffer_index * avctx->rc_max_available_vbv_use, rcc->buffer_index - 500);
1821
1822             if (put_bits_count(&s->pb) > max_size &&
1823                 s->lambda < s->lmax) {
1824                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1825                                        (s->qscale + 1) / s->qscale);
1826                 if (s->adaptive_quant) {
1827                     int i;
1828                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1829                         s->lambda_table[i] =
1830                             FFMAX(s->lambda_table[i] + 1,
1831                                   s->lambda_table[i] * (s->qscale + 1) /
1832                                   s->qscale);
1833                 }
1834                 s->mb_skipped = 0;        // done in frame_start()
1835                 // done in encode_picture() so we must undo it
1836                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1837                     if (s->flipflop_rounding          ||
1838                         s->codec_id == AV_CODEC_ID_H263P ||
1839                         s->codec_id == AV_CODEC_ID_MPEG4)
1840                         s->no_rounding ^= 1;
1841                 }
1842                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1843                     s->time_base       = s->last_time_base;
1844                     s->last_non_b_time = s->time - s->pp_time;
1845                 }
1846                 for (i = 0; i < context_count; i++) {
1847                     PutBitContext *pb = &s->thread_context[i]->pb;
1848                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1849                 }
1850                 s->vbv_ignore_qmax = 1;
1851                 av_log(s->avctx, AV_LOG_VERBOSE, "reencoding frame due to VBV\n");
1852                 goto vbv_retry;
1853             }
1854
1855             av_assert0(s->avctx->rc_max_rate);
1856         }
1857
1858         if (s->avctx->flags & AV_CODEC_FLAG_PASS1)
1859             ff_write_pass1_stats(s);
1860
1861         for (i = 0; i < 4; i++) {
1862             s->current_picture_ptr->f->error[i] =
1863             s->current_picture.f->error[i] =
1864                 s->current_picture.error[i];
1865             avctx->error[i] += s->current_picture_ptr->f->error[i];
1866         }
1867         ff_side_data_set_encoder_stats(pkt, s->current_picture.f->quality,
1868                                        s->current_picture_ptr->f->error,
1869                                        (s->avctx->flags&AV_CODEC_FLAG_PSNR) ? 4 : 0,
1870                                        s->pict_type);
1871
1872         if (s->avctx->flags & AV_CODEC_FLAG_PASS1)
1873             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1874                    avctx->i_tex_bits + avctx->p_tex_bits ==
1875                        put_bits_count(&s->pb));
1876         flush_put_bits(&s->pb);
1877         s->frame_bits  = put_bits_count(&s->pb);
1878
1879         stuffing_count = ff_vbv_update(s, s->frame_bits);
1880         s->stuffing_bits = 8*stuffing_count;
1881         if (stuffing_count) {
1882             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1883                     stuffing_count + 50) {
1884                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1885                 return -1;
1886             }
1887
1888             switch (s->codec_id) {
1889             case AV_CODEC_ID_MPEG1VIDEO:
1890             case AV_CODEC_ID_MPEG2VIDEO:
1891                 while (stuffing_count--) {
1892                     put_bits(&s->pb, 8, 0);
1893                 }
1894             break;
1895             case AV_CODEC_ID_MPEG4:
1896                 put_bits(&s->pb, 16, 0);
1897                 put_bits(&s->pb, 16, 0x1C3);
1898                 stuffing_count -= 4;
1899                 while (stuffing_count--) {
1900                     put_bits(&s->pb, 8, 0xFF);
1901                 }
1902             break;
1903             default:
1904                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1905             }
1906             flush_put_bits(&s->pb);
1907             s->frame_bits  = put_bits_count(&s->pb);
1908         }
1909
1910         /* update mpeg1/2 vbv_delay for CBR */
1911         if (s->avctx->rc_max_rate                          &&
1912             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1913             s->out_format == FMT_MPEG1                     &&
1914             90000LL * (avctx->rc_buffer_size - 1) <=
1915                 s->avctx->rc_max_rate * 0xFFFFLL) {
1916             int vbv_delay, min_delay;
1917             double inbits  = s->avctx->rc_max_rate *
1918                              av_q2d(s->avctx->time_base);
1919             int    minbits = s->frame_bits - 8 *
1920                              (s->vbv_delay_ptr - s->pb.buf - 1);
1921             double bits    = s->rc_context.buffer_index + minbits - inbits;
1922
1923             if (bits < 0)
1924                 av_log(s->avctx, AV_LOG_ERROR,
1925                        "Internal error, negative bits\n");
1926
1927             assert(s->repeat_first_field == 0);
1928
1929             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1930             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1931                         s->avctx->rc_max_rate;
1932
1933             vbv_delay = FFMAX(vbv_delay, min_delay);
1934
1935             av_assert0(vbv_delay < 0xFFFF);
1936
1937             s->vbv_delay_ptr[0] &= 0xF8;
1938             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1939             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1940             s->vbv_delay_ptr[2] &= 0x07;
1941             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1942             avctx->vbv_delay     = vbv_delay * 300;
1943         }
1944         s->total_bits     += s->frame_bits;
1945         avctx->frame_bits  = s->frame_bits;
1946
1947         pkt->pts = s->current_picture.f->pts;
1948         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1949             if (!s->current_picture.f->coded_picture_number)
1950                 pkt->dts = pkt->pts - s->dts_delta;
1951             else
1952                 pkt->dts = s->reordered_pts;
1953             s->reordered_pts = pkt->pts;
1954         } else
1955             pkt->dts = pkt->pts;
1956         if (s->current_picture.f->key_frame)
1957             pkt->flags |= AV_PKT_FLAG_KEY;
1958         if (s->mb_info)
1959             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1960     } else {
1961         s->frame_bits = 0;
1962     }
1963
1964     /* release non-reference frames */
1965     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1966         if (!s->picture[i].reference)
1967             ff_mpeg_unref_picture(s->avctx, &s->picture[i]);
1968     }
1969
1970     av_assert1((s->frame_bits & 7) == 0);
1971
1972     pkt->size = s->frame_bits / 8;
1973     *got_packet = !!pkt->size;
1974     return 0;
1975 }
1976
1977 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1978                                                 int n, int threshold)
1979 {
1980     static const char tab[64] = {
1981         3, 2, 2, 1, 1, 1, 1, 1,
1982         1, 1, 1, 1, 1, 1, 1, 1,
1983         1, 1, 1, 1, 1, 1, 1, 1,
1984         0, 0, 0, 0, 0, 0, 0, 0,
1985         0, 0, 0, 0, 0, 0, 0, 0,
1986         0, 0, 0, 0, 0, 0, 0, 0,
1987         0, 0, 0, 0, 0, 0, 0, 0,
1988         0, 0, 0, 0, 0, 0, 0, 0
1989     };
1990     int score = 0;
1991     int run = 0;
1992     int i;
1993     int16_t *block = s->block[n];
1994     const int last_index = s->block_last_index[n];
1995     int skip_dc;
1996
1997     if (threshold < 0) {
1998         skip_dc = 0;
1999         threshold = -threshold;
2000     } else
2001         skip_dc = 1;
2002
2003     /* Are all we could set to zero already zero? */
2004     if (last_index <= skip_dc - 1)
2005         return;
2006
2007     for (i = 0; i <= last_index; i++) {
2008         const int j = s->intra_scantable.permutated[i];
2009         const int level = FFABS(block[j]);
2010         if (level == 1) {
2011             if (skip_dc && i == 0)
2012                 continue;
2013             score += tab[run];
2014             run = 0;
2015         } else if (level > 1) {
2016             return;
2017         } else {
2018             run++;
2019         }
2020     }
2021     if (score >= threshold)
2022         return;
2023     for (i = skip_dc; i <= last_index; i++) {
2024         const int j = s->intra_scantable.permutated[i];
2025         block[j] = 0;
2026     }
2027     if (block[0])
2028         s->block_last_index[n] = 0;
2029     else
2030         s->block_last_index[n] = -1;
2031 }
2032
2033 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
2034                                int last_index)
2035 {
2036     int i;
2037     const int maxlevel = s->max_qcoeff;
2038     const int minlevel = s->min_qcoeff;
2039     int overflow = 0;
2040
2041     if (s->mb_intra) {
2042         i = 1; // skip clipping of intra dc
2043     } else
2044         i = 0;
2045
2046     for (; i <= last_index; i++) {
2047         const int j = s->intra_scantable.permutated[i];
2048         int level = block[j];
2049
2050         if (level > maxlevel) {
2051             level = maxlevel;
2052             overflow++;
2053         } else if (level < minlevel) {
2054             level = minlevel;
2055             overflow++;
2056         }
2057
2058         block[j] = level;
2059     }
2060
2061     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
2062         av_log(s->avctx, AV_LOG_INFO,
2063                "warning, clipping %d dct coefficients to %d..%d\n",
2064                overflow, minlevel, maxlevel);
2065 }
2066
/**
 * Compute a weight for each pixel of an 8x8 block.
 *
 * For every pixel the neighbourhood of up to 3x3 pixels around it (cut off
 * at the borders of the 8x8 block) is examined and the weight is set to
 *     36 * sqrt(count * sum(v^2) - sum(v)^2) / count
 * which is 36 times the standard deviation of that neighbourhood, evaluated
 * in integer arithmetic.
 *
 * @param weight output, 64 values stored row by row
 * @param ptr    top left pixel of the 8x8 block
 * @param stride distance between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            int sum   = 0;
            int sqr   = 0;
            int count = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                for (nx = left; nx < right; nx++) {
                    const int v = ptr[nx + ny * stride];

                    sum += v;
                    sqr += v * v;
                    count++;
                }
            }
            weight[col + 8 * row] = (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
2090
2091 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2092                                                 int motion_x, int motion_y,
2093                                                 int mb_block_height,
2094                                                 int mb_block_width,
2095                                                 int mb_block_count)
2096 {
2097     int16_t weight[12][64];
2098     int16_t orig[12][64];
2099     const int mb_x = s->mb_x;
2100     const int mb_y = s->mb_y;
2101     int i;
2102     int skip_dct[12];
2103     int dct_offset = s->linesize * 8; // default for progressive frames
2104     int uv_dct_offset = s->uvlinesize * 8;
2105     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2106     ptrdiff_t wrap_y, wrap_c;
2107
2108     for (i = 0; i < mb_block_count; i++)
2109         skip_dct[i] = s->skipdct;
2110
2111     if (s->adaptive_quant) {
2112         const int last_qp = s->qscale;
2113         const int mb_xy = mb_x + mb_y * s->mb_stride;
2114
2115         s->lambda = s->lambda_table[mb_xy];
2116         update_qscale(s);
2117
2118         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2119             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2120             s->dquant = s->qscale - last_qp;
2121
2122             if (s->out_format == FMT_H263) {
2123                 s->dquant = av_clip(s->dquant, -2, 2);
2124
2125                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2126                     if (!s->mb_intra) {
2127                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2128                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2129                                 s->dquant = 0;
2130                         }
2131                         if (s->mv_type == MV_TYPE_8X8)
2132                             s->dquant = 0;
2133                     }
2134                 }
2135             }
2136         }
2137         ff_set_qscale(s, last_qp + s->dquant);
2138     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2139         ff_set_qscale(s, s->qscale + s->dquant);
2140
2141     wrap_y = s->linesize;
2142     wrap_c = s->uvlinesize;
2143     ptr_y  = s->new_picture.f->data[0] +
2144              (mb_y * 16 * wrap_y)              + mb_x * 16;
2145     ptr_cb = s->new_picture.f->data[1] +
2146              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2147     ptr_cr = s->new_picture.f->data[2] +
2148              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2149
2150     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2151         uint8_t *ebuf = s->sc.edge_emu_buffer + 36 * wrap_y;
2152         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2153         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2154         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2155                                  wrap_y, wrap_y,
2156                                  16, 16, mb_x * 16, mb_y * 16,
2157                                  s->width, s->height);
2158         ptr_y = ebuf;
2159         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2160                                  wrap_c, wrap_c,
2161                                  mb_block_width, mb_block_height,
2162                                  mb_x * mb_block_width, mb_y * mb_block_height,
2163                                  cw, ch);
2164         ptr_cb = ebuf + 16 * wrap_y;
2165         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2166                                  wrap_c, wrap_c,
2167                                  mb_block_width, mb_block_height,
2168                                  mb_x * mb_block_width, mb_y * mb_block_height,
2169                                  cw, ch);
2170         ptr_cr = ebuf + 16 * wrap_y + 16;
2171     }
2172
2173     if (s->mb_intra) {
2174         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2175             int progressive_score, interlaced_score;
2176
2177             s->interlaced_dct = 0;
2178             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2179                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2180                                                      NULL, wrap_y, 8) - 400;
2181
2182             if (progressive_score > 0) {
2183                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2184                                                         NULL, wrap_y * 2, 8) +
2185                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2186                                                         NULL, wrap_y * 2, 8);
2187                 if (progressive_score > interlaced_score) {
2188                     s->interlaced_dct = 1;
2189
2190                     dct_offset = wrap_y;
2191                     uv_dct_offset = wrap_c;
2192                     wrap_y <<= 1;
2193                     if (s->chroma_format == CHROMA_422 ||
2194                         s->chroma_format == CHROMA_444)
2195                         wrap_c <<= 1;
2196                 }
2197             }
2198         }
2199
2200         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2201         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2202         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2203         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2204
2205         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2206             skip_dct[4] = 1;
2207             skip_dct[5] = 1;
2208         } else {
2209             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2210             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2211             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2212                 s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2213                 s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2214             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2215                 s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
2216                 s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
2217                 s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
2218                 s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
2219                 s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2220                 s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2221             }
2222         }
2223     } else {
2224         op_pixels_func (*op_pix)[4];
2225         qpel_mc_func (*op_qpix)[16];
2226         uint8_t *dest_y, *dest_cb, *dest_cr;
2227
2228         dest_y  = s->dest[0];
2229         dest_cb = s->dest[1];
2230         dest_cr = s->dest[2];
2231
2232         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2233             op_pix  = s->hdsp.put_pixels_tab;
2234             op_qpix = s->qdsp.put_qpel_pixels_tab;
2235         } else {
2236             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2237             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2238         }
2239
2240         if (s->mv_dir & MV_DIR_FORWARD) {
2241             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2242                           s->last_picture.f->data,
2243                           op_pix, op_qpix);
2244             op_pix  = s->hdsp.avg_pixels_tab;
2245             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2246         }
2247         if (s->mv_dir & MV_DIR_BACKWARD) {
2248             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2249                           s->next_picture.f->data,
2250                           op_pix, op_qpix);
2251         }
2252
2253         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2254             int progressive_score, interlaced_score;
2255
2256             s->interlaced_dct = 0;
2257             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2258                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2259                                                      ptr_y + wrap_y * 8,
2260                                                      wrap_y, 8) - 400;
2261
2262             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2263                 progressive_score -= 400;
2264
2265             if (progressive_score > 0) {
2266                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2267                                                         wrap_y * 2, 8) +
2268                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2269                                                         ptr_y + wrap_y,
2270                                                         wrap_y * 2, 8);
2271
2272                 if (progressive_score > interlaced_score) {
2273                     s->interlaced_dct = 1;
2274
2275                     dct_offset = wrap_y;
2276                     uv_dct_offset = wrap_c;
2277                     wrap_y <<= 1;
2278                     if (s->chroma_format == CHROMA_422)
2279                         wrap_c <<= 1;
2280                 }
2281             }
2282         }
2283
2284         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2285         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2286         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2287                             dest_y + dct_offset, wrap_y);
2288         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2289                             dest_y + dct_offset + 8, wrap_y);
2290
2291         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2292             skip_dct[4] = 1;
2293             skip_dct[5] = 1;
2294         } else {
2295             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2296             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2297             if (!s->chroma_y_shift) { /* 422 */
2298                 s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2299                                     dest_cb + uv_dct_offset, wrap_c);
2300                 s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2301                                     dest_cr + uv_dct_offset, wrap_c);
2302             }
2303         }
2304         /* pre quantization */
2305         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2306                 2 * s->qscale * s->qscale) {
2307             // FIXME optimize
2308             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2309                 skip_dct[0] = 1;
2310             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2311                 skip_dct[1] = 1;
2312             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2313                                wrap_y, 8) < 20 * s->qscale)
2314                 skip_dct[2] = 1;
2315             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2316                                wrap_y, 8) < 20 * s->qscale)
2317                 skip_dct[3] = 1;
2318             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2319                 skip_dct[4] = 1;
2320             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2321                 skip_dct[5] = 1;
2322             if (!s->chroma_y_shift) { /* 422 */
2323                 if (s->mecc.sad[1](NULL, ptr_cb + uv_dct_offset,
2324                                    dest_cb + uv_dct_offset,
2325                                    wrap_c, 8) < 20 * s->qscale)
2326                     skip_dct[6] = 1;
2327                 if (s->mecc.sad[1](NULL, ptr_cr + uv_dct_offset,
2328                                    dest_cr + uv_dct_offset,
2329                                    wrap_c, 8) < 20 * s->qscale)
2330                     skip_dct[7] = 1;
2331             }
2332         }
2333     }
2334
2335     if (s->quantizer_noise_shaping) {
2336         if (!skip_dct[0])
2337             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2338         if (!skip_dct[1])
2339             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2340         if (!skip_dct[2])
2341             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2342         if (!skip_dct[3])
2343             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2344         if (!skip_dct[4])
2345             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2346         if (!skip_dct[5])
2347             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2348         if (!s->chroma_y_shift) { /* 422 */
2349             if (!skip_dct[6])
2350                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2351                                   wrap_c);
2352             if (!skip_dct[7])
2353                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2354                                   wrap_c);
2355         }
2356         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2357     }
2358
2359     /* DCT & quantize */
2360     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2361     {
2362         for (i = 0; i < mb_block_count; i++) {
2363             if (!skip_dct[i]) {
2364                 int overflow;
2365                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2366                 // FIXME we could decide to change to quantizer instead of
2367                 // clipping
2368                 // JS: I don't think that would be a good idea it could lower
2369                 //     quality instead of improve it. Just INTRADC clipping
2370                 //     deserves changes in quantizer
2371                 if (overflow)
2372                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2373             } else
2374                 s->block_last_index[i] = -1;
2375         }
2376         if (s->quantizer_noise_shaping) {
2377             for (i = 0; i < mb_block_count; i++) {
2378                 if (!skip_dct[i]) {
2379                     s->block_last_index[i] =
2380                         dct_quantize_refine(s, s->block[i], weight[i],
2381                                             orig[i], i, s->qscale);
2382                 }
2383             }
2384         }
2385
2386         if (s->luma_elim_threshold && !s->mb_intra)
2387             for (i = 0; i < 4; i++)
2388                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2389         if (s->chroma_elim_threshold && !s->mb_intra)
2390             for (i = 4; i < mb_block_count; i++)
2391                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2392
2393         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2394             for (i = 0; i < mb_block_count; i++) {
2395                 if (s->block_last_index[i] == -1)
2396                     s->coded_score[i] = INT_MAX / 256;
2397             }
2398         }
2399     }
2400
2401     if ((s->avctx->flags & AV_CODEC_FLAG_GRAY) && s->mb_intra) {
2402         s->block_last_index[4] =
2403         s->block_last_index[5] = 0;
2404         s->block[4][0] =
2405         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2406         if (!s->chroma_y_shift) { /* 422 / 444 */
2407             for (i=6; i<12; i++) {
2408                 s->block_last_index[i] = 0;
2409                 s->block[i][0] = s->block[4][0];
2410             }
2411         }
2412     }
2413
2414     // non c quantize code returns incorrect block_last_index FIXME
2415     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2416         for (i = 0; i < mb_block_count; i++) {
2417             int j;
2418             if (s->block_last_index[i] > 0) {
2419                 for (j = 63; j > 0; j--) {
2420                     if (s->block[i][s->intra_scantable.permutated[j]])
2421                         break;
2422                 }
2423                 s->block_last_index[i] = j;
2424             }
2425         }
2426     }
2427
2428     /* huffman encode */
2429     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2430     case AV_CODEC_ID_MPEG1VIDEO:
2431     case AV_CODEC_ID_MPEG2VIDEO:
2432         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2433             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2434         break;
2435     case AV_CODEC_ID_MPEG4:
2436         if (CONFIG_MPEG4_ENCODER)
2437             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2438         break;
2439     case AV_CODEC_ID_MSMPEG4V2:
2440     case AV_CODEC_ID_MSMPEG4V3:
2441     case AV_CODEC_ID_WMV1:
2442         if (CONFIG_MSMPEG4_ENCODER)
2443             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2444         break;
2445     case AV_CODEC_ID_WMV2:
2446         if (CONFIG_WMV2_ENCODER)
2447             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2448         break;
2449     case AV_CODEC_ID_H261:
2450         if (CONFIG_H261_ENCODER)
2451             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2452         break;
2453     case AV_CODEC_ID_H263:
2454     case AV_CODEC_ID_H263P:
2455     case AV_CODEC_ID_FLV1:
2456     case AV_CODEC_ID_RV10:
2457     case AV_CODEC_ID_RV20:
2458         if (CONFIG_H263_ENCODER)
2459             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2460         break;
2461     case AV_CODEC_ID_MJPEG:
2462     case AV_CODEC_ID_AMV:
2463         if (CONFIG_MJPEG_ENCODER)
2464             ff_mjpeg_encode_mb(s, s->block);
2465         break;
2466     default:
2467         av_assert1(0);
2468     }
2469 }
2470
2471 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2472 {
2473     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2474     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2475     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2476 }
2477
2478 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2479     int i;
2480
2481     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2482
2483     /* mpeg1 */
2484     d->mb_skip_run= s->mb_skip_run;
2485     for(i=0; i<3; i++)
2486         d->last_dc[i] = s->last_dc[i];
2487
2488     /* statistics */
2489     d->mv_bits= s->mv_bits;
2490     d->i_tex_bits= s->i_tex_bits;
2491     d->p_tex_bits= s->p_tex_bits;
2492     d->i_count= s->i_count;
2493     d->f_count= s->f_count;
2494     d->b_count= s->b_count;
2495     d->skip_count= s->skip_count;
2496     d->misc_bits= s->misc_bits;
2497     d->last_bits= 0;
2498
2499     d->mb_skipped= 0;
2500     d->qscale= s->qscale;
2501     d->dquant= s->dquant;
2502
2503     d->esc3_level_length= s->esc3_level_length;
2504 }
2505
2506 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2507     int i;
2508
2509     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2510     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2511
2512     /* mpeg1 */
2513     d->mb_skip_run= s->mb_skip_run;
2514     for(i=0; i<3; i++)
2515         d->last_dc[i] = s->last_dc[i];
2516
2517     /* statistics */
2518     d->mv_bits= s->mv_bits;
2519     d->i_tex_bits= s->i_tex_bits;
2520     d->p_tex_bits= s->p_tex_bits;
2521     d->i_count= s->i_count;
2522     d->f_count= s->f_count;
2523     d->b_count= s->b_count;
2524     d->skip_count= s->skip_count;
2525     d->misc_bits= s->misc_bits;
2526
2527     d->mb_intra= s->mb_intra;
2528     d->mb_skipped= s->mb_skipped;
2529     d->mv_type= s->mv_type;
2530     d->mv_dir= s->mv_dir;
2531     d->pb= s->pb;
2532     if(s->data_partitioning){
2533         d->pb2= s->pb2;
2534         d->tex_pb= s->tex_pb;
2535     }
2536     d->block= s->block;
2537     for(i=0; i<8; i++)
2538         d->block_last_index[i]= s->block_last_index[i];
2539     d->interlaced_dct= s->interlaced_dct;
2540     d->qscale= s->qscale;
2541
2542     d->esc3_level_length= s->esc3_level_length;
2543 }
2544
2545 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2546                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2547                            int *dmin, int *next_block, int motion_x, int motion_y)
2548 {
2549     int score;
2550     uint8_t *dest_backup[3];
2551
2552     copy_context_before_encode(s, backup, type);
2553
2554     s->block= s->blocks[*next_block];
2555     s->pb= pb[*next_block];
2556     if(s->data_partitioning){
2557         s->pb2   = pb2   [*next_block];
2558         s->tex_pb= tex_pb[*next_block];
2559     }
2560
2561     if(*next_block){
2562         memcpy(dest_backup, s->dest, sizeof(s->dest));
2563         s->dest[0] = s->sc.rd_scratchpad;
2564         s->dest[1] = s->sc.rd_scratchpad + 16*s->linesize;
2565         s->dest[2] = s->sc.rd_scratchpad + 16*s->linesize + 8;
2566         av_assert0(s->linesize >= 32); //FIXME
2567     }
2568
2569     encode_mb(s, motion_x, motion_y);
2570
2571     score= put_bits_count(&s->pb);
2572     if(s->data_partitioning){
2573         score+= put_bits_count(&s->pb2);
2574         score+= put_bits_count(&s->tex_pb);
2575     }
2576
2577     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2578         ff_mpv_decode_mb(s, s->block);
2579
2580         score *= s->lambda2;
2581         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2582     }
2583
2584     if(*next_block){
2585         memcpy(s->dest, dest_backup, sizeof(s->dest));
2586     }
2587
2588     if(score<*dmin){
2589         *dmin= score;
2590         *next_block^=1;
2591
2592         copy_context_after_encode(best, s, type);
2593     }
2594 }
2595
2596 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2597     uint32_t *sq = ff_square_tab + 256;
2598     int acc=0;
2599     int x,y;
2600
2601     if(w==16 && h==16)
2602         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2603     else if(w==8 && h==8)
2604         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2605
2606     for(y=0; y<h; y++){
2607         for(x=0; x<w; x++){
2608             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2609         }
2610     }
2611
2612     av_assert2(acc>=0);
2613
2614     return acc;
2615 }
2616
2617 static int sse_mb(MpegEncContext *s){
2618     int w= 16;
2619     int h= 16;
2620
2621     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2622     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2623
2624     if(w==16 && h==16)
2625       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2626         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2627                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2628                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2629       }else{
2630         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2631                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2632                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2633       }
2634     else
2635         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2636                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2637                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2638 }
2639
2640 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2641     MpegEncContext *s= *(void**)arg;
2642
2643
2644     s->me.pre_pass=1;
2645     s->me.dia_size= s->avctx->pre_dia_size;
2646     s->first_slice_line=1;
2647     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2648         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2649             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2650         }
2651         s->first_slice_line=0;
2652     }
2653
2654     s->me.pre_pass=0;
2655
2656     return 0;
2657 }
2658
2659 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2660     MpegEncContext *s= *(void**)arg;
2661
2662     ff_check_alignment();
2663
2664     s->me.dia_size= s->avctx->dia_size;
2665     s->first_slice_line=1;
2666     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2667         s->mb_x=0; //for block init below
2668         ff_init_block_index(s);
2669         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2670             s->block_index[0]+=2;
2671             s->block_index[1]+=2;
2672             s->block_index[2]+=2;
2673             s->block_index[3]+=2;
2674
2675             /* compute motion vector & mb_type and store in context */
2676             if(s->pict_type==AV_PICTURE_TYPE_B)
2677                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2678             else
2679                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2680         }
2681         s->first_slice_line=0;
2682     }
2683     return 0;
2684 }
2685
2686 static int mb_var_thread(AVCodecContext *c, void *arg){
2687     MpegEncContext *s= *(void**)arg;
2688     int mb_x, mb_y;
2689
2690     ff_check_alignment();
2691
2692     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2693         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2694             int xx = mb_x * 16;
2695             int yy = mb_y * 16;
2696             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2697             int varc;
2698             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2699
2700             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2701                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2702
2703             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2704             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2705             s->me.mb_var_sum_temp    += varc;
2706         }
2707     }
2708     return 0;
2709 }
2710
2711 static void write_slice_end(MpegEncContext *s){
2712     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2713         if(s->partitioned_frame){
2714             ff_mpeg4_merge_partitions(s);
2715         }
2716
2717         ff_mpeg4_stuffing(&s->pb);
2718     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2719         ff_mjpeg_encode_stuffing(s);
2720     }
2721
2722     avpriv_align_put_bits(&s->pb);
2723     flush_put_bits(&s->pb);
2724
2725     if ((s->avctx->flags & AV_CODEC_FLAG_PASS1) && !s->partitioned_frame)
2726         s->misc_bits+= get_bits_diff(s);
2727 }
2728
2729 static void write_mb_info(MpegEncContext *s)
2730 {
2731     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2732     int offset = put_bits_count(&s->pb);
2733     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2734     int gobn = s->mb_y / s->gob_index;
2735     int pred_x, pred_y;
2736     if (CONFIG_H263_ENCODER)
2737         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2738     bytestream_put_le32(&ptr, offset);
2739     bytestream_put_byte(&ptr, s->qscale);
2740     bytestream_put_byte(&ptr, gobn);
2741     bytestream_put_le16(&ptr, mba);
2742     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2743     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2744     /* 4MV not implemented */
2745     bytestream_put_byte(&ptr, 0); /* hmv2 */
2746     bytestream_put_byte(&ptr, 0); /* vmv2 */
2747 }
2748
2749 static void update_mb_info(MpegEncContext *s, int startcode)
2750 {
2751     if (!s->mb_info)
2752         return;
2753     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2754         s->mb_info_size += 12;
2755         s->prev_mb_info = s->last_mb_info;
2756     }
2757     if (startcode) {
2758         s->prev_mb_info = put_bits_count(&s->pb)/8;
2759         /* This might have incremented mb_info_size above, and we return without
2760          * actually writing any info into that slot yet. But in that case,
2761          * this will be called again at the start of the after writing the
2762          * start code, actually writing the mb info. */
2763         return;
2764     }
2765
2766     s->last_mb_info = put_bits_count(&s->pb)/8;
2767     if (!s->mb_info_size)
2768         s->mb_info_size += 12;
2769     write_mb_info(s);
2770 }
2771
2772 int ff_mpv_reallocate_putbitbuffer(MpegEncContext *s, size_t threshold, size_t size_increase)
2773 {
2774     if (   s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < threshold
2775         && s->slice_context_count == 1
2776         && s->pb.buf == s->avctx->internal->byte_buffer) {
2777         int lastgob_pos = s->ptr_lastgob - s->pb.buf;
2778         int vbv_pos     = s->vbv_delay_ptr - s->pb.buf;
2779
2780         uint8_t *new_buffer = NULL;
2781         int new_buffer_size = 0;
2782
2783         av_fast_padded_malloc(&new_buffer, &new_buffer_size,
2784                               s->avctx->internal->byte_buffer_size + size_increase);
2785         if (!new_buffer)
2786             return AVERROR(ENOMEM);
2787
2788         memcpy(new_buffer, s->avctx->internal->byte_buffer, s->avctx->internal->byte_buffer_size);
2789         av_free(s->avctx->internal->byte_buffer);
2790         s->avctx->internal->byte_buffer      = new_buffer;
2791         s->avctx->internal->byte_buffer_size = new_buffer_size;
2792         rebase_put_bits(&s->pb, new_buffer, new_buffer_size);
2793         s->ptr_lastgob   = s->pb.buf + lastgob_pos;
2794         s->vbv_delay_ptr = s->pb.buf + vbv_pos;
2795     }
2796     if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < threshold)
2797         return AVERROR(EINVAL);
2798     return 0;
2799 }
2800
2801 static int encode_thread(AVCodecContext *c, void *arg){
2802     MpegEncContext *s= *(void**)arg;
2803     int mb_x, mb_y, pdif = 0;
2804     int chr_h= 16>>s->chroma_y_shift;
2805     int i, j;
2806     MpegEncContext best_s = { 0 }, backup_s;
2807     uint8_t bit_buf[2][MAX_MB_BYTES];
2808     uint8_t bit_buf2[2][MAX_MB_BYTES];
2809     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2810     PutBitContext pb[2], pb2[2], tex_pb[2];
2811
2812     ff_check_alignment();
2813
2814     for(i=0; i<2; i++){
2815         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2816         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2817         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2818     }
2819
2820     s->last_bits= put_bits_count(&s->pb);
2821     s->mv_bits=0;
2822     s->misc_bits=0;
2823     s->i_tex_bits=0;
2824     s->p_tex_bits=0;
2825     s->i_count=0;
2826     s->f_count=0;
2827     s->b_count=0;
2828     s->skip_count=0;
2829
2830     for(i=0; i<3; i++){
2831         /* init last dc values */
2832         /* note: quant matrix value (8) is implied here */
2833         s->last_dc[i] = 128 << s->intra_dc_precision;
2834
2835         s->current_picture.error[i] = 0;
2836     }
2837     if(s->codec_id==AV_CODEC_ID_AMV){
2838         s->last_dc[0] = 128*8/13;
2839         s->last_dc[1] = 128*8/14;
2840         s->last_dc[2] = 128*8/14;
2841     }
2842     s->mb_skip_run = 0;
2843     memset(s->last_mv, 0, sizeof(s->last_mv));
2844
2845     s->last_mv_dir = 0;
2846
2847     switch(s->codec_id){
2848     case AV_CODEC_ID_H263:
2849     case AV_CODEC_ID_H263P:
2850     case AV_CODEC_ID_FLV1:
2851         if (CONFIG_H263_ENCODER)
2852             s->gob_index = H263_GOB_HEIGHT(s->height);
2853         break;
2854     case AV_CODEC_ID_MPEG4:
2855         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2856             ff_mpeg4_init_partitions(s);
2857         break;
2858     }
2859
2860     s->resync_mb_x=0;
2861     s->resync_mb_y=0;
2862     s->first_slice_line = 1;
2863     s->ptr_lastgob = s->pb.buf;
2864     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2865         s->mb_x=0;
2866         s->mb_y= mb_y;
2867
2868         ff_set_qscale(s, s->qscale);
2869         ff_init_block_index(s);
2870
2871         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2872             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2873             int mb_type= s->mb_type[xy];
2874 //            int d;
2875             int dmin= INT_MAX;
2876             int dir;
2877             int size_increase =  s->avctx->internal->byte_buffer_size/4
2878                                + s->mb_width*MAX_MB_BYTES;
2879
2880             ff_mpv_reallocate_putbitbuffer(s, MAX_MB_BYTES, size_increase);
2881             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2882                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2883                 return -1;
2884             }
2885             if(s->data_partitioning){
2886                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2887                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2888                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2889                     return -1;
2890                 }
2891             }
2892
2893             s->mb_x = mb_x;
2894             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2895             ff_update_block_index(s);
2896
2897             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2898                 ff_h261_reorder_mb_index(s);
2899                 xy= s->mb_y*s->mb_stride + s->mb_x;
2900                 mb_type= s->mb_type[xy];
2901             }
2902
2903             /* write gob / video packet header  */
2904             if(s->rtp_mode){
2905                 int current_packet_size, is_gob_start;
2906
2907                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2908
2909                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2910
2911                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2912
2913                 switch(s->codec_id){
2914                 case AV_CODEC_ID_H263:
2915                 case AV_CODEC_ID_H263P:
2916                     if(!s->h263_slice_structured)
2917                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2918                     break;
2919                 case AV_CODEC_ID_MPEG2VIDEO:
2920                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2921                 case AV_CODEC_ID_MPEG1VIDEO:
2922                     if(s->mb_skip_run) is_gob_start=0;
2923                     break;
2924                 case AV_CODEC_ID_MJPEG:
2925                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2926                     break;
2927                 }
2928
2929                 if(is_gob_start){
2930                     if(s->start_mb_y != mb_y || mb_x!=0){
2931                         write_slice_end(s);
2932
2933                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2934                             ff_mpeg4_init_partitions(s);
2935                         }
2936                     }
2937
2938                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2939                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2940
2941                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2942                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2943                         int d = 100 / s->error_rate;
2944                         if(r % d == 0){
2945                             current_packet_size=0;
2946                             s->pb.buf_ptr= s->ptr_lastgob;
2947                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2948                         }
2949                     }
2950
2951                     if (s->avctx->rtp_callback){
2952                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2953                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2954                     }
2955                     update_mb_info(s, 1);
2956
2957                     switch(s->codec_id){
2958                     case AV_CODEC_ID_MPEG4:
2959                         if (CONFIG_MPEG4_ENCODER) {
2960                             ff_mpeg4_encode_video_packet_header(s);
2961                             ff_mpeg4_clean_buffers(s);
2962                         }
2963                     break;
2964                     case AV_CODEC_ID_MPEG1VIDEO:
2965                     case AV_CODEC_ID_MPEG2VIDEO:
2966                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2967                             ff_mpeg1_encode_slice_header(s);
2968                             ff_mpeg1_clean_buffers(s);
2969                         }
2970                     break;
2971                     case AV_CODEC_ID_H263:
2972                     case AV_CODEC_ID_H263P:
2973                         if (CONFIG_H263_ENCODER)
2974                             ff_h263_encode_gob_header(s, mb_y);
2975                     break;
2976                     }
2977
2978                     if (s->avctx->flags & AV_CODEC_FLAG_PASS1) {
2979                         int bits= put_bits_count(&s->pb);
2980                         s->misc_bits+= bits - s->last_bits;
2981                         s->last_bits= bits;
2982                     }
2983
2984                     s->ptr_lastgob += current_packet_size;
2985                     s->first_slice_line=1;
2986                     s->resync_mb_x=mb_x;
2987                     s->resync_mb_y=mb_y;
2988                 }
2989             }
2990
2991             if(  (s->resync_mb_x   == s->mb_x)
2992                && s->resync_mb_y+1 == s->mb_y){
2993                 s->first_slice_line=0;
2994             }
2995
2996             s->mb_skipped=0;
2997             s->dquant=0; //only for QP_RD
2998
2999             update_mb_info(s, 0);
3000
3001             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
3002                 int next_block=0;
3003                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
3004
3005                 copy_context_before_encode(&backup_s, s, -1);
3006                 backup_s.pb= s->pb;
3007                 best_s.data_partitioning= s->data_partitioning;
3008                 best_s.partitioned_frame= s->partitioned_frame;
3009                 if(s->data_partitioning){
3010                     backup_s.pb2= s->pb2;
3011                     backup_s.tex_pb= s->tex_pb;
3012                 }
3013
3014                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
3015                     s->mv_dir = MV_DIR_FORWARD;
3016                     s->mv_type = MV_TYPE_16X16;
3017                     s->mb_intra= 0;
3018                     s->mv[0][0][0] = s->p_mv_table[xy][0];
3019                     s->mv[0][0][1] = s->p_mv_table[xy][1];
3020                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
3021                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3022                 }
3023                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
3024                     s->mv_dir = MV_DIR_FORWARD;
3025                     s->mv_type = MV_TYPE_FIELD;
3026                     s->mb_intra= 0;
3027                     for(i=0; i<2; i++){
3028                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3029                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3030                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3031                     }
3032                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
3033                                  &dmin, &next_block, 0, 0);
3034                 }
3035                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
3036                     s->mv_dir = MV_DIR_FORWARD;
3037                     s->mv_type = MV_TYPE_16X16;
3038                     s->mb_intra= 0;
3039                     s->mv[0][0][0] = 0;
3040                     s->mv[0][0][1] = 0;
3041                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
3042                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3043                 }
3044                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
3045                     s->mv_dir = MV_DIR_FORWARD;
3046                     s->mv_type = MV_TYPE_8X8;
3047                     s->mb_intra= 0;
3048                     for(i=0; i<4; i++){
3049                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3050                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3051                     }
3052                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
3053                                  &dmin, &next_block, 0, 0);
3054                 }
3055                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
3056                     s->mv_dir = MV_DIR_FORWARD;
3057                     s->mv_type = MV_TYPE_16X16;
3058                     s->mb_intra= 0;
3059                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3060                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3061                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
3062                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3063                 }
3064                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
3065                     s->mv_dir = MV_DIR_BACKWARD;
3066                     s->mv_type = MV_TYPE_16X16;
3067                     s->mb_intra= 0;
3068                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3069                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3070                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
3071                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3072                 }
3073                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
3074                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3075                     s->mv_type = MV_TYPE_16X16;
3076                     s->mb_intra= 0;
3077                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3078                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3079                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3080                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3081                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
3082                                  &dmin, &next_block, 0, 0);
3083                 }
3084                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
3085                     s->mv_dir = MV_DIR_FORWARD;
3086                     s->mv_type = MV_TYPE_FIELD;
3087                     s->mb_intra= 0;
3088                     for(i=0; i<2; i++){
3089                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3090                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3091                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3092                     }
3093                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
3094                                  &dmin, &next_block, 0, 0);
3095                 }
3096                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
3097                     s->mv_dir = MV_DIR_BACKWARD;
3098                     s->mv_type = MV_TYPE_FIELD;
3099                     s->mb_intra= 0;
3100                     for(i=0; i<2; i++){
3101                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3102                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3103                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3104                     }
3105                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
3106                                  &dmin, &next_block, 0, 0);
3107                 }
3108                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
3109                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3110                     s->mv_type = MV_TYPE_FIELD;
3111                     s->mb_intra= 0;
3112                     for(dir=0; dir<2; dir++){
3113                         for(i=0; i<2; i++){
3114                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3115                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3116                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3117                         }
3118                     }
3119                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
3120                                  &dmin, &next_block, 0, 0);
3121                 }
3122                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3123                     s->mv_dir = 0;
3124                     s->mv_type = MV_TYPE_16X16;
3125                     s->mb_intra= 1;
3126                     s->mv[0][0][0] = 0;
3127                     s->mv[0][0][1] = 0;
3128                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3129                                  &dmin, &next_block, 0, 0);
3130                     if(s->h263_pred || s->h263_aic){
3131                         if(best_s.mb_intra)
3132                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3133                         else
3134                             ff_clean_intra_table_entries(s); //old mode?
3135                     }
3136                 }
3137
3138                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3139                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3140                         const int last_qp= backup_s.qscale;
3141                         int qpi, qp, dc[6];
3142                         int16_t ac[6][16];
3143                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3144                         static const int dquant_tab[4]={-1,1,-2,2};
3145                         int storecoefs = s->mb_intra && s->dc_val[0];
3146
3147                         av_assert2(backup_s.dquant == 0);
3148
3149                         //FIXME intra
3150                         s->mv_dir= best_s.mv_dir;
3151                         s->mv_type = MV_TYPE_16X16;
3152                         s->mb_intra= best_s.mb_intra;
3153                         s->mv[0][0][0] = best_s.mv[0][0][0];
3154                         s->mv[0][0][1] = best_s.mv[0][0][1];
3155                         s->mv[1][0][0] = best_s.mv[1][0][0];
3156                         s->mv[1][0][1] = best_s.mv[1][0][1];
3157
3158                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3159                         for(; qpi<4; qpi++){
3160                             int dquant= dquant_tab[qpi];
3161                             qp= last_qp + dquant;
3162                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3163                                 continue;
3164                             backup_s.dquant= dquant;
3165                             if(storecoefs){
3166                                 for(i=0; i<6; i++){
3167                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3168                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3169                                 }
3170                             }
3171
3172                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3173                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3174                             if(best_s.qscale != qp){
3175                                 if(storecoefs){
3176                                     for(i=0; i<6; i++){
3177                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3178                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3179                                     }
3180                                 }
3181                             }
3182                         }
3183                     }
3184                 }
3185                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3186                     int mx= s->b_direct_mv_table[xy][0];
3187                     int my= s->b_direct_mv_table[xy][1];
3188
3189                     backup_s.dquant = 0;
3190                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3191                     s->mb_intra= 0;
3192                     ff_mpeg4_set_direct_mv(s, mx, my);
3193                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3194                                  &dmin, &next_block, mx, my);
3195                 }
3196                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3197                     backup_s.dquant = 0;
3198                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3199                     s->mb_intra= 0;
3200                     ff_mpeg4_set_direct_mv(s, 0, 0);
3201                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3202                                  &dmin, &next_block, 0, 0);
3203                 }
3204                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3205                     int coded=0;
3206                     for(i=0; i<6; i++)
3207                         coded |= s->block_last_index[i];
3208                     if(coded){
3209                         int mx,my;
3210                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3211                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3212                             mx=my=0; //FIXME find the one we actually used
3213                             ff_mpeg4_set_direct_mv(s, mx, my);
3214                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3215                             mx= s->mv[1][0][0];
3216                             my= s->mv[1][0][1];
3217                         }else{
3218                             mx= s->mv[0][0][0];
3219                             my= s->mv[0][0][1];
3220                         }
3221
3222                         s->mv_dir= best_s.mv_dir;
3223                         s->mv_type = best_s.mv_type;
3224                         s->mb_intra= 0;
3225 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3226                         s->mv[0][0][1] = best_s.mv[0][0][1];
3227                         s->mv[1][0][0] = best_s.mv[1][0][0];
3228                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3229                         backup_s.dquant= 0;
3230                         s->skipdct=1;
3231                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3232                                         &dmin, &next_block, mx, my);
3233                         s->skipdct=0;
3234                     }
3235                 }
3236
3237                 s->current_picture.qscale_table[xy] = best_s.qscale;
3238
3239                 copy_context_after_encode(s, &best_s, -1);
3240
3241                 pb_bits_count= put_bits_count(&s->pb);
3242                 flush_put_bits(&s->pb);
3243                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3244                 s->pb= backup_s.pb;
3245
3246                 if(s->data_partitioning){
3247                     pb2_bits_count= put_bits_count(&s->pb2);
3248                     flush_put_bits(&s->pb2);
3249                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3250                     s->pb2= backup_s.pb2;
3251
3252                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3253                     flush_put_bits(&s->tex_pb);
3254                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3255                     s->tex_pb= backup_s.tex_pb;
3256                 }
3257                 s->last_bits= put_bits_count(&s->pb);
3258
3259                 if (CONFIG_H263_ENCODER &&
3260                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3261                     ff_h263_update_motion_val(s);
3262
3263                 if(next_block==0){ //FIXME 16 vs linesize16
3264                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->sc.rd_scratchpad                     , s->linesize  ,16);
3265                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->sc.rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3266                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->sc.rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3267                 }
3268
3269                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3270                     ff_mpv_decode_mb(s, s->block);
3271             } else {
3272                 int motion_x = 0, motion_y = 0;
3273                 s->mv_type=MV_TYPE_16X16;
3274                 // only one MB-Type possible
3275
3276                 switch(mb_type){
3277                 case CANDIDATE_MB_TYPE_INTRA:
3278                     s->mv_dir = 0;
3279                     s->mb_intra= 1;
3280                     motion_x= s->mv[0][0][0] = 0;
3281                     motion_y= s->mv[0][0][1] = 0;
3282                     break;
3283                 case CANDIDATE_MB_TYPE_INTER:
3284                     s->mv_dir = MV_DIR_FORWARD;
3285                     s->mb_intra= 0;
3286                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3287                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3288                     break;
3289                 case CANDIDATE_MB_TYPE_INTER_I:
3290                     s->mv_dir = MV_DIR_FORWARD;
3291                     s->mv_type = MV_TYPE_FIELD;
3292                     s->mb_intra= 0;
3293                     for(i=0; i<2; i++){
3294                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3295                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3296                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3297                     }
3298                     break;
3299                 case CANDIDATE_MB_TYPE_INTER4V:
3300                     s->mv_dir = MV_DIR_FORWARD;
3301                     s->mv_type = MV_TYPE_8X8;
3302                     s->mb_intra= 0;
3303                     for(i=0; i<4; i++){
3304                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3305                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3306                     }
3307                     break;
3308                 case CANDIDATE_MB_TYPE_DIRECT:
3309                     if (CONFIG_MPEG4_ENCODER) {
3310                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3311                         s->mb_intra= 0;
3312                         motion_x=s->b_direct_mv_table[xy][0];
3313                         motion_y=s->b_direct_mv_table[xy][1];
3314                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3315                     }
3316                     break;
3317                 case CANDIDATE_MB_TYPE_DIRECT0:
3318                     if (CONFIG_MPEG4_ENCODER) {
3319                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3320                         s->mb_intra= 0;
3321                         ff_mpeg4_set_direct_mv(s, 0, 0);
3322                     }
3323                     break;
3324                 case CANDIDATE_MB_TYPE_BIDIR:
3325                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3326                     s->mb_intra= 0;
3327                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3328                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3329                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3330                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3331                     break;
3332                 case CANDIDATE_MB_TYPE_BACKWARD:
3333                     s->mv_dir = MV_DIR_BACKWARD;
3334                     s->mb_intra= 0;
3335                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3336                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3337                     break;
3338                 case CANDIDATE_MB_TYPE_FORWARD:
3339                     s->mv_dir = MV_DIR_FORWARD;
3340                     s->mb_intra= 0;
3341                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3342                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3343                     break;
3344                 case CANDIDATE_MB_TYPE_FORWARD_I:
3345                     s->mv_dir = MV_DIR_FORWARD;
3346                     s->mv_type = MV_TYPE_FIELD;
3347                     s->mb_intra= 0;
3348                     for(i=0; i<2; i++){
3349                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3350                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3351                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3352                     }
3353                     break;
3354                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3355                     s->mv_dir = MV_DIR_BACKWARD;
3356                     s->mv_type = MV_TYPE_FIELD;
3357                     s->mb_intra= 0;
3358                     for(i=0; i<2; i++){
3359                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3360                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3361                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3362                     }
3363                     break;
3364                 case CANDIDATE_MB_TYPE_BIDIR_I:
3365                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3366                     s->mv_type = MV_TYPE_FIELD;
3367                     s->mb_intra= 0;
3368                     for(dir=0; dir<2; dir++){
3369                         for(i=0; i<2; i++){
3370                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3371                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3372                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3373                         }
3374                     }
3375                     break;
3376                 default:
3377                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3378                 }
3379
3380                 encode_mb(s, motion_x, motion_y);
3381
3382                 // RAL: Update last macroblock type
3383                 s->last_mv_dir = s->mv_dir;
3384
3385                 if (CONFIG_H263_ENCODER &&
3386                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3387                     ff_h263_update_motion_val(s);
3388
3389                 ff_mpv_decode_mb(s, s->block);
3390             }
3391
3392             /* clean the MV table in IPS frames for direct mode in B frames */
3393             if(s->mb_intra /* && I,P,S_TYPE */){
3394                 s->p_mv_table[xy][0]=0;
3395                 s->p_mv_table[xy][1]=0;
3396             }
3397
3398             if (s->avctx->flags & AV_CODEC_FLAG_PSNR) {
3399                 int w= 16;
3400                 int h= 16;
3401
3402                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3403                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3404
3405                 s->current_picture.error[0] += sse(
3406                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3407                     s->dest[0], w, h, s->linesize);
3408                 s->current_picture.error[1] += sse(
3409                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3410                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3411                 s->current_picture.error[2] += sse(
3412                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3413                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3414             }
3415             if(s->loop_filter){
3416                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3417                     ff_h263_loop_filter(s);
3418             }
3419             ff_dlog(s->avctx, "MB %d %d bits\n",
3420                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3421         }
3422     }
3423
3424     //not beautiful here but we must write it before flushing so it has to be here
3425     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3426         ff_msmpeg4_encode_ext_header(s);
3427
3428     write_slice_end(s);
3429
3430     /* Send the last GOB if RTP */
3431     if (s->avctx->rtp_callback) {
3432         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3433         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3434         /* Call the RTP callback to send the last GOB */
3435         emms_c();
3436         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3437     }
3438
3439     return 0;
3440 }
3441
3442 #define MERGE(field) dst->field += src->field; src->field=0
3443 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3444     MERGE(me.scene_change_score);
3445     MERGE(me.mc_mb_var_sum_temp);
3446     MERGE(me.mb_var_sum_temp);
3447 }
3448
3449 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3450     int i;
3451
3452     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3453     MERGE(dct_count[1]);
3454     MERGE(mv_bits);
3455     MERGE(i_tex_bits);
3456     MERGE(p_tex_bits);
3457     MERGE(i_count);
3458     MERGE(f_count);
3459     MERGE(b_count);
3460     MERGE(skip_count);
3461     MERGE(misc_bits);
3462     MERGE(er.error_count);
3463     MERGE(padding_bug_score);
3464     MERGE(current_picture.error[0]);
3465     MERGE(current_picture.error[1]);
3466     MERGE(current_picture.error[2]);
3467
3468     if(dst->avctx->noise_reduction){
3469         for(i=0; i<64; i++){
3470             MERGE(dct_error_sum[0][i]);
3471             MERGE(dct_error_sum[1][i]);
3472         }
3473     }
3474
3475     assert(put_bits_count(&src->pb) % 8 ==0);
3476     assert(put_bits_count(&dst->pb) % 8 ==0);
3477     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3478     flush_put_bits(&dst->pb);
3479 }
3480
3481 static int estimate_qp(MpegEncContext *s, int dry_run){
3482     if (s->next_lambda){
3483         s->current_picture_ptr->f->quality =
3484         s->current_picture.f->quality = s->next_lambda;
3485         if(!dry_run) s->next_lambda= 0;
3486     } else if (!s->fixed_qscale) {
3487         s->current_picture_ptr->f->quality =
3488         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3489         if (s->current_picture.f->quality < 0)
3490             return -1;
3491     }
3492
3493     if(s->adaptive_quant){
3494         switch(s->codec_id){
3495         case AV_CODEC_ID_MPEG4:
3496             if (CONFIG_MPEG4_ENCODER)
3497                 ff_clean_mpeg4_qscales(s);
3498             break;
3499         case AV_CODEC_ID_H263:
3500         case AV_CODEC_ID_H263P:
3501         case AV_CODEC_ID_FLV1:
3502             if (CONFIG_H263_ENCODER)
3503                 ff_clean_h263_qscales(s);
3504             break;
3505         default:
3506             ff_init_qscale_tab(s);
3507         }
3508
3509         s->lambda= s->lambda_table[0];
3510         //FIXME broken
3511     }else
3512         s->lambda = s->current_picture.f->quality;
3513     update_qscale(s);
3514     return 0;
3515 }
3516
3517 /* must be called before writing the header */
3518 static void set_frame_distances(MpegEncContext * s){
3519     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3520     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3521
3522     if(s->pict_type==AV_PICTURE_TYPE_B){
3523         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3524         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3525     }else{
3526         s->pp_time= s->time - s->last_non_b_time;
3527         s->last_non_b_time= s->time;
3528         assert(s->picture_number==0 || s->pp_time > 0);
3529     }
3530 }
3531
3532 static int encode_picture(MpegEncContext *s, int picture_number)
3533 {
3534     int i, ret;
3535     int bits;
3536     int context_count = s->slice_context_count;
3537
3538     s->picture_number = picture_number;
3539
3540     /* Reset the average MB variance */
3541     s->me.mb_var_sum_temp    =
3542     s->me.mc_mb_var_sum_temp = 0;
3543
3544     /* we need to initialize some time vars before we can encode b-frames */
3545     // RAL: Condition added for MPEG1VIDEO
3546     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3547         set_frame_distances(s);
3548     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3549         ff_set_mpeg4_time(s);
3550
3551     s->me.scene_change_score=0;
3552
3553 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3554
3555     if(s->pict_type==AV_PICTURE_TYPE_I){
3556         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3557         else                        s->no_rounding=0;
3558     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3559         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3560             s->no_rounding ^= 1;
3561     }
3562
3563     if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
3564         if (estimate_qp(s,1) < 0)
3565             return -1;
3566         ff_get_2pass_fcode(s);
3567     } else if (!(s->avctx->flags & AV_CODEC_FLAG_QSCALE)) {
3568         if(s->pict_type==AV_PICTURE_TYPE_B)
3569             s->lambda= s->last_lambda_for[s->pict_type];
3570         else
3571             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3572         update_qscale(s);
3573     }
3574
3575     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3576         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3577         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3578         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3579         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3580     }
3581
3582     s->mb_intra=0; //for the rate distortion & bit compare functions
3583     for(i=1; i<context_count; i++){
3584         ret = ff_update_duplicate_context(s->thread_context[i], s);
3585         if (ret < 0)
3586             return ret;
3587     }
3588
3589     if(ff_init_me(s)<0)
3590         return -1;
3591
3592     /* Estimate motion for every MB */
3593     if(s->pict_type != AV_PICTURE_TYPE_I){
3594         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3595         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3596         if (s->pict_type != AV_PICTURE_TYPE_B) {
3597             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3598                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3599             }
3600         }
3601
3602         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3603     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3604         /* I-Frame */
3605         for(i=0; i<s->mb_stride*s->mb_height; i++)
3606             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3607
3608         if(!s->fixed_qscale){
3609             /* finding spatial complexity for I-frame rate control */
3610             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3611         }
3612     }
3613     for(i=1; i<context_count; i++){
3614         merge_context_after_me(s, s->thread_context[i]);
3615     }
3616     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3617     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3618     emms_c();
3619
3620     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3621         s->pict_type= AV_PICTURE_TYPE_I;
3622         for(i=0; i<s->mb_stride*s->mb_height; i++)
3623             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3624         if(s->msmpeg4_version >= 3)
3625             s->no_rounding=1;
3626         ff_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3627                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3628     }
3629
3630     if(!s->umvplus){
3631         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3632             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3633
3634             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3635                 int a,b;
3636                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3637                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3638                 s->f_code= FFMAX3(s->f_code, a, b);
3639             }
3640
3641             ff_fix_long_p_mvs(s);
3642             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3643             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3644                 int j;
3645                 for(i=0; i<2; i++){
3646                     for(j=0; j<2; j++)
3647                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3648                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3649                 }
3650             }
3651         }
3652
3653         if(s->pict_type==AV_PICTURE_TYPE_B){
3654             int a, b;
3655
3656             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3657             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3658             s->f_code = FFMAX(a, b);
3659
3660             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3661             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3662             s->b_code = FFMAX(a, b);
3663
3664             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3665             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3666             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3667             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3668             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3669                 int dir, j;
3670                 for(dir=0; dir<2; dir++){
3671                     for(i=0; i<2; i++){
3672                         for(j=0; j<2; j++){
3673                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3674                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3675                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3676                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3677                         }
3678                     }
3679                 }
3680             }
3681         }
3682     }
3683
3684     if (estimate_qp(s, 0) < 0)
3685         return -1;
3686
3687     if (s->qscale < 3 && s->max_qcoeff <= 128 &&
3688         s->pict_type == AV_PICTURE_TYPE_I &&
3689         !(s->avctx->flags & AV_CODEC_FLAG_QSCALE))
3690         s->qscale= 3; //reduce clipping problems
3691
3692     if (s->out_format == FMT_MJPEG) {
3693         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3694         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3695
3696         if (s->avctx->intra_matrix) {
3697             chroma_matrix =
3698             luma_matrix = s->avctx->intra_matrix;
3699         }
3700         if (s->avctx->chroma_intra_matrix)
3701             chroma_matrix = s->avctx->chroma_intra_matrix;
3702
3703         /* for mjpeg, we do include qscale in the matrix */
3704         for(i=1;i<64;i++){
3705             int j = s->idsp.idct_permutation[i];
3706
3707             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3708             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3709         }
3710         s->y_dc_scale_table=
3711         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3712         s->chroma_intra_matrix[0] =
3713         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3714         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3715                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3716         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3717                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3718         s->qscale= 8;
3719     }
3720     if(s->codec_id == AV_CODEC_ID_AMV){
3721         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3722         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3723         for(i=1;i<64;i++){
3724             int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
3725
3726             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3727             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3728         }
3729         s->y_dc_scale_table= y;
3730         s->c_dc_scale_table= c;
3731         s->intra_matrix[0] = 13;
3732         s->chroma_intra_matrix[0] = 14;
3733         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3734                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3735         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3736                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3737         s->qscale= 8;
3738     }
3739
3740     //FIXME var duplication
3741     s->current_picture_ptr->f->key_frame =
3742     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3743     s->current_picture_ptr->f->pict_type =
3744     s->current_picture.f->pict_type = s->pict_type;
3745
3746     if (s->current_picture.f->key_frame)
3747         s->picture_in_gop_number=0;
3748
3749     s->mb_x = s->mb_y = 0;
3750     s->last_bits= put_bits_count(&s->pb);
3751     switch(s->out_format) {
3752     case FMT_MJPEG:
3753         if (CONFIG_MJPEG_ENCODER)
3754             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3755                                            s->intra_matrix, s->chroma_intra_matrix);
3756         break;
3757     case FMT_H261:
3758         if (CONFIG_H261_ENCODER)
3759             ff_h261_encode_picture_header(s, picture_number);
3760         break;
3761     case FMT_H263:
3762         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3763             ff_wmv2_encode_picture_header(s, picture_number);
3764         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3765             ff_msmpeg4_encode_picture_header(s, picture_number);
3766         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3767             ff_mpeg4_encode_picture_header(s, picture_number);
3768         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10) {
3769             ret = ff_rv10_encode_picture_header(s, picture_number);
3770             if (ret < 0)
3771                 return ret;
3772         }
3773         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3774             ff_rv20_encode_picture_header(s, picture_number);
3775         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3776             ff_flv_encode_picture_header(s, picture_number);
3777         else if (CONFIG_H263_ENCODER)
3778             ff_h263_encode_picture_header(s, picture_number);
3779         break;
3780     case FMT_MPEG1:
3781         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3782             ff_mpeg1_encode_picture_header(s, picture_number);
3783         break;
3784     default:
3785         av_assert0(0);
3786     }
3787     bits= put_bits_count(&s->pb);
3788     s->header_bits= bits - s->last_bits;
3789
3790     for(i=1; i<context_count; i++){
3791         update_duplicate_context_after_me(s->thread_context[i], s);
3792     }
3793     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3794     for(i=1; i<context_count; i++){
3795         if (s->pb.buf_end == s->thread_context[i]->pb.buf)
3796             set_put_bits_buffer_size(&s->pb, FFMIN(s->thread_context[i]->pb.buf_end - s->pb.buf, INT_MAX/8-32));
3797         merge_context_after_encode(s, s->thread_context[i]);
3798     }
3799     emms_c();
3800     return 0;
3801 }
3802
3803 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3804     const int intra= s->mb_intra;
3805     int i;
3806
3807     s->dct_count[intra]++;
3808
3809     for(i=0; i<64; i++){
3810         int level= block[i];
3811
3812         if(level){
3813             if(level>0){
3814                 s->dct_error_sum[intra][i] += level;
3815                 level -= s->dct_offset[intra][i];
3816                 if(level<0) level=0;
3817             }else{
3818                 s->dct_error_sum[intra][i] -= level;
3819                 level += s->dct_offset[intra][i];
3820                 if(level>0) level=0;
3821             }
3822             block[i]= level;
3823         }
3824     }
3825 }
3826
/**
 * Transform one 8x8 block and quantize it with a rate-distortion
 * ("trellis") search over candidate levels instead of plain rounding.
 *
 * The block is passed through s->fdsp.fdct() and, when s->dct_error_sum is
 * set, through s->denoise_dct(). For intra blocks the DC coefficient is
 * divided by the DC scale (with rounding) and the search starts at scan
 * position 1; for inter blocks it starts at scan position 0. For every scan
 * position up to the last coefficient that survives plain quantization, up
 * to two candidate levels are scored as squared reconstruction error plus
 * lambda times a table length looked up via UNI_AC_ENC_INDEX(run, level),
 * or esc_length when level+64 does not fit in 0..127. The cheapest chain of
 * (run, level) pairs is finally written back into block through the
 * permutated scantable.
 *
 * @param s        encoder context; s->coded_score[n] is written when the
 *                 search is run
 * @param block    in: input of the forward DCT, out: quantized coefficients
 * @param n        block number; n < 4 selects the luma tables, otherwise
 *                 the chroma ones
 * @param qscale   quantizer scale, used to index the quantization matrices
 * @param overflow set to 1 when the OR of the plainly quantized magnitudes
 *                 is larger than s->max_qcoeff, 0 otherwise
 * @return scan index of the last coefficient that was kept; a value below
 *         the start position (0 for intra, -1 for inter) means that no
 *         coefficient from the start position on was kept
 */
static int dct_quantize_trellis_c(MpegEncContext *s,
                                  int16_t *block, int n,
                                  int qscale, int *overflow){
    const int *qmat;
    const uint16_t *matrix;
    const uint8_t *scantable= s->intra_scantable.scantable;
    const uint8_t *perm_scantable= s->intra_scantable.permutated;
    int max=0;
    unsigned int threshold1, threshold2;
    int bias=0;
    /* Trellis state, indexed by "number of scan positions consumed":
     * run_tab/level_tab hold the best (run, level) pair ending just before
     * that index, score_tab the accumulated score of that best path. */
    int run_tab[65];
    int level_tab[65];
    int score_tab[65];
    /* Path end points that are still worth extending. */
    int survivor[65];
    int survivor_count;
    int last_run=0;
    int last_level=0;
    int last_score= 0;
    int last_i;
    /* Candidate levels per scan position and how many of them are valid. */
    int coeff[2][64];
    int coeff_count[64];
    int qmul, qadd, start_i, last_non_zero, i, dc;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;
    const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);

    s->fdsp.fdct(block);

    if(s->dct_error_sum)
        s->denoise_dct(s, block);
    /* Used for the FMT_H263/FMT_H261 reconstruction alevel*qmul + qadd below. */
    qmul= qscale*16;
    qadd= ((qscale-1)|1)*8;

    if (s->mb_intra) {
        int q;
        if (!s->h263_aic) {
            if (n < 4)
                q = s->y_dc_scale;
            else
                q = s->c_dc_scale;
            q = q << 3;
        } else{
            /* For AIC we skip quant/dequant of INTRADC */
            q = 1 << 3;
            qadd=0;
        }

        /* note: block[0] is assumed to be positive */
        block[0] = (block[0] + (q >> 1)) / q;
        /* The DC coefficient is done; the search only covers positions >= 1. */
        start_i = 1;
        last_non_zero = 0;
        qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
        matrix = n < 4 ? s->intra_matrix : s->chroma_intra_matrix;
        if(s->mpeg_quant || s->out_format == FMT_MPEG1 || s->out_format == FMT_MJPEG)
            bias= 1<<(QMAT_SHIFT-1);

        if (n > 3 && s->intra_chroma_ac_vlc_length) {
            length     = s->intra_chroma_ac_vlc_length;
            last_length= s->intra_chroma_ac_vlc_last_length;
        } else {
            length     = s->intra_ac_vlc_length;
            last_length= s->intra_ac_vlc_last_length;
        }
    } else {
        start_i = 0;
        last_non_zero = -1;
        qmat = s->q_inter_matrix[qscale];
        matrix = s->inter_matrix;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }
    last_i= start_i;

    /* (unsigned)(level+threshold1) > threshold2 is a single-compare test for
     * level lying outside [-threshold1, threshold1]. */
    threshold1= (1<<QMAT_SHIFT) - bias - 1;
    threshold2= (threshold1<<1);

    /* Scan backwards for the last position that plain quantization keeps. */
    for(i=63; i>=start_i; i--) {
        const int j = scantable[i];
        int level = block[j] * qmat[j];

        if(((unsigned)(level+threshold1))>threshold2){
            last_non_zero = i;
            break;
        }
    }

    /* Build the candidate levels for every position up to last_non_zero. */
    for(i=start_i; i<=last_non_zero; i++) {
        const int j = scantable[i];
        int level = block[j] * qmat[j];

//        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
//           || bias-level >= (1<<(QMAT_SHIFT - 3))){
        if(((unsigned)(level+threshold1))>threshold2){
            /* Candidates: the biased quantized level and the level one step
             * closer to zero. From here on "level" holds the magnitude. */
            if(level>0){
                level= (bias + level)>>QMAT_SHIFT;
                coeff[0][i]= level;
                coeff[1][i]= level-1;
//                coeff[2][k]= level-2;
            }else{
                level= (bias - level)>>QMAT_SHIFT;
                coeff[0][i]= -level;
                coeff[1][i]= -level+1;
//                coeff[2][k]= -level+2;
            }
            /* A magnitude of 1 has only one nonzero candidate. */
            coeff_count[i]= FFMIN(level, 2);
            av_assert2(coeff_count[i]);
            max |=level;
        }else{
            /* Below the threshold: the only candidate is +1 or -1 with the
             * sign of level (relies on an arithmetic right shift of int). */
            coeff[0][i]= (level>>31)|1;
            coeff_count[i]= 1;
        }
    }

    *overflow= s->max_qcoeff < max; //overflow might have happened

    /* Nothing survives plain quantization: clear the block and leave. */
    if(last_non_zero < start_i){
        memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
        return last_non_zero;
    }

    score_tab[start_i]= 0;
    survivor[0]= start_i;
    survivor_count= 1;

    /* Forward pass: for each position find the cheapest way to code a
     * nonzero level there, coming from any surviving earlier end point. */
    for(i=start_i; i<=last_non_zero; i++){
        int level_index, j, zero_distortion;
        int dct_coeff= FFABS(block[ scantable[i] ]);
        int best_score=256*256*256*120;

        if (s->fdsp.fdct == ff_fdct_ifast)
            dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
        /* Distortions are stored relative to coding this position as zero. */
        zero_distortion= dct_coeff*dct_coeff;

        for(level_index=0; level_index < coeff_count[i]; level_index++){
            int distortion;
            int level= coeff[level_index][i];
            const int alevel= FFABS(level);
            int unquant_coeff;

            av_assert2(level);

            /* Reconstruct the magnitude the way the selected format does. */
            if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
                unquant_coeff= alevel*qmul + qadd;
            } else if(s->out_format == FMT_MJPEG) {
                j = s->idsp.idct_permutation[scantable[i]];
                unquant_coeff = alevel * matrix[j] * 8;
            }else{ //MPEG1
                j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
                if(s->mb_intra){
                        unquant_coeff = (int)(  alevel  * qscale * matrix[j]) >> 3;
                        unquant_coeff =   (unquant_coeff - 1) | 1;
                }else{
                        unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) matrix[j])) >> 4;
                        unquant_coeff =   (unquant_coeff - 1) | 1;
                }
                unquant_coeff<<= 3;
            }

            distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
            /* Bias the level by 64 so that it can index the length tables;
             * anything outside 0..127 is charged esc_length instead. */
            level+=64;
            if((level&(~127)) == 0){
                for(j=survivor_count-1; j>=0; j--){
                    int run= i - survivor[j];
                    int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
                    score += score_tab[i-run];

                    if(score < best_score){
                        best_score= score;
                        run_tab[i+1]= run;
                        level_tab[i+1]= level-64;
                    }
                }

                /* For these formats the final coefficient is scored with the
                 * separate last_length table, so the best end of block is
                 * tracked here. */
                if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
                    for(j=survivor_count-1; j>=0; j--){
                        int run= i - survivor[j];
                        int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
                        score += score_tab[i-run];
                        if(score < last_score){
                            last_score= score;
                            last_run= run;
                            last_level= level-64;
                            last_i= i+1;
                        }
                    }
                }
            }else{
                distortion += esc_length*lambda;
                for(j=survivor_count-1; j>=0; j--){
                    int run= i - survivor[j];
                    int score= distortion + score_tab[i-run];

                    if(score < best_score){
                        best_score= score;
                        run_tab[i+1]= run;
                        level_tab[i+1]= level-64;
                    }
                }

                if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
                  for(j=survivor_count-1; j>=0; j--){
                        int run= i - survivor[j];
                        int score= distortion + score_tab[i-run];
                        if(score < last_score){
                            last_score= score;
                            last_run= run;
                            last_level= level-64;
                            last_i= i+1;
                        }
                    }
                }
            }
        }

        score_tab[i+1]= best_score;

        //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
        /* Drop survivors whose score is worse than the new end point; for
         * longer blocks a slack of lambda is allowed. */
        if(last_non_zero <= 27){
            for(; survivor_count; survivor_count--){
                if(score_tab[ survivor[survivor_count-1] ] <= best_score)
                    break;
            }
        }else{
            for(; survivor_count; survivor_count--){
                if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
                    break;
            }
        }

        survivor[ survivor_count++ ]= i+1;
    }

    /* Other formats: pick the end of block after the forward pass, charging
     * lambda*2 for every end point other than index 0. */
    if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
        last_score= 256*256*256*120;
        for(i= survivor[0]; i<=last_non_zero + 1; i++){
            int score= score_tab[i];
            if(i) score += lambda*2; //FIXME exacter?

            if(score < last_score){
                last_score= score;
                last_i= i;
                last_level= level_tab[i];
                last_run= run_tab[i];
            }
        }
    }

    s->coded_score[n] = last_score;

    /* Remember |block[0]| before the coefficients are cleared; it is only
     * used by the single-coefficient special case below. */
    dc= FFABS(block[0]);
    last_non_zero= last_i - 1;
    memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));

    if(last_non_zero < start_i)
        return last_non_zero;

    /* Special case: the search started at position 0 and kept only that
     * coefficient. Its level is re-decided here against coding nothing. */
    if(last_non_zero == 0 && start_i == 0){
        int best_level= 0;
        int best_score= dc * dc;

        for(i=0; i<coeff_count[0]; i++){
            int level= coeff[i][0];
            int alevel= FFABS(level);
            int unquant_coeff, score, distortion;

            if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
                    unquant_coeff= (alevel*qmul + qadd)>>3;
            }else{ //MPEG1
                    unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) matrix[0])) >> 4;
                    unquant_coeff =   (unquant_coeff - 1) | 1;
            }
            unquant_coeff = (unquant_coeff + 4) >> 3;
            unquant_coeff<<= 3 + 3;

            distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
            level+=64;
            if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
            else                    score= distortion + esc_length*lambda;

            if(score < best_score){
                best_score= score;
                best_level= level - 64;
            }
        }
        block[0]= best_level;
        s->coded_score[n] = best_score - dc*dc;
        if(best_level == 0) return -1;
        else                return last_non_zero;
    }

    /* Backtrack: store the final coefficient, then follow run_tab/level_tab
     * towards the start of the block. */
    i= last_i;
    av_assert2(last_level);

    block[ perm_scantable[last_non_zero] ]= last_level;
    i -= last_run + 1;

    for(; i>start_i; i -= run_tab[i] + 1){
        block[ perm_scantable[i-1] ]= level_tab[i];
    }

    return last_non_zero;
}
4130
4131 //#define REFINE_STATS 1
4132 static int16_t basis[64][64];
4133
4134 static void build_basis(uint8_t *perm){
4135     int i, j, x, y;
4136     emms_c();
4137     for(i=0; i<8; i++){
4138         for(j=0; j<8; j++){
4139             for(y=0; y<8; y++){
4140                 for(x=0; x<8; x++){
4141                     double s= 0.25*(1<<BASIS_SHIFT);
4142                     int index= 8*i + j;
4143                     int perm_index= perm[index];
4144                     if(i==0) s*= sqrt(0.5);
4145                     if(j==0) s*= sqrt(0.5);
4146                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4147                 }
4148             }
4149         }
4150     }
4151 }
4152
/**
 * Iteratively refine an already quantized 8x8 block.
 *
 * rem[] is initialised from the (intra) DC term plus a rounding constant
 * minus orig[] scaled by RECON_SHIFT, and each nonzero coefficient is then
 * accumulated into it with s->mpvencdsp.add_8x8basis(). After that the
 * function repeatedly tries to change one coefficient by +1 or -1: every
 * candidate is scored as s->mpvencdsp.try_8x8basis() plus lambda times the
 * resulting change in table lengths, the best candidate that beats the
 * score of "no change" is applied, and the loop ends once no candidate
 * does.
 *
 * @param s      encoder context; reads among others s->block_last_index[n]
 *               and s->quantizer_noise_shaping
 * @param block  quantized coefficients, modified in place
 * @param weight per-sample weights; overwritten with values in 16 .. 63
 *               derived from their magnitudes
 * @param orig   64 samples that rem[] is initialised from
 * @param n      block number; n < 4 uses s->y_dc_scale, otherwise
 *               s->c_dc_scale, for intra blocks
 * @param qscale quantizer scale
 * @return the updated scan index of the last nonzero coefficient
 */
static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
                        int16_t *block, int16_t *weight, int16_t *orig,
                        int n, int qscale){
    int16_t rem[64];
    LOCAL_ALIGNED_16(int16_t, d1, [64]);
    const uint8_t *scantable= s->intra_scantable.scantable;
    const uint8_t *perm_scantable= s->intra_scantable.permutated;
//    unsigned int threshold1, threshold2;
//    int bias=0;
    /* Zero-run length in front of each nonzero coefficient, in scan order. */
    int run_tab[65];
    int prev_run=0;
    int prev_level=0;
    int qmul, qadd, start_i, last_non_zero, i, dc;
    uint8_t * length;
    uint8_t * last_length;
    int lambda;
    int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
#ifdef REFINE_STATS
static int count=0;
static int after_last=0;
static int to_zero=0;
static int from_zero=0;
static int raise=0;
static int lower=0;
static int messed_sign=0;
#endif

    /* Lazily build the basis table the first time it is needed. */
    if(basis[0][0] == 0)
        build_basis(s->idsp.idct_permutation);

    /* A nonzero level is reconstructed as qmul*level +/- qadd below. */
    qmul= qscale*2;
    qadd= (qscale-1)|1;
    if (s->mb_intra) {
        if (!s->h263_aic) {
            if (n < 4)
                q = s->y_dc_scale;
            else
                q = s->c_dc_scale;
        } else{
            /* For AIC we skip quant/dequant of INTRADC */
            q = 1;
            qadd=0;
        }
        q <<= RECON_SHIFT-3;
        /* note: block[0] is assumed to be positive */
        dc= block[0]*q;
//        block[0] = (block[0] + (q >> 1)) / q;
        start_i = 1;
//        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
//            bias= 1<<(QMAT_SHIFT-1);
        if (n > 3 && s->intra_chroma_ac_vlc_length) {
            length     = s->intra_chroma_ac_vlc_length;
            last_length= s->intra_chroma_ac_vlc_last_length;
        } else {
            length     = s->intra_ac_vlc_length;
            last_length= s->intra_ac_vlc_last_length;
        }
    } else {
        dc= 0;
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }
    last_non_zero = s->block_last_index[n];

#ifdef REFINE_STATS
{START_TIMER
#endif
    /* Start rem[] from the DC term (plus rounding) minus the scaled input. */
    dc += (1<<(RECON_SHIFT-1));
    for(i=0; i<64; i++){
        rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig directly instead of copying to rem[]
    }
#ifdef REFINE_STATS
STOP_TIMER("memset rem[]")}
#endif
    /* Replace each weight by a value in 16 .. 63 that decreases as the
     * magnitude of the incoming weight grows, and sum their squares. */
    sum=0;
    for(i=0; i<64; i++){
        int one= 36;
        int qns=4;
        int w;

        w= FFABS(weight[i]) + qns*one;
        w= 15 + (48*qns*one + w/2)/w; // 16 .. 63

        weight[i] = w;
//        w=weight[i] = (63*qns + (w/2)) / w;

        av_assert2(w>0);
        av_assert2(w<(1<<6));
        sum += w*w;
    }
    lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
#ifdef REFINE_STATS
{START_TIMER
#endif
    /* Accumulate every nonzero coefficient into rem[] and record the zero
     * runs in front of them. */
    run=0;
    rle_index=0;
    for(i=start_i; i<=last_non_zero; i++){
        int j= perm_scantable[i];
        const int level= block[j];
        int coeff;

        if(level){
            if(level<0) coeff= qmul*level - qadd;
            else        coeff= qmul*level + qadd;
            run_tab[rle_index++]=run;
            run=0;

            s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
        }else{
            run++;
        }
    }
#ifdef REFINE_STATS
if(last_non_zero>0){
STOP_TIMER("init rem[]")
}
}

{START_TIMER
#endif
    /* Greedy search: one coefficient is changed by +/-1 per iteration. */
    for(;;){
        /* Score of leaving the block unchanged; candidates must beat it. */
        int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
        int best_coeff=0;
        int best_change=0;
        int run2, best_unquant_change=0, analyze_gradient;
#ifdef REFINE_STATS
{START_TIMER
#endif
        analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;

        /* d1[] becomes the forward DCT of the weighted rem[]; its signs are
         * used below to skip some candidates that turn a zero into +/-1. */
        if(analyze_gradient){
#ifdef REFINE_STATS
{START_TIMER
#endif
            for(i=0; i<64; i++){
                int w= weight[i];

                d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
            }
#ifdef REFINE_STATS
STOP_TIMER("rem*w*w")}
{START_TIMER
#endif
            s->fdsp.fdct(d1);
#ifdef REFINE_STATS
STOP_TIMER("dct")}
#endif
        }

        /* Intra only (start_i == 1): try changing the DC level by +/-1.
         * No table-length term is added to the score of these candidates. */
        if(start_i){
            const int level= block[0];
            int change, old_coeff;

            av_assert2(s->mb_intra);

            old_coeff= q*level;

            for(change=-1; change<=1; change+=2){
                int new_level= level + change;
                int score, new_coeff;

                new_coeff= q*new_level;
                if(new_coeff >= 2048 || new_coeff < 0)
                    continue;

                score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
                                                  new_coeff - old_coeff);
                if(score<best_score){
                    best_score= score;
                    best_coeff= 0;
                    best_change= change;
                    best_unquant_change= new_coeff - old_coeff;
                }
            }
        }

        run=0;
        rle_index=0;
        /* NOTE(review): if the block holds no nonzero coefficient from
         * start_i on, run_tab[0] has not been written when it is read here.
         * In that case run2 looks like it is only decremented and never used
         * as an index, because the i < last_non_zero branches cannot be
         * taken - confirm. */
        run2= run_tab[rle_index++];
        prev_level=0;
        prev_run=0;

        for(i=start_i; i<64; i++){
            int j= perm_scantable[i];
            const int level= block[j];
            int change, old_coeff;

            /* Below noise-shaping level 3 only one position past the
             * current last coefficient is considered. */
            if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
                break;

            if(level){
                if(level<0) old_coeff= qmul*level - qadd;
                else        old_coeff= qmul*level + qadd;
                run2= run_tab[rle_index++]; //FIXME ! maybe after last
            }else{
                old_coeff=0;
                run2--;
                av_assert2(run2>=0 || i >= last_non_zero );
            }

            for(change=-1; change<=1; change+=2){
                int new_level= level + change;
                int score, new_coeff, unquant_change;

                score=0;
                /* Below noise-shaping level 2 magnitudes may only shrink. */
                if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
                   continue;

                if(new_level){
                    if(new_level<0) new_coeff= qmul*new_level - qadd;
                    else            new_coeff= qmul*new_level + qadd;
                    if(new_coeff >= 2048 || new_coeff <= -2048)
                        continue;
                    //FIXME check for overflow

                    if(level){
                        /* Nonzero -> nonzero: same run, different level. */
                        if(level < 63 && level > -63){
                            if(i < last_non_zero)
                                score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
                                         - length[UNI_AC_ENC_INDEX(run, level+64)];
                            else
                                score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
                                         - last_length[UNI_AC_ENC_INDEX(run, level+64)];
                        }
                    }else{
                        /* Zero -> +/-1: a new coefficient splits a run. */
                        av_assert2(FFABS(new_level)==1);

                        if(analyze_gradient){
                            int g= d1[ scantable[i] ];
                            if(g && (g^new_level) >= 0)
                                continue;
                        }

                        if(i < last_non_zero){
                            int next_i= i + run2 + 1;
                            int next_level= block[ perm_scantable[next_i] ] + 64;

                            if(next_level&(~127))
                                next_level= 0;

                            if(next_i < last_non_zero)
                                score +=   length[UNI_AC_ENC_INDEX(run, 65)]
                                         + length[UNI_AC_ENC_INDEX(run2, next_level)]
                                         - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
                            else
                                score +=  length[UNI_AC_ENC_INDEX(run, 65)]
                                        + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
                                        - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
                        }else{
                            /* The new coefficient becomes the last one, so
                             * the previous one moves from the "last" table
                             * to the regular one. */
                            score += last_length[UNI_AC_ENC_INDEX(run, 65)];
                            if(prev_level){
                                score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
                                        - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
                            }
                        }
                    }
                }else{
                    /* +/-1 -> zero: the coefficient vanishes and two runs
                     * merge. */
                    new_coeff=0;
                    av_assert2(FFABS(level)==1);

                    if(i < last_non_zero){
                        int next_i= i + run2 + 1;
                        int next_level= block[ perm_scantable[next_i] ] + 64;

                        if(next_level&(~127))
                            next_level= 0;

                        if(next_i < last_non_zero)
                            score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
                                     - length[UNI_AC_ENC_INDEX(run2, next_level)]
                                     - length[UNI_AC_ENC_INDEX(run, 65)];
                        else
                            score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
                                     - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
                                     - length[UNI_AC_ENC_INDEX(run, 65)];
                    }else{
                        score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
                        if(prev_level){
                            score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
                                    - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
                        }
                    }
                }

                /* Turn the length difference into a cost and add the
                 * try_8x8basis() score of applying the change. */
                score *= lambda;

                unquant_change= new_coeff - old_coeff;
                av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);

                score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
                                                   unquant_change);
                if(score<best_score){
                    best_score= score;
                    best_coeff= i;
                    best_change= change;
                    best_unquant_change= unquant_change;
                }
            }
            /* Track the previous nonzero coefficient (level biased by 64,
             * or 0 when it does not fit the tables) and the current run. */
            if(level){
                prev_level= level + 64;
                if(prev_level&(~127))
                    prev_level= 0;
                prev_run= run;
                run=0;
            }else{
                run++;
            }
        }
#ifdef REFINE_STATS
STOP_TIMER("iterative step")}
#endif

        /* Apply the winning change, or stop when nothing improved. */
        if(best_change){
            int j= perm_scantable[ best_coeff ];

            block[j] += best_change;

            if(best_coeff > last_non_zero){
                last_non_zero= best_coeff;
                av_assert2(block[j]);
#ifdef REFINE_STATS
after_last++;
#endif
            }else{
#ifdef REFINE_STATS
if(block[j]){
    if(block[j] - best_change){
        if(FFABS(block[j]) > FFABS(block[j] - best_change)){
            raise++;
        }else{
            lower++;
        }
    }else{
        from_zero++;
    }
}else{
    to_zero++;
}
#endif
                /* The change may have zeroed the last coefficient. */
                for(; last_non_zero>=start_i; last_non_zero--){
                    if(block[perm_scantable[last_non_zero]])
                        break;
                }
            }
#ifdef REFINE_STATS
count++;
if(256*256*256*64 % count == 0){
    av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
}
#endif
            /* Rebuild the run table for the modified block. */
            run=0;
            rle_index=0;
            for(i=start_i; i<=last_non_zero; i++){
                int j= perm_scantable[i];
                const int level= block[j];

                 if(level){
                     run_tab[rle_index++]=run;
                     run=0;
                 }else{
                     run++;
                 }
            }

            s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
        }else{
            break;
        }
    }
#ifdef REFINE_STATS
if(last_non_zero>0){
STOP_TIMER("iterative search")
}
}
#endif

    return last_non_zero;
}
4532
/**
 * Reorder the coefficients of an 8x8 block through a permutation vector.
 *
 * Only the positions that the scantable visits up to and including index
 * @p last are read, cleared and rewritten; every other entry of the block
 * is left as it is unless one of the moved coefficients lands on it.
 *
 * @param block       the coefficients, permuted in place
 * @param permutation the permutation vector: the coefficient at position j
 *                    is moved to position permutation[j]
 * @param scantable   the scantable in use; it only serves to enumerate the
 *                    positions that need moving, the block is not
 *                    (inverse) permuted to scantable order
 * @param last        the last nonzero coefficient in scantable order, used
 *                    to speed the permutation up; nothing is done when it
 *                    is <= 0
 */
static void block_permute(int16_t *block, uint8_t *permutation,
                          const uint8_t *scantable, int last)
{
    int16_t saved[64];
    int pos;

    if (last < 1)
        return;
    /* FIXME: returning early when permutation[1] == 1 would be ok but not
     * clean, and might fail for some permutations. */

    /* Pass 1: stash every coefficient in scan range and clear its slot. */
    pos = 0;
    while (pos <= last) {
        const int src = scantable[pos++];

        saved[src] = block[src];
        block[src] = 0;
    }

    /* Pass 2: drop each stashed coefficient at its permuted position. */
    for (pos = 0; pos <= last; pos++) {
        const int src = scantable[pos];

        block[permutation[src]] = saved[src];
    }
}
4568
4569 int ff_dct_quantize_c(MpegEncContext *s,
4570                         int16_t *block, int n,
4571                         int qscale, int *overflow)
4572 {
4573     int i, j, level, last_non_zero, q, start_i;
4574     const int *qmat;
4575     const uint8_t *scantable= s->intra_scantable.scantable;
4576     int bias;
4577     int max=0;
4578     unsigned int threshold1, threshold2;
4579
4580     s->fdsp.fdct(block);
4581
4582     if(s->dct_error_sum)
4583         s->denoise_dct(s, block);
4584
4585     if (s->mb_intra) {
4586         if (!s->h263_aic) {
4587             if (n < 4)
4588                 q = s->y_dc_scale;
4589             else
4590                 q = s->c_dc_scale;
4591             q = q << 3;
4592         } else
4593             /* For AIC we skip quant/dequant of INTRADC */
4594             q = 1 << 3;
4595
4596         /* note: block[0] is assumed to be positive */
4597         block[0] = (block[0] + (q >> 1)) / q;
4598         start_i = 1;
4599         last_non_zero = 0;
4600         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4601         bias= s->intra_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4602     } else {
4603         start_i = 0;
4604         last_non_zero = -1;
4605         qmat = s->q_inter_matrix[qscale];
4606         bias= s->inter_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4607     }
4608     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4609     threshold2= (threshold1<<1);
4610     for(i=63;i>=start_i;i--) {
4611         j = scantable[i];
4612         level = block[j] * qmat[j];
4613
4614         if(((unsigned)(level+threshold1))>threshold2){
4615             last_non_zero = i;
4616             break;
4617         }else{
4618             block[j]=0;
4619         }
4620     }
4621     for(i=start_i; i<=last_non_zero; i++) {
4622         j = scantable[i];
4623         level = block[j] * qmat[j];
4624
4625 //        if(   bias+level >= (1<<QMAT_SHIFT)
4626 //           || bias-level >= (1<<QMAT_SHIFT)){
4627         if(((unsigned)(level+threshold1))>threshold2){
4628             if(level>0){
4629                 level= (bias + level)>>QMAT_SHIFT;
4630                 block[j]= level;
4631             }else{
4632                 level= (bias - level)>>QMAT_SHIFT;
4633                 block[j]= -level;
4634             }
4635             max |=level;
4636         }else{
4637             block[j]=0;
4638         }
4639     }
4640     *overflow= s->max_qcoeff < max; //overflow might have happened
4641
4642     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4643     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4644         block_permute(block, s->idsp.idct_permutation,
4645                       scantable, last_non_zero);
4646
4647     return last_non_zero;
4648 }
4649
/* Byte offset of a field inside MpegEncContext, for use in AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Option flags shared by the tables below. The replacement list is
 * parenthesized so that the macro always expands to a single operand. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4652 static const AVOption h263_options[] = {
4653     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4654     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4655     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4656     FF_MPV_COMMON_OPTS
4657     { NULL },
4658 };
4659
4660 static const AVClass h263_class = {
4661     .class_name = "H.263 encoder",
4662     .item_name  = av_default_item_name,
4663     .option     = h263_options,
4664     .version    = LIBAVUTIL_VERSION_INT,
4665 };
4666
4667 AVCodec ff_h263_encoder = {
4668     .name           = "h263",
4669     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4670     .type           = AVMEDIA_TYPE_VIDEO,
4671     .id             = AV_CODEC_ID_H263,
4672     .priv_data_size = sizeof(MpegEncContext),
4673     .init           = ff_mpv_encode_init,
4674     .encode2        = ff_mpv_encode_picture,
4675     .close          = ff_mpv_encode_end,
4676     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4677     .priv_class     = &h263_class,
4678 };
4679
4680 static const AVOption h263p_options[] = {
4681     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4682     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4683     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4684     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4685     FF_MPV_COMMON_OPTS
4686     { NULL },
4687 };
4688 static const AVClass h263p_class = {
4689     .class_name = "H.263p encoder",
4690     .item_name  = av_default_item_name,
4691     .option     = h263p_options,
4692     .version    = LIBAVUTIL_VERSION_INT,
4693 };
4694
4695 AVCodec ff_h263p_encoder = {
4696     .name           = "h263p",
4697     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4698     .type           = AVMEDIA_TYPE_VIDEO,
4699     .id             = AV_CODEC_ID_H263P,
4700     .priv_data_size = sizeof(MpegEncContext),
4701     .init           = ff_mpv_encode_init,
4702     .encode2        = ff_mpv_encode_picture,
4703     .close          = ff_mpv_encode_end,
4704     .capabilities   = AV_CODEC_CAP_SLICE_THREADS,
4705     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4706     .priv_class     = &h263p_class,
4707 };
4708
4709 static const AVClass msmpeg4v2_class = {
4710     .class_name = "msmpeg4v2 encoder",
4711     .item_name  = av_default_item_name,
4712     .option     = ff_mpv_generic_options,
4713     .version    = LIBAVUTIL_VERSION_INT,
4714 };
4715
4716 AVCodec ff_msmpeg4v2_encoder = {
4717     .name           = "msmpeg4v2",
4718     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4719     .type           = AVMEDIA_TYPE_VIDEO,
4720     .id             = AV_CODEC_ID_MSMPEG4V2,
4721     .priv_data_size = sizeof(MpegEncContext),
4722     .init           = ff_mpv_encode_init,
4723     .encode2        = ff_mpv_encode_picture,
4724     .close          = ff_mpv_encode_end,
4725     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4726     .priv_class     = &msmpeg4v2_class,
4727 };
4728
4729 static const AVClass msmpeg4v3_class = {
4730     .class_name = "msmpeg4v3 encoder",
4731     .item_name  = av_default_item_name,
4732     .option     = ff_mpv_generic_options,
4733     .version    = LIBAVUTIL_VERSION_INT,
4734 };
4735
4736 AVCodec ff_msmpeg4v3_encoder = {
4737     .name           = "msmpeg4",
4738     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4739     .type           = AVMEDIA_TYPE_VIDEO,
4740     .id             = AV_CODEC_ID_MSMPEG4V3,
4741     .priv_data_size = sizeof(MpegEncContext),
4742     .init           = ff_mpv_encode_init,
4743     .encode2        = ff_mpv_encode_picture,
4744     .close          = ff_mpv_encode_end,
4745     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4746     .priv_class     = &msmpeg4v3_class,
4747 };
4748
4749 static const AVClass wmv1_class = {
4750     .class_name = "wmv1 encoder",
4751     .item_name  = av_default_item_name,
4752     .option     = ff_mpv_generic_options,
4753     .version    = LIBAVUTIL_VERSION_INT,
4754 };
4755
4756 AVCodec ff_wmv1_encoder = {
4757     .name           = "wmv1",
4758     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4759     .type           = AVMEDIA_TYPE_VIDEO,
4760     .id             = AV_CODEC_ID_WMV1,
4761     .priv_data_size = sizeof(MpegEncContext),
4762     .init           = ff_mpv_encode_init,
4763     .encode2        = ff_mpv_encode_picture,
4764     .close          = ff_mpv_encode_end,
4765     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4766     .priv_class     = &wmv1_class,
4767 };