]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo.c
462f323f907244922341d25dbbe12057856b6414
[ffmpeg] / libavcodec / mpegvideo.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  *
22  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
23  */
24
25 /**
26  * @file mpegvideo.c
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "avcodec.h"
31 #include "dsputil.h"
32 #include "mpegvideo.h"
33 #include "faandct.h"
34 #include <limits.h>
35
36 #ifdef USE_FASTMEMCPY
37 #include "libvo/fastmemcpy.h"
38 #endif
39
40 //#undef NDEBUG
41 //#include <assert.h>
42
43 #ifdef CONFIG_ENCODERS
44 static int encode_picture(MpegEncContext *s, int picture_number);
45 #endif //CONFIG_ENCODERS
46 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
47                                    DCTELEM *block, int n, int qscale);
48 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
49                                    DCTELEM *block, int n, int qscale);
50 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
51                                    DCTELEM *block, int n, int qscale);
52 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
53                                    DCTELEM *block, int n, int qscale);
54 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
55                                    DCTELEM *block, int n, int qscale);
56 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
57                                   DCTELEM *block, int n, int qscale);
58 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
59                                   DCTELEM *block, int n, int qscale);
60 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
61 #ifdef CONFIG_ENCODERS
62 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
63 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
64 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
65 static int sse_mb(MpegEncContext *s);
66 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block);
67 #endif //CONFIG_ENCODERS
68
69 #ifdef HAVE_XVMC
70 extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
71 extern void XVMC_field_end(MpegEncContext *s);
72 extern void XVMC_decode_mb(MpegEncContext *s);
73 #endif
74
/* Global hook for the edge-drawing routine. It starts out pointing at the
   C implementation draw_edges_c declared above; because it is a writable,
   non-static pointer, other code can install a different implementation
   (presumably platform-specific init code -- confirm against callers). */
75 void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
76
77
78 /* enable all paranoid tests for rounding, overflows, etc... */
79 //#define PARANOID
80
81 //#define DEBUG
82
83
84 /* for jpeg fast DCT */
85 #define CONST_BITS 14
86
/* 8x8 table of scale factors in 14-bit fixed point (16384 == 1.0).
   convert_matrix() divides by these when dsp->fdct is fdct_ifast (or
   ff_faandct without FAAN_POSTSCALE), compensating with an extra shift of 14.
   Presumably the AAN fast-DCT post-scale factors, going by the name. */
87 static const uint16_t aanscales[64] = {
88     /* precomputed values scaled up by 14 bits */
89     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
90     22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
91     21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
92     19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
93     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
94     12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
95     8867 , 12299, 11585, 10426,  8867,  6967,  4799,  2446,
96     4520 ,  6270,  5906,  5315,  4520,  3552,  2446,  1247
97 };
98
/* 16-entry lookup table mapping an index 0..15 to 0, 1 or 2.
   Its users are outside this part of the file; judging by the name it is
   the H.263 chroma motion-vector rounding table -- TODO confirm. */
99 static const uint8_t h263_chroma_roundtab[16] = {
100 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
101     0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
102 };
103
/* Identity mapping for indices 0..31 (entry i holds i).
   MPV_common_defaults() installs it as s->chroma_qscale_table. */
104 static const uint8_t ff_default_chroma_qscale_table[32]={
105 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
106     0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
107 };
108
109 #ifdef CONFIG_ENCODERS
/* Shared encoder defaults, handed out by MPV_encode_defaults():
   default_mv_penalty is never written in this part of the file, so it stays
   all zero here; default_fcode_tab gets the 32 entries around index MAX_MV
   set to 1 on the first call of MPV_encode_defaults(). */
110 static uint8_t default_mv_penalty[MAX_FCODE+1][MAX_MV*2+1];
111 static uint8_t default_fcode_tab[MAX_MV*2+1];
112
/* Pixel format list holding only PIX_FMT_YUV420P; the trailing -1 looks like
   the list terminator -- confirm against the code that walks it. */
113 enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
114
115 static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
116                            const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra)
117 {
118     int qscale;
119     int shift=0;
120
121     for(qscale=qmin; qscale<=qmax; qscale++){
122         int i;
123         if (dsp->fdct == ff_jpeg_fdct_islow
124 #ifdef FAAN_POSTSCALE
125             || dsp->fdct == ff_faandct
126 #endif
127             ) {
128             for(i=0;i<64;i++) {
129                 const int j= dsp->idct_permutation[i];
130                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
131                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
132                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
133                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
134
135                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
136                                 (qscale * quant_matrix[j]));
137             }
138         } else if (dsp->fdct == fdct_ifast
139 #ifndef FAAN_POSTSCALE
140                    || dsp->fdct == ff_faandct
141 #endif
142                    ) {
143             for(i=0;i<64;i++) {
144                 const int j= dsp->idct_permutation[i];
145                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
146                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
147                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
148                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
149
150                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
151                                 (aanscales[i] * qscale * quant_matrix[j]));
152             }
153         } else {
154             for(i=0;i<64;i++) {
155                 const int j= dsp->idct_permutation[i];
156                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
157                    So 16           <= qscale * quant_matrix[i]             <= 7905
158                    so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
159                    so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
160                 */
161                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
162 //                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
163                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
164
165                 if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
166                 qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
167             }
168         }
169
170         for(i=intra; i<64; i++){
171             int64_t max= 8191;
172             if (dsp->fdct == fdct_ifast
173 #ifndef FAAN_POSTSCALE
174                    || dsp->fdct == ff_faandct
175 #endif
176                    ) {
177                 max= (8191LL*aanscales[i]) >> 14;
178             }
179             while(((max * qmat[qscale][i]) >> shift) > INT_MAX){
180                 shift++;
181             }
182         }
183     }
184     if(shift){
185         av_log(NULL, AV_LOG_INFO, "Warning, QMAT_SHIFT is larger than %d, overflows possible\n", QMAT_SHIFT - shift);
186     }
187 }
188
189 static inline void update_qscale(MpegEncContext *s){
190     s->qscale= (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
191     s->qscale= av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
192
193     s->lambda2= (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
194 }
195 #endif //CONFIG_ENCODERS
196
197 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
198     int i;
199     int end;
200
201     st->scantable= src_scantable;
202
203     for(i=0; i<64; i++){
204         int j;
205         j = src_scantable[i];
206         st->permutated[i] = permutation[j];
207 #ifdef ARCH_POWERPC
208         st->inverse[j] = i;
209 #endif
210     }
211
212     end=-1;
213     for(i=0; i<64; i++){
214         int j;
215         j = st->permutated[i];
216         if(j>end) end=j;
217         st->raster_end[i]= end;
218     }
219 }
220
221 #ifdef CONFIG_ENCODERS
222 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix){
223     int i;
224
225     if(matrix){
226         put_bits(pb, 1, 1);
227         for(i=0;i<64;i++) {
228             put_bits(pb, 8, matrix[ ff_zigzag_direct[i] ]);
229         }
230     }else
231         put_bits(pb, 1, 0);
232 }
233 #endif //CONFIG_ENCODERS
234
/**
 * Scans [p, end) for the byte sequence 00 00 01, carrying the last four bytes
 * seen across calls in *state.
 *
 * @param p     first byte to examine, must be <= end
 * @param end   one past the last byte
 * @param state in: the previous bytes (most recent in the low byte);
 *              out: the last bytes consumed by this call
 * @return pointer just behind the last consumed byte; this is 'end' when the
 *         input was exhausted
 */
const uint8_t *ff_find_start_code(const uint8_t * restrict p, const uint8_t *end, uint32_t * restrict state){
    int n;

    assert(p<=end);
    if(p>=end)
        return end;

    /* Feed the first (up to) three bytes through *state so that a start code
       straddling the previous buffer is caught. */
    for(n=0; n<3; n++){
        const uint32_t shifted= *state << 8;

        *state= shifted + *p;
        p++;
        if(shifted == 0x100 || p==end)
            return p;
    }

    /* From here on p[-3..-1] all lie inside the buffer. */
    while(p<end){
        if(p[-1] > 1){
            p+= 3;              // this byte cannot belong to 00 00 01 at all
        }else if(p[-2]){
            p+= 2;
        }else{
            const int found= !(p[-3]|(p[-1]-1));  // p[-3..-1] == 00 00 01

            p++;
            if(found)
                break;
        }
    }

    /* the skips above may have overshot the end of the buffer */
    if(p > end)
        p= end;
    *state= be2me_32(unaligned32(p-4));

    return p;
}
264
265 /* init common dct for both encoder and decoder */
266 int DCT_common_init(MpegEncContext *s)
267 {
268     s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
269     s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
270     s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
271     s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
272     s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
273     if(s->flags & CODEC_FLAG_BITEXACT)
274         s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_bitexact;
275     s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;
276
277 #ifdef CONFIG_ENCODERS
278     s->dct_quantize= dct_quantize_c;
279     s->denoise_dct= denoise_dct_c;
280 #endif //CONFIG_ENCODERS
281
282 #ifdef HAVE_MMX
283     MPV_common_init_mmx(s);
284 #endif
285 #ifdef ARCH_ALPHA
286     MPV_common_init_axp(s);
287 #endif
288 #ifdef HAVE_MLIB
289     MPV_common_init_mlib(s);
290 #endif
291 #ifdef HAVE_MMI
292     MPV_common_init_mmi(s);
293 #endif
294 #ifdef ARCH_ARMV4L
295     MPV_common_init_armv4l(s);
296 #endif
297 #ifdef ARCH_POWERPC
298     MPV_common_init_ppc(s);
299 #endif
300
301 #ifdef CONFIG_ENCODERS
302     s->fast_dct_quantize= s->dct_quantize;
303
304     if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
305         s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
306     }
307
308 #endif //CONFIG_ENCODERS
309
310     /* load & permutate scantables
311        note: only wmv uses different ones
312     */
313     if(s->alternate_scan){
314         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
315         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
316     }else{
317         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
318         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
319     }
320     ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
321     ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
322
323     return 0;
324 }
325
/* Makes *dst a member-wise copy of *src (pointer members therefore refer to
   the same memory as src's) and then tags the copy as FF_BUFFER_TYPE_COPY. */
326 static void copy_picture(Picture *dst, Picture *src){
327     *dst = *src;
328     dst->type= FF_BUFFER_TYPE_COPY;
329 }
330
331 #ifdef CONFIG_ENCODERS
332 static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *src){
333     int i;
334
335     dst->pict_type              = src->pict_type;
336     dst->quality                = src->quality;
337     dst->coded_picture_number   = src->coded_picture_number;
338     dst->display_picture_number = src->display_picture_number;
339 //    dst->reference              = src->reference;
340     dst->pts                    = src->pts;
341     dst->interlaced_frame       = src->interlaced_frame;
342     dst->top_field_first        = src->top_field_first;
343
344     if(s->avctx->me_threshold){
345         if(!src->motion_val[0])
346             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
347         if(!src->mb_type)
348             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
349         if(!src->ref_index[0])
350             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
351         if(src->motion_subsample_log2 != dst->motion_subsample_log2)
352             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
353             src->motion_subsample_log2, dst->motion_subsample_log2);
354
355         memcpy(dst->mb_type, src->mb_type, s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
356
357         for(i=0; i<2; i++){
358             int stride= ((16*s->mb_width )>>src->motion_subsample_log2) + 1;
359             int height= ((16*s->mb_height)>>src->motion_subsample_log2);
360
361             if(src->motion_val[i] && src->motion_val[i] != dst->motion_val[i]){
362                 memcpy(dst->motion_val[i], src->motion_val[i], 2*stride*height*sizeof(int16_t));
363             }
364             if(src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]){
365                 memcpy(dst->ref_index[i], src->ref_index[i], s->b8_stride*2*s->mb_height*sizeof(int8_t));
366             }
367         }
368     }
369 }
370 #endif
371
372 /**
373  * allocates a Picture
374  * The pixels are allocated/set by calling get_buffer() if shared=0
375  */
376 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
377     const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesnt sig11
378     const int mb_array_size= s->mb_stride*s->mb_height;
379     const int b8_array_size= s->b8_stride*s->mb_height*2;
380     const int b4_array_size= s->b4_stride*s->mb_height*4;
381     int i;
382
383     if(shared){
384         assert(pic->data[0]);
385         assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
386         pic->type= FF_BUFFER_TYPE_SHARED;
387     }else{
388         int r;
389
390         assert(!pic->data[0]);
391
392         r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);
393
394         if(r<0 || !pic->age || !pic->type || !pic->data[0]){
395             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
396             return -1;
397         }
398
399         if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
400             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
401             return -1;
402         }
403
404         if(pic->linesize[1] != pic->linesize[2]){
405             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
406             return -1;
407         }
408
409         s->linesize  = pic->linesize[0];
410         s->uvlinesize= pic->linesize[1];
411     }
412
413     if(pic->qscale_table==NULL){
414         if (s->encoding) {
415             CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
416             CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
417             CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
418         }
419
420         CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
421         CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
422         CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(uint32_t))
423         pic->mb_type= pic->mb_type_base + s->mb_stride+1;
424         if(s->out_format == FMT_H264){
425             for(i=0; i<2; i++){
426                 CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+4)  * sizeof(int16_t))
427                 pic->motion_val[i]= pic->motion_val_base[i]+4;
428                 CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
429             }
430             pic->motion_subsample_log2= 2;
431         }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
432             for(i=0; i<2; i++){
433                 CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t))
434                 pic->motion_val[i]= pic->motion_val_base[i]+4;
435                 CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
436             }
437             pic->motion_subsample_log2= 3;
438         }
439         if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
440             CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
441         }
442         pic->qstride= s->mb_stride;
443         CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
444     }
445
446     //it might be nicer if the application would keep track of these but it would require a API change
447     memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
448     s->prev_pict_types[0]= s->pict_type;
449     if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
450         pic->age= INT_MAX; // skipped MBs in b frames are quite rare in mpeg1/2 and its a bit tricky to skip them anyway
451
452     return 0;
453 fail: //for the CHECKED_ALLOCZ macro
454     return -1;
455 }
456
457 /**
458  * deallocates a picture
459  */
460 static void free_picture(MpegEncContext *s, Picture *pic){
461     int i;
462
463     if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
464         s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
465     }
466
467     av_freep(&pic->mb_var);
468     av_freep(&pic->mc_mb_var);
469     av_freep(&pic->mb_mean);
470     av_freep(&pic->mbskip_table);
471     av_freep(&pic->qscale_table);
472     av_freep(&pic->mb_type_base);
473     av_freep(&pic->dct_coeff);
474     av_freep(&pic->pan_scan);
475     pic->mb_type= NULL;
476     for(i=0; i<2; i++){
477         av_freep(&pic->motion_val_base[i]);
478         av_freep(&pic->ref_index[i]);
479     }
480
481     if(pic->type == FF_BUFFER_TYPE_SHARED){
482         for(i=0; i<4; i++){
483             pic->base[i]=
484             pic->data[i]= NULL;
485         }
486         pic->type= 0;
487     }
488 }
489
490 static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
491     int i;
492
493     // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
494     CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*21*2); //(width + edge + align)*interlaced*MBsize*tolerance
495     s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*21;
496
497      //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer()
498     CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t))
499     s->rd_scratchpad=   s->me.scratchpad;
500     s->b_scratchpad=    s->me.scratchpad;
501     s->obmc_scratchpad= s->me.scratchpad + 16;
502     if (s->encoding) {
503         CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
504         CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
505         if(s->avctx->noise_reduction){
506             CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
507         }
508     }
509     CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
510     s->block= s->blocks[0];
511
512     for(i=0;i<12;i++){
513         s->pblocks[i] = (short *)(&s->block[i]);
514     }
515     return 0;
516 fail:
517     return -1; //free() through MPV_common_end()
518 }
519
520 static void free_duplicate_context(MpegEncContext *s){
521     if(s==NULL) return;
522
523     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
524     av_freep(&s->me.scratchpad);
525     s->rd_scratchpad=
526     s->b_scratchpad=
527     s->obmc_scratchpad= NULL;
528
529     av_freep(&s->dct_error_sum);
530     av_freep(&s->me.map);
531     av_freep(&s->me.score_map);
532     av_freep(&s->blocks);
533     s->block= NULL;
534 }
535
536 static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
537 #define COPY(a) bak->a= src->a
538     COPY(allocated_edge_emu_buffer);
539     COPY(edge_emu_buffer);
540     COPY(me.scratchpad);
541     COPY(rd_scratchpad);
542     COPY(b_scratchpad);
543     COPY(obmc_scratchpad);
544     COPY(me.map);
545     COPY(me.score_map);
546     COPY(blocks);
547     COPY(block);
548     COPY(start_mb_y);
549     COPY(end_mb_y);
550     COPY(me.map_generation);
551     COPY(pb);
552     COPY(dct_error_sum);
553     COPY(dct_count[0]);
554     COPY(dct_count[1]);
555 #undef COPY
556 }
557
558 void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
559     MpegEncContext bak;
560     int i;
561     //FIXME copy only needed parts
562 //START_TIMER
563     backup_duplicate_context(&bak, dst);
564     memcpy(dst, src, sizeof(MpegEncContext));
565     backup_duplicate_context(dst, &bak);
566     for(i=0;i<12;i++){
567         dst->pblocks[i] = (short *)(&dst->block[i]);
568     }
569 //STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
570 }
571
572 #ifdef CONFIG_ENCODERS
573 static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){
574 #define COPY(a) dst->a= src->a
575     COPY(pict_type);
576     COPY(current_picture);
577     COPY(f_code);
578     COPY(b_code);
579     COPY(qscale);
580     COPY(lambda);
581     COPY(lambda2);
582     COPY(picture_in_gop_number);
583     COPY(gop_picture_number);
584     COPY(frame_pred_frame_dct); //FIXME don't set in encode_header
585     COPY(progressive_frame); //FIXME don't set in encode_header
586     COPY(partitioned_frame); //FIXME don't set in encode_header
587 #undef COPY
588 }
589 #endif
590
591 /**
592  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
593  * the changed fields will not depend upon the prior state of the MpegEncContext.
594  */
595 static void MPV_common_defaults(MpegEncContext *s){
596     s->y_dc_scale_table=
597     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
598     s->chroma_qscale_table= ff_default_chroma_qscale_table;
599     s->progressive_frame= 1;
600     s->progressive_sequence= 1;
601     s->picture_structure= PICT_FRAME;
602
603     s->coded_picture_number = 0;
604     s->picture_number = 0;
605     s->input_picture_number = 0;
606
607     s->picture_in_gop_number = 0;
608
609     s->f_code = 1;
610     s->b_code = 1;
611 }
612
613 /**
614  * sets the given MpegEncContext to defaults for decoding.
615  * the changed fields will not depend upon the prior state of the MpegEncContext.
616  */
617 void MPV_decode_defaults(MpegEncContext *s){
    /* nothing decoder specific is set at present, only the common defaults */
618     MPV_common_defaults(s);
619 }
620
/**
 * Sets the given MpegEncContext to defaults for encoding.
 * The changed fields will not depend upon the prior state of the MpegEncContext.
 *
 * The first call also fills the shared default_fcode_tab: the 32 entries for
 * offsets -16..15 around MAX_MV are set to 1. The one-time flag is a plain
 * static variable without any locking.
 */

#ifdef CONFIG_ENCODERS
static void MPV_encode_defaults(MpegEncContext *s){
    static int tables_ready=0;

    MPV_common_defaults(s);

    if(!tables_ready){
        tables_ready=1;
        /* default_fcode_tab[MAX_MV-16 .. MAX_MV+15] = 1 */
        memset(default_fcode_tab + MAX_MV - 16, 1, 32);
    }

    s->fcode_tab    = default_fcode_tab;
    s->me.mv_penalty= default_mv_penalty;
}
#endif //CONFIG_ENCODERS
644
645 /**
646  * init common structure for both encoder and decoder.
647  * this assumes that some variables like width/height are already set
648  */
649 int MPV_common_init(MpegEncContext *s)
650 {
651     int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;
652
653     s->mb_height = (s->height + 15) / 16;
654
655     if(s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height)){
656         av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
657         return -1;
658     }
659
660     if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height))
661         return -1;
662
663     dsputil_init(&s->dsp, s->avctx);
664     DCT_common_init(s);
665
666     s->flags= s->avctx->flags;
667     s->flags2= s->avctx->flags2;
668
669     s->mb_width  = (s->width  + 15) / 16;
670     s->mb_stride = s->mb_width + 1;
671     s->b8_stride = s->mb_width*2 + 1;
672     s->b4_stride = s->mb_width*4 + 1;
673     mb_array_size= s->mb_height * s->mb_stride;
674     mv_table_size= (s->mb_height+2) * s->mb_stride + 1;
675
676     /* set chroma shifts */
677     avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
678                                                     &(s->chroma_y_shift) );
679
680     /* set default edge pos, will be overriden in decode_header if needed */
681     s->h_edge_pos= s->mb_width*16;
682     s->v_edge_pos= s->mb_height*16;
683
684     s->mb_num = s->mb_width * s->mb_height;
685
686     s->block_wrap[0]=
687     s->block_wrap[1]=
688     s->block_wrap[2]=
689     s->block_wrap[3]= s->b8_stride;
690     s->block_wrap[4]=
691     s->block_wrap[5]= s->mb_stride;
692
693     y_size = s->b8_stride * (2 * s->mb_height + 1);
694     c_size = s->mb_stride * (s->mb_height + 1);
695     yc_size = y_size + 2 * c_size;
696
697     /* convert fourcc to upper case */
698     s->codec_tag=          toupper( s->avctx->codec_tag     &0xFF)
699                         + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
700                         + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16)
701                         + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);
702
703     s->stream_codec_tag=          toupper( s->avctx->stream_codec_tag     &0xFF)
704                                + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
705                                + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16)
706                                + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);
707
708     s->avctx->coded_frame= (AVFrame*)&s->current_picture;
709
710     CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error ressilience code looks cleaner with this
711     for(y=0; y<s->mb_height; y++){
712         for(x=0; x<s->mb_width; x++){
713             s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
714         }
715     }
716     s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
717
718     if (s->encoding) {
719         /* Allocate MV tables */
720         CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
721         CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
722         CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
723         CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
724         CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
725         CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
726         s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
727         s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
728         s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
729         s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
730         s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
731         s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;
732
733         if(s->msmpeg4_version){
734             CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
735         }
736         CHECKED_ALLOCZ(s->avctx->stats_out, 256);
737
738         /* Allocate MB type table */
739         CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding
740
741         CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))
742
743         CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
744         CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
745         CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
746         CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
747         CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
748         CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
749
750         if(s->avctx->noise_reduction){
751             CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
752         }
753     }
754     CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))
755
756     CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
757
758     if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
759         /* interlaced direct mode decoding tables */
760             for(i=0; i<2; i++){
761                 int j, k;
762                 for(j=0; j<2; j++){
763                     for(k=0; k<2; k++){
764                         CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
765                         s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
766                     }
767                     CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
768                     CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
769                     s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
770                 }
771                 CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
772             }
773     }
774     if (s->out_format == FMT_H263) {
775         /* ac values */
776         CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
777         s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
778         s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
779         s->ac_val[2] = s->ac_val[1] + c_size;
780
781         /* cbp values */
782         CHECKED_ALLOCZ(s->coded_block_base, y_size);
783         s->coded_block= s->coded_block_base + s->b8_stride + 1;
784
785         /* cbp, ac_pred, pred_dir */
786         CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
787         CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
788     }
789
790     if (s->h263_pred || s->h263_plus || !s->encoding) {
791         /* dc values */
792         //MN: we need these for error resilience of intra-frames
793         CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
794         s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
795         s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
796         s->dc_val[2] = s->dc_val[1] + c_size;
797         for(i=0;i<yc_size;i++)
798             s->dc_val_base[i] = 1024;
799     }
800
801     /* which mb is a intra block */
802     CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
803     memset(s->mbintra_table, 1, mb_array_size);
804
805     /* init macroblock skip table */
806     CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
807     //Note the +1 is for a quicker mpeg4 slice_end detection
808     CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
809
810     s->parse_context.state= -1;
811     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
812        s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
813        s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
814        s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
815     }
816
817     s->context_initialized = 1;
818
819     s->thread_context[0]= s;
820     for(i=1; i<s->avctx->thread_count; i++){
821         s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
822         memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
823     }
824
825     for(i=0; i<s->avctx->thread_count; i++){
826         if(init_duplicate_context(s->thread_context[i], s) < 0)
827            goto fail;
828         s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
829         s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
830     }
831
832     return 0;
833  fail:
834     MPV_common_end(s);
835     return -1;
836 }
837
838 /* init common structure for both encoder and decoder */
839 void MPV_common_end(MpegEncContext *s)
840 {
841     int i, j, k;
842
843     for(i=0; i<s->avctx->thread_count; i++){
844         free_duplicate_context(s->thread_context[i]);
845     }
846     for(i=1; i<s->avctx->thread_count; i++){
847         av_freep(&s->thread_context[i]);
848     }
849
850     av_freep(&s->parse_context.buffer);
851     s->parse_context.buffer_size=0;
852
853     av_freep(&s->mb_type);
854     av_freep(&s->p_mv_table_base);
855     av_freep(&s->b_forw_mv_table_base);
856     av_freep(&s->b_back_mv_table_base);
857     av_freep(&s->b_bidir_forw_mv_table_base);
858     av_freep(&s->b_bidir_back_mv_table_base);
859     av_freep(&s->b_direct_mv_table_base);
860     s->p_mv_table= NULL;
861     s->b_forw_mv_table= NULL;
862     s->b_back_mv_table= NULL;
863     s->b_bidir_forw_mv_table= NULL;
864     s->b_bidir_back_mv_table= NULL;
865     s->b_direct_mv_table= NULL;
866     for(i=0; i<2; i++){
867         for(j=0; j<2; j++){
868             for(k=0; k<2; k++){
869                 av_freep(&s->b_field_mv_table_base[i][j][k]);
870                 s->b_field_mv_table[i][j][k]=NULL;
871             }
872             av_freep(&s->b_field_select_table[i][j]);
873             av_freep(&s->p_field_mv_table_base[i][j]);
874             s->p_field_mv_table[i][j]=NULL;
875         }
876         av_freep(&s->p_field_select_table[i]);
877     }
878
879     av_freep(&s->dc_val_base);
880     av_freep(&s->ac_val_base);
881     av_freep(&s->coded_block_base);
882     av_freep(&s->mbintra_table);
883     av_freep(&s->cbp_table);
884     av_freep(&s->pred_dir_table);
885
886     av_freep(&s->mbskip_table);
887     av_freep(&s->prev_pict_types);
888     av_freep(&s->bitstream_buffer);
889     s->allocated_bitstream_buffer_size=0;
890
891     av_freep(&s->avctx->stats_out);
892     av_freep(&s->ac_stats);
893     av_freep(&s->error_status_table);
894     av_freep(&s->mb_index2xy);
895     av_freep(&s->lambda_table);
896     av_freep(&s->q_intra_matrix);
897     av_freep(&s->q_inter_matrix);
898     av_freep(&s->q_intra_matrix16);
899     av_freep(&s->q_inter_matrix16);
900     av_freep(&s->input_picture);
901     av_freep(&s->reordered_input_picture);
902     av_freep(&s->dct_offset);
903
904     if(s->picture){
905         for(i=0; i<MAX_PICTURE_COUNT; i++){
906             free_picture(s, &s->picture[i]);
907         }
908     }
909     av_freep(&s->picture);
910     s->context_initialized = 0;
911     s->last_picture_ptr=
912     s->next_picture_ptr=
913     s->current_picture_ptr= NULL;
914     s->linesize= s->uvlinesize= 0;
915
916     for(i=0; i<3; i++)
917         av_freep(&s->visualization_buffer[i]);
918
919     avcodec_default_free_buffers(s->avctx);
920 }
921
922 #ifdef CONFIG_ENCODERS
923
924 /* init video encoder */
925 int MPV_encode_init(AVCodecContext *avctx)
926 {
927     MpegEncContext *s = avctx->priv_data;
928     int i;
929     int chroma_h_shift, chroma_v_shift;
930
931     MPV_encode_defaults(s);
932
933     switch (avctx->codec_id) {
934     case CODEC_ID_MPEG2VIDEO:
935         if(avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUV422P){
936             av_log(avctx, AV_LOG_ERROR, "only YUV420 and YUV422 are supported\n");
937             return -1;
938         }
939         break;
940     case CODEC_ID_LJPEG:
941     case CODEC_ID_MJPEG:
942         if(avctx->pix_fmt != PIX_FMT_YUVJ420P && avctx->pix_fmt != PIX_FMT_YUVJ422P &&
943            ((avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUV422P) || avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL)){
944             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
945             return -1;
946         }
947         break;
948     default:
949         if(avctx->pix_fmt != PIX_FMT_YUV420P){
950             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
951             return -1;
952         }
953     }
954
955     switch (avctx->pix_fmt) {
956     case PIX_FMT_YUVJ422P:
957     case PIX_FMT_YUV422P:
958         s->chroma_format = CHROMA_422;
959         break;
960     case PIX_FMT_YUVJ420P:
961     case PIX_FMT_YUV420P:
962     default:
963         s->chroma_format = CHROMA_420;
964         break;
965     }
966
967     s->bit_rate = avctx->bit_rate;
968     s->width = avctx->width;
969     s->height = avctx->height;
970     if(avctx->gop_size > 600 && avctx->strict_std_compliance>FF_COMPLIANCE_EXPERIMENTAL){
971         av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
972         avctx->gop_size=600;
973     }
974     s->gop_size = avctx->gop_size;
975     s->avctx = avctx;
976     s->flags= avctx->flags;
977     s->flags2= avctx->flags2;
978     s->max_b_frames= avctx->max_b_frames;
979     s->codec_id= avctx->codec->id;
980     s->luma_elim_threshold  = avctx->luma_elim_threshold;
981     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
982     s->strict_std_compliance= avctx->strict_std_compliance;
983     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
984     s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
985     s->mpeg_quant= avctx->mpeg_quant;
986     s->rtp_mode= !!avctx->rtp_payload_size;
987     s->intra_dc_precision= avctx->intra_dc_precision;
988     s->user_specified_pts = AV_NOPTS_VALUE;
989
990     if (s->gop_size <= 1) {
991         s->intra_only = 1;
992         s->gop_size = 12;
993     } else {
994         s->intra_only = 0;
995     }
996
997     s->me_method = avctx->me_method;
998
999     /* Fixed QSCALE */
1000     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
1001
1002     s->adaptive_quant= (   s->avctx->lumi_masking
1003                         || s->avctx->dark_masking
1004                         || s->avctx->temporal_cplx_masking
1005                         || s->avctx->spatial_cplx_masking
1006                         || s->avctx->p_masking
1007                         || s->avctx->border_masking
1008                         || (s->flags&CODEC_FLAG_QP_RD))
1009                        && !s->fixed_qscale;
1010
1011     s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
1012     s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
1013     s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
1014     s->intra_vlc_format= !!(s->flags2 & CODEC_FLAG2_INTRA_VLC);
1015     s->q_scale_type= !!(s->flags2 & CODEC_FLAG2_NON_LINEAR_QUANT);
1016
1017     if(avctx->rc_max_rate && !avctx->rc_buffer_size){
1018         av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed, for encoding with a maximum bitrate\n");
1019         return -1;
1020     }
1021
1022     if(avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate){
1023         av_log(avctx, AV_LOG_INFO, "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
1024     }
1025
1026     if(avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate){
1027         av_log(avctx, AV_LOG_INFO, "bitrate below min bitrate\n");
1028         return -1;
1029     }
1030
1031     if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){
1032         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
1033         return -1;
1034     }
1035
1036     if(   s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate
1037        && (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO)
1038        && 90000LL * (avctx->rc_buffer_size-1) > s->avctx->rc_max_rate*0xFFFFLL){
1039
1040         av_log(avctx, AV_LOG_INFO, "Warning vbv_delay will be set to 0xFFFF (=VBR) as the specified vbv buffer is too large for the given bitrate!\n");
1041     }
1042
1043     if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4
1044        && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P && s->codec_id != CODEC_ID_FLV1){
1045         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
1046         return -1;
1047     }
1048
1049     if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
1050         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decision\n");
1051         return -1;
1052     }
1053
1054     if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
1055         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
1056         return -1;
1057     }
1058
1059     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
1060         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
1061         return -1;
1062     }
1063
1064     if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
1065         av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
1066         return -1;
1067     }
1068
1069     if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
1070         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
1071         return -1;
1072     }
1073
1074     if((s->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN))
1075        && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO){
1076         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
1077         return -1;
1078     }
1079
1080     if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
1081         av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supported by codec\n");
1082         return -1;
1083     }
1084
1085     if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
1086         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
1087         return -1;
1088     }
1089
1090     if((s->flags & CODEC_FLAG_QP_RD) && s->avctx->mb_decision != FF_MB_DECISION_RD){
1091         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
1092         return -1;
1093     }
1094
1095     if(s->avctx->scenechange_threshold < 1000000000 && (s->flags & CODEC_FLAG_CLOSED_GOP)){
1096         av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection arent supported yet, set threshold to 1000000000\n");
1097         return -1;
1098     }
1099
1100     if((s->flags2 & CODEC_FLAG2_INTRA_VLC) && s->codec_id != CODEC_ID_MPEG2VIDEO){
1101         av_log(avctx, AV_LOG_ERROR, "intra vlc table not supported by codec\n");
1102         return -1;
1103     }
1104
1105     if(s->flags & CODEC_FLAG_LOW_DELAY){
1106         if (s->codec_id != CODEC_ID_MPEG2VIDEO && s->codec_id != CODEC_ID_MPEG1VIDEO){
1107             av_log(avctx, AV_LOG_ERROR, "low delay forcing is only available for mpeg1/2\n");
1108             return -1;
1109         }
1110         if (s->max_b_frames != 0){
1111             av_log(avctx, AV_LOG_ERROR, "b frames cannot be used with low delay\n");
1112             return -1;
1113         }
1114     }
1115
1116     if(s->q_scale_type == 1){
1117         if(s->codec_id != CODEC_ID_MPEG2VIDEO){
1118             av_log(avctx, AV_LOG_ERROR, "non linear quant is only available for mpeg2\n");
1119             return -1;
1120         }
1121         if(avctx->qmax > 12){
1122             av_log(avctx, AV_LOG_ERROR, "non linear quant only supports qmax <= 12 currently\n");
1123             return -1;
1124         }
1125     }
1126
1127     if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4
1128        && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO
1129        && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){
1130         av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n");
1131         return -1;
1132     }
1133
1134     if(s->avctx->thread_count > 1)
1135         s->rtp_mode= 1;
1136
1137     if(!avctx->time_base.den || !avctx->time_base.num){
1138         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
1139         return -1;
1140     }
1141
1142     i= (INT_MAX/2+128)>>8;
1143     if(avctx->me_threshold >= i){
1144         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n", i - 1);
1145         return -1;
1146     }
1147     if(avctx->mb_threshold >= i){
1148         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n", i - 1);
1149         return -1;
1150     }
1151
1152     if(avctx->b_frame_strategy && (avctx->flags&CODEC_FLAG_PASS2)){
1153         av_log(avctx, AV_LOG_INFO, "notice: b_frame_strategy only affects the first pass\n");
1154         avctx->b_frame_strategy = 0;
1155     }
1156
1157     i= ff_gcd(avctx->time_base.den, avctx->time_base.num);
1158     if(i > 1){
1159         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
1160         avctx->time_base.den /= i;
1161         avctx->time_base.num /= i;
1162 //        return -1;
1163     }
1164
1165     if(s->codec_id==CODEC_ID_MJPEG){
1166         s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
1167         s->inter_quant_bias= 0;
1168     }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
1169         s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
1170         s->inter_quant_bias= 0;
1171     }else{
1172         s->intra_quant_bias=0;
1173         s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
1174     }
1175
1176     if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
1177         s->intra_quant_bias= avctx->intra_quant_bias;
1178     if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
1179         s->inter_quant_bias= avctx->inter_quant_bias;
1180
1181     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
1182
1183     if(avctx->codec_id == CODEC_ID_MPEG4 && s->avctx->time_base.den > (1<<16)-1){
1184         av_log(avctx, AV_LOG_ERROR, "timebase not supported by mpeg 4 standard\n");
1185         return -1;
1186     }
1187     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
1188
1189     switch(avctx->codec->id) {
1190     case CODEC_ID_MPEG1VIDEO:
1191         s->out_format = FMT_MPEG1;
1192         s->low_delay= !!(s->flags & CODEC_FLAG_LOW_DELAY);
1193         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1194         break;
1195     case CODEC_ID_MPEG2VIDEO:
1196         s->out_format = FMT_MPEG1;
1197         s->low_delay= !!(s->flags & CODEC_FLAG_LOW_DELAY);
1198         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1199         s->rtp_mode= 1;
1200         break;
1201     case CODEC_ID_LJPEG:
1202     case CODEC_ID_JPEGLS:
1203     case CODEC_ID_MJPEG:
1204         s->out_format = FMT_MJPEG;
1205         s->intra_only = 1; /* force intra only for jpeg */
1206         s->mjpeg_write_tables = avctx->codec->id != CODEC_ID_JPEGLS;
1207         s->mjpeg_data_only_frames = 0; /* write all the needed headers */
1208         s->mjpeg_vsample[0] = 2;
1209         s->mjpeg_vsample[1] = 2>>chroma_v_shift;
1210         s->mjpeg_vsample[2] = 2>>chroma_v_shift;
1211         s->mjpeg_hsample[0] = 2;
1212         s->mjpeg_hsample[1] = 2>>chroma_h_shift;
1213         s->mjpeg_hsample[2] = 2>>chroma_h_shift;
1214         if (mjpeg_init(s) < 0)
1215             return -1;
1216         avctx->delay=0;
1217         s->low_delay=1;
1218         break;
1219 #ifdef CONFIG_H261_ENCODER
1220     case CODEC_ID_H261:
1221         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
1222             av_log(avctx, AV_LOG_ERROR, "The specified picture size of %dx%d is not valid for the H.261 codec.\nValid sizes are 176x144, 352x288\n", s->width, s->height);
1223             return -1;
1224         }
1225         s->out_format = FMT_H261;
1226         avctx->delay=0;
1227         s->low_delay=1;
1228         break;
1229 #endif
1230     case CODEC_ID_H263:
1231         if (h263_get_picture_format(s->width, s->height) == 7) {
1232             av_log(avctx, AV_LOG_INFO, "The specified picture size of %dx%d is not valid for the H.263 codec.\nValid sizes are 128x96, 176x144, 352x288, 704x576, and 1408x1152. Try H.263+.\n", s->width, s->height);
1233             return -1;
1234         }
1235         s->out_format = FMT_H263;
1236         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1237         avctx->delay=0;
1238         s->low_delay=1;
1239         break;
1240     case CODEC_ID_H263P:
1241         s->out_format = FMT_H263;
1242         s->h263_plus = 1;
1243         /* Fx */
1244         s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
1245         s->h263_aic= (avctx->flags & CODEC_FLAG_AC_PRED) ? 1:0;
1246         s->modified_quant= s->h263_aic;
1247         s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
1248         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1249         s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
1250         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1251         s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
1252
1253         /* /Fx */
1254         /* These are just to be sure */
1255         avctx->delay=0;
1256         s->low_delay=1;
1257         break;
1258     case CODEC_ID_FLV1:
1259         s->out_format = FMT_H263;
1260         s->h263_flv = 2; /* format = 1; 11-bit codes */
1261         s->unrestricted_mv = 1;
1262         s->rtp_mode=0; /* don't allow GOB */
1263         avctx->delay=0;
1264         s->low_delay=1;
1265         break;
1266     case CODEC_ID_RV10:
1267         s->out_format = FMT_H263;
1268         avctx->delay=0;
1269         s->low_delay=1;
1270         break;
1271     case CODEC_ID_RV20:
1272         s->out_format = FMT_H263;
1273         avctx->delay=0;
1274         s->low_delay=1;
1275         s->modified_quant=1;
1276         s->h263_aic=1;
1277         s->h263_plus=1;
1278         s->loop_filter=1;
1279         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1280         break;
1281     case CODEC_ID_MPEG4:
1282         s->out_format = FMT_H263;
1283         s->h263_pred = 1;
1284         s->unrestricted_mv = 1;
1285         s->low_delay= s->max_b_frames ? 0 : 1;
1286         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1287         break;
1288     case CODEC_ID_MSMPEG4V1:
1289         s->out_format = FMT_H263;
1290         s->h263_msmpeg4 = 1;
1291         s->h263_pred = 1;
1292         s->unrestricted_mv = 1;
1293         s->msmpeg4_version= 1;
1294         avctx->delay=0;
1295         s->low_delay=1;
1296         break;
1297     case CODEC_ID_MSMPEG4V2:
1298         s->out_format = FMT_H263;
1299         s->h263_msmpeg4 = 1;
1300         s->h263_pred = 1;
1301         s->unrestricted_mv = 1;
1302         s->msmpeg4_version= 2;
1303         avctx->delay=0;
1304         s->low_delay=1;
1305         break;
1306     case CODEC_ID_MSMPEG4V3:
1307         s->out_format = FMT_H263;
1308         s->h263_msmpeg4 = 1;
1309         s->h263_pred = 1;
1310         s->unrestricted_mv = 1;
1311         s->msmpeg4_version= 3;
1312         s->flipflop_rounding=1;
1313         avctx->delay=0;
1314         s->low_delay=1;
1315         break;
1316     case CODEC_ID_WMV1:
1317         s->out_format = FMT_H263;
1318         s->h263_msmpeg4 = 1;
1319         s->h263_pred = 1;
1320         s->unrestricted_mv = 1;
1321         s->msmpeg4_version= 4;
1322         s->flipflop_rounding=1;
1323         avctx->delay=0;
1324         s->low_delay=1;
1325         break;
1326     case CODEC_ID_WMV2:
1327         s->out_format = FMT_H263;
1328         s->h263_msmpeg4 = 1;
1329         s->h263_pred = 1;
1330         s->unrestricted_mv = 1;
1331         s->msmpeg4_version= 5;
1332         s->flipflop_rounding=1;
1333         avctx->delay=0;
1334         s->low_delay=1;
1335         break;
1336     default:
1337         return -1;
1338     }
1339
1340     avctx->has_b_frames= !s->low_delay;
1341
1342     s->encoding = 1;
1343
1344     /* init */
1345     if (MPV_common_init(s) < 0)
1346         return -1;
1347
1348     if(s->modified_quant)
1349         s->chroma_qscale_table= ff_h263_chroma_qscale_table;
1350     s->progressive_frame=
1351     s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN));
1352     s->quant_precision=5;
1353
1354     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
1355     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
1356
1357 #ifdef CONFIG_H261_ENCODER
1358     if (s->out_format == FMT_H261)
1359         ff_h261_encode_init(s);
1360 #endif
1361     if (s->out_format == FMT_H263)
1362         h263_encode_init(s);
1363     if(s->msmpeg4_version)
1364         ff_msmpeg4_encode_init(s);
1365     if (s->out_format == FMT_MPEG1)
1366         ff_mpeg1_encode_init(s);
1367
1368     /* init q matrix */
1369     for(i=0;i<64;i++) {
1370         int j= s->dsp.idct_permutation[i];
1371         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
1372             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
1373             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
1374         }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1375             s->intra_matrix[j] =
1376             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1377         }else
1378         { /* mpeg1/2 */
1379             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
1380             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1381         }
1382         if(s->avctx->intra_matrix)
1383             s->intra_matrix[j] = s->avctx->intra_matrix[i];
1384         if(s->avctx->inter_matrix)
1385             s->inter_matrix[j] = s->avctx->inter_matrix[i];
1386     }
1387
1388     /* precompute matrix */
1389     /* for mjpeg, we do include qscale in the matrix */
1390     if (s->out_format != FMT_MJPEG) {
1391         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
1392                        s->intra_matrix, s->intra_quant_bias, avctx->qmin, 31, 1);
1393         convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
1394                        s->inter_matrix, s->inter_quant_bias, avctx->qmin, 31, 0);
1395     }
1396
1397     if(ff_rate_control_init(s) < 0)
1398         return -1;
1399
1400     return 0;
1401 }
1402
1403 int MPV_encode_end(AVCodecContext *avctx)
1404 {
1405     MpegEncContext *s = avctx->priv_data;
1406
1407     ff_rate_control_uninit(s);
1408
1409     MPV_common_end(s);
1410     if (s->out_format == FMT_MJPEG)
1411         mjpeg_close(s);
1412
1413     av_freep(&avctx->extradata);
1414
1415     return 0;
1416 }
1417
1418 #endif //CONFIG_ENCODERS
1419
1420 void init_rl(RLTable *rl, int use_static)
1421 {
1422     int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
1423     uint8_t index_run[MAX_RUN+1];
1424     int last, run, level, start, end, i;
1425
1426     /* If table is static, we can quit if rl->max_level[0] is not NULL */
1427     if(use_static && rl->max_level[0])
1428         return;
1429
1430     /* compute max_level[], max_run[] and index_run[] */
1431     for(last=0;last<2;last++) {
1432         if (last == 0) {
1433             start = 0;
1434             end = rl->last;
1435         } else {
1436             start = rl->last;
1437             end = rl->n;
1438         }
1439
1440         memset(max_level, 0, MAX_RUN + 1);
1441         memset(max_run, 0, MAX_LEVEL + 1);
1442         memset(index_run, rl->n, MAX_RUN + 1);
1443         for(i=start;i<end;i++) {
1444             run = rl->table_run[i];
1445             level = rl->table_level[i];
1446             if (index_run[run] == rl->n)
1447                 index_run[run] = i;
1448             if (level > max_level[run])
1449                 max_level[run] = level;
1450             if (run > max_run[level])
1451                 max_run[level] = run;
1452         }
1453         if(use_static)
1454             rl->max_level[last] = av_mallocz_static(MAX_RUN + 1);
1455         else
1456             rl->max_level[last] = av_malloc(MAX_RUN + 1);
1457         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
1458         if(use_static)
1459             rl->max_run[last] = av_mallocz_static(MAX_LEVEL + 1);
1460         else
1461             rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
1462         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
1463         if(use_static)
1464             rl->index_run[last] = av_mallocz_static(MAX_RUN + 1);
1465         else
1466             rl->index_run[last] = av_malloc(MAX_RUN + 1);
1467         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
1468     }
1469 }
1470
/* Extend an image of size width x height by a border of 'w' pixels on every
 * side, replicating the nearest image pixel. 'buf' points at the top left
 * image pixel, 'wrap' is the distance in bytes between two lines. */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *const bottom = buf + (height - 1) * wrap;
    uint8_t *row;
    int n, y, off;

    /* lines above the first and below the last image line */
    for (n = 0, off = wrap; n < w; n++, off += wrap) {
        memcpy(buf    - off, buf,    width);
        memcpy(bottom + off, bottom, width);
    }

    /* columns left and right of every image line */
    row = buf;
    for (y = 0; y < height; y++) {
        memset(row - w,     row[0],         w);
        memset(row + width, row[width - 1], w);
        row += wrap;
    }

    /* the four corner areas */
    for (n = 0, off = wrap; n < w; n++, off += wrap) {
        memset(buf    - off - w,     buf[0],            w); /* top left */
        memset(buf    - off + width, buf[width - 1],    w); /* top right */
        memset(bottom + off - w,     bottom[0],         w); /* bottom left */
        memset(bottom + off + width, bottom[width - 1], w); /* bottom right */
    }
}
1499
1500 int ff_find_unused_picture(MpegEncContext *s, int shared){
1501     int i;
1502
1503     if(shared){
1504         for(i=0; i<MAX_PICTURE_COUNT; i++){
1505             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
1506         }
1507     }else{
1508         for(i=0; i<MAX_PICTURE_COUNT; i++){
1509             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
1510         }
1511         for(i=0; i<MAX_PICTURE_COUNT; i++){
1512             if(s->picture[i].data[0]==NULL) return i;
1513         }
1514     }
1515
1516     assert(0);
1517     return -1;
1518 }
1519
1520 static void update_noise_reduction(MpegEncContext *s){
1521     int intra, i;
1522
1523     for(intra=0; intra<2; intra++){
1524         if(s->dct_count[intra] > (1<<16)){
1525             for(i=0; i<64; i++){
1526                 s->dct_error_sum[intra][i] >>=1;
1527             }
1528             s->dct_count[intra] >>= 1;
1529         }
1530
1531         for(i=0; i<64; i++){
1532             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
1533         }
1534     }
1535 }
1536
1537 /**
1538  * generic function for encode/decode called after coding/decoding the header and before a frame is coded/decoded
1539  */
1540 int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
1541 {
1542     int i;
1543     AVFrame *pic;
1544     s->mb_skipped = 0;
1545
1546     assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
1547
1548     /* mark&release old frames */
1549     if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
1550       if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
1551         avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);
1552
1553         /* release forgotten pictures */
1554         /* if(mpeg124/h263) */
1555         if(!s->encoding){
1556             for(i=0; i<MAX_PICTURE_COUNT; i++){
1557                 if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
1558                     av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
1559                     avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
1560                 }
1561             }
1562         }
1563       }
1564     }
1565 alloc:
1566     if(!s->encoding){
1567         /* release non reference frames */
1568         for(i=0; i<MAX_PICTURE_COUNT; i++){
1569             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1570                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1571             }
1572         }
1573
1574         if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
1575             pic= (AVFrame*)s->current_picture_ptr; //we allready have a unused image (maybe it was set before reading the header)
1576         else{
1577             i= ff_find_unused_picture(s, 0);
1578             pic= (AVFrame*)&s->picture[i];
1579         }
1580
1581         pic->reference= (s->pict_type != B_TYPE || s->codec_id == CODEC_ID_H264)
1582                         && !s->dropable ? 3 : 0;
1583
1584         pic->coded_picture_number= s->coded_picture_number++;
1585
1586         if( alloc_picture(s, (Picture*)pic, 0) < 0)
1587             return -1;
1588
1589         s->current_picture_ptr= (Picture*)pic;
1590         s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
1591         s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
1592     }
1593
1594     s->current_picture_ptr->pict_type= s->pict_type;
1595 //    if(s->flags && CODEC_FLAG_QSCALE)
1596   //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
1597     s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;
1598
1599     copy_picture(&s->current_picture, s->current_picture_ptr);
1600
1601     if (s->pict_type != B_TYPE) {
1602         s->last_picture_ptr= s->next_picture_ptr;
1603         if(!s->dropable)
1604             s->next_picture_ptr= s->current_picture_ptr;
1605     }
1606 /*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
1607         s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL,
1608         s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL,
1609         s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
1610         s->pict_type, s->dropable);*/
1611
1612     if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
1613     if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);
1614
1615     if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL) && !s->dropable){
1616         av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
1617         assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
1618         goto alloc;
1619     }
1620
1621     assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));
1622
1623     if(s->picture_structure!=PICT_FRAME){
1624         int i;
1625         for(i=0; i<4; i++){
1626             if(s->picture_structure == PICT_BOTTOM_FIELD){
1627                  s->current_picture.data[i] += s->current_picture.linesize[i];
1628             }
1629             s->current_picture.linesize[i] *= 2;
1630             s->last_picture.linesize[i] *=2;
1631             s->next_picture.linesize[i] *=2;
1632         }
1633     }
1634
1635     s->hurry_up= s->avctx->hurry_up;
1636     s->error_resilience= avctx->error_resilience;
1637
1638     /* set dequantizer, we can't do it during init as it might change for mpeg4
1639        and we can't do it in the header decode as init isnt called for mpeg4 there yet */
1640     if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
1641         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1642         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1643     }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1644         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1645         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1646     }else{
1647         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1648         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1649     }
1650
1651     if(s->dct_error_sum){
1652         assert(s->avctx->noise_reduction && s->encoding);
1653
1654         update_noise_reduction(s);
1655     }
1656
1657 #ifdef HAVE_XVMC
1658     if(s->avctx->xvmc_acceleration)
1659         return XVMC_field_start(s, avctx);
1660 #endif
1661     return 0;
1662 }
1663
1664 /* generic function for encode/decode called after a frame has been coded/decoded */
1665 void MPV_frame_end(MpegEncContext *s)
1666 {
1667     int i;
1668     /* draw edge for correct motion prediction if outside */
1669 #ifdef HAVE_XVMC
1670 //just to make sure that all data is rendered.
1671     if(s->avctx->xvmc_acceleration){
1672         XVMC_field_end(s);
1673     }else
1674 #endif
1675     if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
1676             draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
1677             draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1678             draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1679     }
1680     emms_c();
1681
1682     s->last_pict_type    = s->pict_type;
1683     s->last_lambda_for[s->pict_type]= s->current_picture_ptr->quality;
1684     if(s->pict_type!=B_TYPE){
1685         s->last_non_b_pict_type= s->pict_type;
1686     }
1687 #if 0
1688         /* copy back current_picture variables */
1689     for(i=0; i<MAX_PICTURE_COUNT; i++){
1690         if(s->picture[i].data[0] == s->current_picture.data[0]){
1691             s->picture[i]= s->current_picture;
1692             break;
1693         }
1694     }
1695     assert(i<MAX_PICTURE_COUNT);
1696 #endif
1697
1698     if(s->encoding){
1699         /* release non-reference frames */
1700         for(i=0; i<MAX_PICTURE_COUNT; i++){
1701             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1702                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1703             }
1704         }
1705     }
1706     // clear copies, to avoid confusion
1707 #if 0
1708     memset(&s->last_picture, 0, sizeof(Picture));
1709     memset(&s->next_picture, 0, sizeof(Picture));
1710     memset(&s->current_picture, 0, sizeof(Picture));
1711 #endif
1712     s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
1713 }
1714
1715 /**
1716  * draws an line from (ex, ey) -> (sx, sy).
1717  * @param w width of the image
1718  * @param h height of the image
1719  * @param stride stride/linesize of the image
1720  * @param color color of the arrow
1721  */
1722 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1723     int x, y, fr, f;
1724
1725     sx= av_clip(sx, 0, w-1);
1726     sy= av_clip(sy, 0, h-1);
1727     ex= av_clip(ex, 0, w-1);
1728     ey= av_clip(ey, 0, h-1);
1729
1730     buf[sy*stride + sx]+= color;
1731
1732     if(FFABS(ex - sx) > FFABS(ey - sy)){
1733         if(sx > ex){
1734             FFSWAP(int, sx, ex);
1735             FFSWAP(int, sy, ey);
1736         }
1737         buf+= sx + sy*stride;
1738         ex-= sx;
1739         f= ((ey-sy)<<16)/ex;
1740         for(x= 0; x <= ex; x++){
1741             y = (x*f)>>16;
1742             fr= (x*f)&0xFFFF;
1743             buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
1744             buf[(y+1)*stride + x]+= (color*         fr )>>16;
1745         }
1746     }else{
1747         if(sy > ey){
1748             FFSWAP(int, sx, ex);
1749             FFSWAP(int, sy, ey);
1750         }
1751         buf+= sx + sy*stride;
1752         ey-= sy;
1753         if(ey) f= ((ex-sx)<<16)/ey;
1754         else   f= 0;
1755         for(y= 0; y <= ey; y++){
1756             x = (y*f)>>16;
1757             fr= (y*f)&0xFFFF;
1758             buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;;
1759             buf[y*stride + x+1]+= (color*         fr )>>16;;
1760         }
1761     }
1762 }
1763
/**
 * draws an arrow between (sx, sy) and (ex, ey).
 * The shaft is drawn with draw_line(); if the squared distance between the
 * end points is larger than 9, two short head strokes starting at (sx, sy)
 * are drawn as well.
 * @param w width of the image
 * @param h height of the image
 * @param stride stride/linesize of the image
 * @param color color of the arrow
 */
static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
    int delta_x, delta_y;

    /* keep the coordinates within 100 pixels of the image */
    sx= av_clip(sx, -100, w+100);
    sy= av_clip(sy, -100, h+100);
    ex= av_clip(ex, -100, w+100);
    ey= av_clip(ey, -100, h+100);

    delta_x= ex - sx;
    delta_y= ey - sy;

    if(delta_x*delta_x + delta_y*delta_y > 3*3){
        /* direction of the first head stroke, the second one is (-head_y, head_x) */
        int head_x= delta_x + delta_y;
        int head_y= delta_y - delta_x;
        const int norm= ff_sqrt((head_x*head_x + head_y*head_y)<<8);

        //FIXME subpixel accuracy
        head_x= ROUNDED_DIV(head_x*3<<4, norm);
        head_y= ROUNDED_DIV(head_y*3<<4, norm);

        draw_line(buf, sx, sy, sx + head_x, sy + head_y, w, h, stride, color);
        draw_line(buf, sx, sy, sx - head_y, sy + head_x, w, h, stride, color);
    }

    /* the shaft itself */
    draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
}
1796
1797 /**
1798  * prints debuging info for the given picture.
1799  */
1800 void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
1801
1802     if(!pict || !pict->mb_type) return;
1803
1804     if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
1805         int x,y;
1806
1807         av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
1808         switch (pict->pict_type) {
1809             case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
1810             case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
1811             case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
1812             case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
1813             case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
1814             case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
1815         }
1816         for(y=0; y<s->mb_height; y++){
1817             for(x=0; x<s->mb_width; x++){
1818                 if(s->avctx->debug&FF_DEBUG_SKIP){
1819                     int count= s->mbskip_table[x + y*s->mb_stride];
1820                     if(count>9) count=9;
1821                     av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
1822                 }
1823                 if(s->avctx->debug&FF_DEBUG_QP){
1824                     av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
1825                 }
1826                 if(s->avctx->debug&FF_DEBUG_MB_TYPE){
1827                     int mb_type= pict->mb_type[x + y*s->mb_stride];
1828                     //Type & MV direction
1829                     if(IS_PCM(mb_type))
1830                         av_log(s->avctx, AV_LOG_DEBUG, "P");
1831                     else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
1832                         av_log(s->avctx, AV_LOG_DEBUG, "A");
1833                     else if(IS_INTRA4x4(mb_type))
1834                         av_log(s->avctx, AV_LOG_DEBUG, "i");
1835                     else if(IS_INTRA16x16(mb_type))
1836                         av_log(s->avctx, AV_LOG_DEBUG, "I");
1837                     else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
1838                         av_log(s->avctx, AV_LOG_DEBUG, "d");
1839                     else if(IS_DIRECT(mb_type))
1840                         av_log(s->avctx, AV_LOG_DEBUG, "D");
1841                     else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
1842                         av_log(s->avctx, AV_LOG_DEBUG, "g");
1843                     else if(IS_GMC(mb_type))
1844                         av_log(s->avctx, AV_LOG_DEBUG, "G");
1845                     else if(IS_SKIP(mb_type))
1846                         av_log(s->avctx, AV_LOG_DEBUG, "S");
1847                     else if(!USES_LIST(mb_type, 1))
1848                         av_log(s->avctx, AV_LOG_DEBUG, ">");
1849                     else if(!USES_LIST(mb_type, 0))
1850                         av_log(s->avctx, AV_LOG_DEBUG, "<");
1851                     else{
1852                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1853                         av_log(s->avctx, AV_LOG_DEBUG, "X");
1854                     }
1855
1856                     //segmentation
1857                     if(IS_8X8(mb_type))
1858                         av_log(s->avctx, AV_LOG_DEBUG, "+");
1859                     else if(IS_16X8(mb_type))
1860                         av_log(s->avctx, AV_LOG_DEBUG, "-");
1861                     else if(IS_8X16(mb_type))
1862                         av_log(s->avctx, AV_LOG_DEBUG, "|");
1863                     else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
1864                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1865                     else
1866                         av_log(s->avctx, AV_LOG_DEBUG, "?");
1867
1868
1869                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
1870                         av_log(s->avctx, AV_LOG_DEBUG, "=");
1871                     else
1872                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1873                 }
1874 //                av_log(s->avctx, AV_LOG_DEBUG, " ");
1875             }
1876             av_log(s->avctx, AV_LOG_DEBUG, "\n");
1877         }
1878     }
1879
1880     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
1881         const int shift= 1 + s->quarter_sample;
1882         int mb_y;
1883         uint8_t *ptr;
1884         int i;
1885         int h_chroma_shift, v_chroma_shift;
1886         const int width = s->avctx->width;
1887         const int height= s->avctx->height;
1888         const int mv_sample_log2= 4 - pict->motion_subsample_log2;
1889         const int mv_stride= (s->mb_width << mv_sample_log2) + (s->codec_id == CODEC_ID_H264 ? 0 : 1);
1890         s->low_delay=0; //needed to see the vectors without trashing the buffers
1891
1892         avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
1893         for(i=0; i<3; i++){
1894             memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
1895             pict->data[i]= s->visualization_buffer[i];
1896         }
1897         pict->type= FF_BUFFER_TYPE_COPY;
1898         ptr= pict->data[0];
1899
1900         for(mb_y=0; mb_y<s->mb_height; mb_y++){
1901             int mb_x;
1902             for(mb_x=0; mb_x<s->mb_width; mb_x++){
1903                 const int mb_index= mb_x + mb_y*s->mb_stride;
1904                 if((s->avctx->debug_mv) && pict->motion_val){
1905                   int type;
1906                   for(type=0; type<3; type++){
1907                     int direction = 0;
1908                     switch (type) {
1909                       case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
1910                                 continue;
1911                               direction = 0;
1912                               break;
1913                       case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
1914                                 continue;
1915                               direction = 0;
1916                               break;
1917                       case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
1918                                 continue;
1919                               direction = 1;
1920                               break;
1921                     }
1922                     if(!USES_LIST(pict->mb_type[mb_index], direction))
1923                         continue;
1924
1925                     if(IS_8X8(pict->mb_type[mb_index])){
1926                       int i;
1927                       for(i=0; i<4; i++){
1928                         int sx= mb_x*16 + 4 + 8*(i&1);
1929                         int sy= mb_y*16 + 4 + 8*(i>>1);
1930                         int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
1931                         int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1932                         int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1933                         draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1934                       }
1935                     }else if(IS_16X8(pict->mb_type[mb_index])){
1936                       int i;
1937                       for(i=0; i<2; i++){
1938                         int sx=mb_x*16 + 8;
1939                         int sy=mb_y*16 + 4 + 8*i;
1940                         int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
1941                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1942                         int my=(pict->motion_val[direction][xy][1]>>shift);
1943
1944                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1945                             my*=2;
1946
1947                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1948                       }
1949                     }else if(IS_8X16(pict->mb_type[mb_index])){
1950                       int i;
1951                       for(i=0; i<2; i++){
1952                         int sx=mb_x*16 + 4 + 8*i;
1953                         int sy=mb_y*16 + 8;
1954                         int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
1955                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1956                         int my=(pict->motion_val[direction][xy][1]>>shift);
1957
1958                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1959                             my*=2;
1960
1961                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1962                       }
1963                     }else{
1964                       int sx= mb_x*16 + 8;
1965                       int sy= mb_y*16 + 8;
1966                       int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
1967                       int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1968                       int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1969                       draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1970                     }
1971                   }
1972                 }
1973                 if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
1974                     uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
1975                     int y;
1976                     for(y=0; y<8; y++){
1977                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
1978                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
1979                     }
1980                 }
1981                 if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
1982                     int mb_type= pict->mb_type[mb_index];
1983                     uint64_t u,v;
1984                     int y;
1985 #define COLOR(theta, r)\
1986 u= (int)(128 + r*cos(theta*3.141592/180));\
1987 v= (int)(128 + r*sin(theta*3.141592/180));
1988
1989
1990                     u=v=128;
1991                     if(IS_PCM(mb_type)){
1992                         COLOR(120,48)
1993                     }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
1994                         COLOR(30,48)
1995                     }else if(IS_INTRA4x4(mb_type)){
1996                         COLOR(90,48)
1997                     }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
1998 //                        COLOR(120,48)
1999                     }else if(IS_DIRECT(mb_type)){
2000                         COLOR(150,48)
2001                     }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
2002                         COLOR(170,48)
2003                     }else if(IS_GMC(mb_type)){
2004                         COLOR(190,48)
2005                     }else if(IS_SKIP(mb_type)){
2006 //                        COLOR(180,48)
2007                     }else if(!USES_LIST(mb_type, 1)){
2008                         COLOR(240,48)
2009                     }else if(!USES_LIST(mb_type, 0)){
2010                         COLOR(0,48)
2011                     }else{
2012                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
2013                         COLOR(300,48)
2014                     }
2015
2016                     u*= 0x0101010101010101ULL;
2017                     v*= 0x0101010101010101ULL;
2018                     for(y=0; y<8; y++){
2019                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
2020                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
2021                     }
2022
2023                     //segmentation
2024                     if(IS_8X8(mb_type) || IS_16X8(mb_type)){
2025                         *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
2026                         *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
2027                     }
2028                     if(IS_8X8(mb_type) || IS_8X16(mb_type)){
2029                         for(y=0; y<16; y++)
2030                             pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
2031                     }
2032                     if(IS_8X8(mb_type) && mv_sample_log2 >= 2){
2033                         int dm= 1 << (mv_sample_log2-2);
2034                         for(i=0; i<4; i++){
2035                             int sx= mb_x*16 + 8*(i&1);
2036                             int sy= mb_y*16 + 8*(i>>1);
2037                             int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
2038                             //FIXME bidir
2039                             int32_t *mv = (int32_t*)&pict->motion_val[0][xy];
2040                             if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)])
2041                                 for(y=0; y<8; y++)
2042                                     pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80;
2043                             if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)])
2044                                 *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
2045                         }
2046                     }
2047
2048                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
2049                         // hmm
2050                     }
2051                 }
2052                 s->mbskip_table[mb_index]=0;
2053             }
2054         }
2055     }
2056 }
2057
2058 #ifdef CONFIG_ENCODERS
2059
/**
 * sums the absolute differences between the samples of a 16x16 block and
 * a constant reference value.
 * @param src top left sample of the block
 * @param ref value every sample is compared against
 * @param stride distance in bytes between two rows of the block
 * @return sum of |sample - ref| over all 256 samples
 */
static int get_sae(uint8_t *src, int ref, int stride){
    int row;
    int total= 0;

    for(row=0; row<16; row++){
        const uint8_t *const line= src + row*stride;
        int col;

        for(col=0; col<16; col++)
            total+= FFABS(line[col] - ref);
    }

    return total;
}
2072
2073 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
2074     int x, y, w, h;
2075     int acc=0;
2076
2077     w= s->width &~15;
2078     h= s->height&~15;
2079
2080     for(y=0; y<h; y+=16){
2081         for(x=0; x<w; x+=16){
2082             int offset= x + y*stride;
2083             int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
2084             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
2085             int sae = get_sae(src + offset, mean, stride);
2086
2087             acc+= sae + 500 < sad;
2088         }
2089     }
2090     return acc;
2091 }
2092
2093
2094 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
2095     AVFrame *pic=NULL;
2096     int64_t pts;
2097     int i;
2098     const int encoding_delay= s->max_b_frames;
2099     int direct=1;
2100
2101     if(pic_arg){
2102         pts= pic_arg->pts;
2103         pic_arg->display_picture_number= s->input_picture_number++;
2104
2105         if(pts != AV_NOPTS_VALUE){
2106             if(s->user_specified_pts != AV_NOPTS_VALUE){
2107                 int64_t time= pts;
2108                 int64_t last= s->user_specified_pts;
2109
2110                 if(time <= last){
2111                     av_log(s->avctx, AV_LOG_ERROR, "Error, Invalid timestamp=%"PRId64", last=%"PRId64"\n", pts, s->user_specified_pts);
2112                     return -1;
2113                 }
2114             }
2115             s->user_specified_pts= pts;
2116         }else{
2117             if(s->user_specified_pts != AV_NOPTS_VALUE){
2118                 s->user_specified_pts=
2119                 pts= s->user_specified_pts + 1;
2120                 av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n", pts);
2121             }else{
2122                 pts= pic_arg->display_picture_number;
2123             }
2124         }
2125     }
2126
2127   if(pic_arg){
2128     if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
2129     if(pic_arg->linesize[0] != s->linesize) direct=0;
2130     if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
2131     if(pic_arg->linesize[2] != s->uvlinesize) direct=0;
2132
2133 //    av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);
2134
2135     if(direct){
2136         i= ff_find_unused_picture(s, 1);
2137
2138         pic= (AVFrame*)&s->picture[i];
2139         pic->reference= 3;
2140
2141         for(i=0; i<4; i++){
2142             pic->data[i]= pic_arg->data[i];
2143             pic->linesize[i]= pic_arg->linesize[i];
2144         }
2145         alloc_picture(s, (Picture*)pic, 1);
2146     }else{
2147         i= ff_find_unused_picture(s, 0);
2148
2149         pic= (AVFrame*)&s->picture[i];
2150         pic->reference= 3;
2151
2152         alloc_picture(s, (Picture*)pic, 0);
2153
2154         if(   pic->data[0] + INPLACE_OFFSET == pic_arg->data[0]
2155            && pic->data[1] + INPLACE_OFFSET == pic_arg->data[1]
2156            && pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]){
2157        // empty
2158         }else{
2159             int h_chroma_shift, v_chroma_shift;
2160             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
2161
2162             for(i=0; i<3; i++){
2163                 int src_stride= pic_arg->linesize[i];
2164                 int dst_stride= i ? s->uvlinesize : s->linesize;
2165                 int h_shift= i ? h_chroma_shift : 0;
2166                 int v_shift= i ? v_chroma_shift : 0;
2167                 int w= s->width >>h_shift;
2168                 int h= s->height>>v_shift;
2169                 uint8_t *src= pic_arg->data[i];
2170                 uint8_t *dst= pic->data[i];
2171
2172                 if(!s->avctx->rc_buffer_size)
2173                     dst +=INPLACE_OFFSET;
2174
2175                 if(src_stride==dst_stride)
2176                     memcpy(dst, src, src_stride*h);
2177                 else{
2178                     while(h--){
2179                         memcpy(dst, src, w);
2180                         dst += dst_stride;
2181                         src += src_stride;
2182                     }
2183                 }
2184             }
2185         }
2186     }
2187     copy_picture_attributes(s, pic, pic_arg);
2188     pic->pts= pts; //we set this here to avoid modifiying pic_arg
2189   }
2190
2191     /* shift buffer entries */
2192     for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
2193         s->input_picture[i-1]= s->input_picture[i];
2194
2195     s->input_picture[encoding_delay]= (Picture*)pic;
2196
2197     return 0;
2198 }
2199
2200 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref){
2201     int x, y, plane;
2202     int score=0;
2203     int64_t score64=0;
2204
2205     for(plane=0; plane<3; plane++){
2206         const int stride= p->linesize[plane];
2207         const int bw= plane ? 1 : 2;
2208         for(y=0; y<s->mb_height*bw; y++){
2209             for(x=0; x<s->mb_width*bw; x++){
2210                 int off= p->type == FF_BUFFER_TYPE_SHARED ? 0: 16;
2211                 int v= s->dsp.frame_skip_cmp[1](s, p->data[plane] + 8*(x + y*stride)+off, ref->data[plane] + 8*(x + y*stride), stride, 8);
2212
2213                 switch(s->avctx->frame_skip_exp){
2214                     case 0: score= FFMAX(score, v); break;
2215                     case 1: score+= FFABS(v);break;
2216                     case 2: score+= v*v;break;
2217                     case 3: score64+= FFABS(v*v*(int64_t)v);break;
2218                     case 4: score64+= v*v*(int64_t)(v*v);break;
2219                 }
2220             }
2221         }
2222     }
2223
2224     if(score) score64= score;
2225
2226     if(score64 < s->avctx->frame_skip_threshold)
2227         return 1;
2228     if(score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda)>>8))
2229         return 1;
2230     return 0;
2231 }
2232
2233 static int estimate_best_b_count(MpegEncContext *s){
2234     AVCodec *codec= avcodec_find_encoder(s->avctx->codec_id);
2235     AVCodecContext *c= avcodec_alloc_context();
2236     AVFrame input[FF_MAX_B_FRAMES+2];
2237     const int scale= s->avctx->brd_scale;
2238     int i, j, out_size, p_lambda, b_lambda, lambda2;
2239     int outbuf_size= s->width * s->height; //FIXME
2240     uint8_t *outbuf= av_malloc(outbuf_size);
2241     int64_t best_rd= INT64_MAX;
2242     int best_b_count= -1;
2243
2244     assert(scale>=0 && scale <=3);
2245
2246 //    emms_c();
2247     p_lambda= s->last_lambda_for[P_TYPE]; //s->next_picture_ptr->quality;
2248     b_lambda= s->last_lambda_for[B_TYPE]; //p_lambda *FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
2249     if(!b_lambda) b_lambda= p_lambda; //FIXME we should do this somewhere else
2250     lambda2= (b_lambda*b_lambda + (1<<FF_LAMBDA_SHIFT)/2 ) >> FF_LAMBDA_SHIFT;
2251
2252     c->width = s->width >> scale;
2253     c->height= s->height>> scale;
2254     c->flags= CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR | CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
2255     c->flags|= s->avctx->flags & CODEC_FLAG_QPEL;
2256     c->mb_decision= s->avctx->mb_decision;
2257     c->me_cmp= s->avctx->me_cmp;
2258     c->mb_cmp= s->avctx->mb_cmp;
2259     c->me_sub_cmp= s->avctx->me_sub_cmp;
2260     c->pix_fmt = PIX_FMT_YUV420P;
2261     c->time_base= s->avctx->time_base;
2262     c->max_b_frames= s->max_b_frames;
2263
2264     if (avcodec_open(c, codec) < 0)
2265         return -1;
2266
2267     for(i=0; i<s->max_b_frames+2; i++){
2268         int ysize= c->width*c->height;
2269         int csize= (c->width/2)*(c->height/2);
2270         Picture pre_input, *pre_input_ptr= i ? s->input_picture[i-1] : s->next_picture_ptr;
2271
2272         avcodec_get_frame_defaults(&input[i]);
2273         input[i].data[0]= av_malloc(ysize + 2*csize);
2274         input[i].data[1]= input[i].data[0] + ysize;
2275         input[i].data[2]= input[i].data[1] + csize;
2276         input[i].linesize[0]= c->width;
2277         input[i].linesize[1]=
2278         input[i].linesize[2]= c->width/2;
2279
2280         if(pre_input_ptr && (!i || s->input_picture[i-1])) {
2281             pre_input= *pre_input_ptr;
2282
2283             if(pre_input.type != FF_BUFFER_TYPE_SHARED && i) {
2284                 pre_input.data[0]+=INPLACE_OFFSET;
2285                 pre_input.data[1]+=INPLACE_OFFSET;
2286                 pre_input.data[2]+=INPLACE_OFFSET;
2287             }
2288
2289             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0], pre_input.data[0], pre_input.linesize[0], c->width, c->height);
2290             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1], pre_input.data[1], pre_input.linesize[1], c->width>>1, c->height>>1);
2291             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2], pre_input.data[2], pre_input.linesize[2], c->width>>1, c->height>>1);
2292         }
2293     }
2294
2295     for(j=0; j<s->max_b_frames+1; j++){
2296         int64_t rd=0;
2297
2298         if(!s->input_picture[j])
2299             break;
2300
2301         c->error[0]= c->error[1]= c->error[2]= 0;
2302
2303         input[0].pict_type= I_TYPE;
2304         input[0].quality= 1 * FF_QP2LAMBDA;
2305         out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[0]);
2306 //        rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
2307
2308         for(i=0; i<s->max_b_frames+1; i++){
2309             int is_p= i % (j+1) == j || i==s->max_b_frames;
2310
2311             input[i+1].pict_type= is_p ? P_TYPE : B_TYPE;
2312             input[i+1].quality= is_p ? p_lambda : b_lambda;
2313             out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[i+1]);
2314             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
2315         }
2316
2317         /* get the delayed frames */
2318         while(out_size){
2319             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
2320             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
2321         }
2322
2323         rd += c->error[0] + c->error[1] + c->error[2];
2324
2325         if(rd < best_rd){
2326             best_rd= rd;
2327             best_b_count= j;
2328         }
2329     }
2330
2331     av_freep(&outbuf);
2332     avcodec_close(c);
2333     av_freep(&c);
2334
2335     for(i=0; i<s->max_b_frames+2; i++){
2336         av_freep(&input[i].data[0]);
2337     }
2338
2339     return best_b_count;
2340 }
2341
2342 static void select_input_picture(MpegEncContext *s){
2343     int i;
2344
2345     for(i=1; i<MAX_PICTURE_COUNT; i++)
2346         s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
2347     s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;
2348
2349     /* set next picture type & ordering */
2350     if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
2351         if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
2352             s->reordered_input_picture[0]= s->input_picture[0];
2353             s->reordered_input_picture[0]->pict_type= I_TYPE;
2354             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2355         }else{
2356             int b_frames;
2357
2358             if(s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor){
2359                 if(s->picture_in_gop_number < s->gop_size && skip_check(s, s->input_picture[0], s->next_picture_ptr)){
2360                 //FIXME check that te gop check above is +-1 correct
2361 //av_log(NULL, AV_LOG_DEBUG, "skip %p %"PRId64"\n", s->input_picture[0]->data[0], s->input_picture[0]->pts);
2362
2363                     if(s->input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2364                         for(i=0; i<4; i++)
2365                             s->input_picture[0]->data[i]= NULL;
2366                         s->input_picture[0]->type= 0;
2367                     }else{
2368                         assert(   s->input_picture[0]->type==FF_BUFFER_TYPE_USER
2369                                || s->input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2370
2371                         s->avctx->release_buffer(s->avctx, (AVFrame*)s->input_picture[0]);
2372                     }
2373
2374                     emms_c();
2375                     ff_vbv_update(s, 0);
2376
2377                     goto no_output_pic;
2378                 }
2379             }
2380
2381             if(s->flags&CODEC_FLAG_PASS2){
2382                 for(i=0; i<s->max_b_frames+1; i++){
2383                     int pict_num= s->input_picture[0]->display_picture_number + i;
2384
2385                     if(pict_num >= s->rc_context.num_entries)
2386                         break;
2387                     if(!s->input_picture[i]){
2388                         s->rc_context.entry[pict_num-1].new_pict_type = P_TYPE;
2389                         break;
2390                     }
2391
2392                     s->input_picture[i]->pict_type=
2393                         s->rc_context.entry[pict_num].new_pict_type;
2394                 }
2395             }
2396
2397             if(s->avctx->b_frame_strategy==0){
2398                 b_frames= s->max_b_frames;
2399                 while(b_frames && !s->input_picture[b_frames]) b_frames--;
2400             }else if(s->avctx->b_frame_strategy==1){
2401                 for(i=1; i<s->max_b_frames+1; i++){
2402                     if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
2403                         s->input_picture[i]->b_frame_score=
2404                             get_intra_count(s, s->input_picture[i  ]->data[0],
2405                                                s->input_picture[i-1]->data[0], s->linesize) + 1;
2406                     }
2407                 }
2408                 for(i=0; i<s->max_b_frames+1; i++){
2409                     if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/s->avctx->b_sensitivity) break;
2410                 }
2411
2412                 b_frames= FFMAX(0, i-1);
2413
2414                 /* reset scores */
2415                 for(i=0; i<b_frames+1; i++){
2416                     s->input_picture[i]->b_frame_score=0;
2417                 }
2418             }else if(s->avctx->b_frame_strategy==2){
2419                 b_frames= estimate_best_b_count(s);
2420             }else{
2421                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
2422                 b_frames=0;
2423             }
2424
2425             emms_c();
2426 //static int b_count=0;
2427 //b_count+= b_frames;
2428 //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
2429
2430             for(i= b_frames - 1; i>=0; i--){
2431                 int type= s->input_picture[i]->pict_type;
2432                 if(type && type != B_TYPE)
2433                     b_frames= i;
2434             }
2435             if(s->input_picture[b_frames]->pict_type == B_TYPE && b_frames == s->max_b_frames){
2436                 av_log(s->avctx, AV_LOG_ERROR, "warning, too many b frames in a row\n");
2437             }
2438
2439             if(s->picture_in_gop_number + b_frames >= s->gop_size){
2440               if((s->flags2 & CODEC_FLAG2_STRICT_GOP) && s->gop_size > s->picture_in_gop_number){
2441                     b_frames= s->gop_size - s->picture_in_gop_number - 1;
2442               }else{
2443                 if(s->flags & CODEC_FLAG_CLOSED_GOP)
2444                     b_frames=0;
2445                 s->input_picture[b_frames]->pict_type= I_TYPE;
2446               }
2447             }
2448
2449             if(   (s->flags & CODEC_FLAG_CLOSED_GOP)
2450                && b_frames
2451                && s->input_picture[b_frames]->pict_type== I_TYPE)
2452                 b_frames--;
2453
2454             s->reordered_input_picture[0]= s->input_picture[b_frames];
2455             if(s->reordered_input_picture[0]->pict_type != I_TYPE)
2456                 s->reordered_input_picture[0]->pict_type= P_TYPE;
2457             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2458             for(i=0; i<b_frames; i++){
2459                 s->reordered_input_picture[i+1]= s->input_picture[i];
2460                 s->reordered_input_picture[i+1]->pict_type= B_TYPE;
2461                 s->reordered_input_picture[i+1]->coded_picture_number= s->coded_picture_number++;
2462             }
2463         }
2464     }
2465 no_output_pic:
2466     if(s->reordered_input_picture[0]){
2467         s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;
2468
2469         copy_picture(&s->new_picture, s->reordered_input_picture[0]);
2470
2471         if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED || s->avctx->rc_buffer_size){
2472             // input is a shared pix, so we can't modifiy it -> alloc a new one & ensure that the shared one is reuseable
2473
2474             int i= ff_find_unused_picture(s, 0);
2475             Picture *pic= &s->picture[i];
2476
2477             pic->reference              = s->reordered_input_picture[0]->reference;
2478             alloc_picture(s, pic, 0);
2479
2480             /* mark us unused / free shared pic */
2481             if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_INTERNAL)
2482                 s->avctx->release_buffer(s->avctx, (AVFrame*)s->reordered_input_picture[0]);
2483             for(i=0; i<4; i++)
2484                 s->reordered_input_picture[0]->data[i]= NULL;
2485             s->reordered_input_picture[0]->type= 0;
2486
2487             copy_picture_attributes(s, (AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);
2488
2489             s->current_picture_ptr= pic;
2490         }else{
2491             // input is not a shared pix -> reuse buffer for current_pix
2492
2493             assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER
2494                    || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2495
2496             s->current_picture_ptr= s->reordered_input_picture[0];
2497             for(i=0; i<4; i++){
2498                 s->new_picture.data[i]+= INPLACE_OFFSET;
2499             }
2500         }
2501         copy_picture(&s->current_picture, s->current_picture_ptr);
2502
2503         s->picture_number= s->new_picture.display_picture_number;
2504 //printf("dpn:%d\n", s->picture_number);
2505     }else{
2506        memset(&s->new_picture, 0, sizeof(Picture));
2507     }
2508 }
2509
2510 int MPV_encode_picture(AVCodecContext *avctx,
2511                        unsigned char *buf, int buf_size, void *data)
2512 {
2513     MpegEncContext *s = avctx->priv_data;
2514     AVFrame *pic_arg = data;
2515     int i, stuffing_count;
2516
2517     for(i=0; i<avctx->thread_count; i++){
2518         int start_y= s->thread_context[i]->start_mb_y;
2519         int   end_y= s->thread_context[i]->  end_mb_y;
2520         int h= s->mb_height;
2521         uint8_t *start= buf + (size_t)(((int64_t) buf_size)*start_y/h);
2522         uint8_t *end  = buf + (size_t)(((int64_t) buf_size)*  end_y/h);
2523
2524         init_put_bits(&s->thread_context[i]->pb, start, end - start);
2525     }
2526
2527     s->picture_in_gop_number++;
2528
2529     if(load_input_picture(s, pic_arg) < 0)
2530         return -1;
2531
2532     select_input_picture(s);
2533
2534     /* output? */
2535     if(s->new_picture.data[0]){
2536         s->pict_type= s->new_picture.pict_type;
2537 //emms_c();
2538 //printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
2539         MPV_frame_start(s, avctx);
2540 vbv_retry:
2541         if (encode_picture(s, s->picture_number) < 0)
2542             return -1;
2543
2544         avctx->real_pict_num  = s->picture_number;
2545         avctx->header_bits = s->header_bits;
2546         avctx->mv_bits     = s->mv_bits;
2547         avctx->misc_bits   = s->misc_bits;
2548         avctx->i_tex_bits  = s->i_tex_bits;
2549         avctx->p_tex_bits  = s->p_tex_bits;
2550         avctx->i_count     = s->i_count;
2551         avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
2552         avctx->skip_count  = s->skip_count;
2553
2554         MPV_frame_end(s);
2555
2556         if (s->out_format == FMT_MJPEG)
2557             mjpeg_picture_trailer(s);
2558
2559         if(avctx->rc_buffer_size){
2560             RateControlContext *rcc= &s->rc_context;
2561             int max_size= rcc->buffer_index/3;
2562
2563             if(put_bits_count(&s->pb) > max_size && s->lambda < s->avctx->lmax){
2564                 s->next_lambda= FFMAX(s->lambda+1, s->lambda*(s->qscale+1) / s->qscale);
2565                 if(s->adaptive_quant){
2566                     int i;
2567                     for(i=0; i<s->mb_height*s->mb_stride; i++)
2568                         s->lambda_table[i]= FFMAX(s->lambda_table[i]+1, s->lambda_table[i]*(s->qscale+1) / s->qscale);
2569                 }
2570                 s->mb_skipped = 0;        //done in MPV_frame_start()
2571                 if(s->pict_type==P_TYPE){ //done in encode_picture() so we must undo it
2572                     if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
2573                         s->no_rounding ^= 1;
2574                 }
2575                 if(s->pict_type!=B_TYPE){
2576                     s->time_base= s->last_time_base;
2577                     s->last_non_b_time= s->time - s->pp_time;
2578                 }
2579 //                av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
2580                 for(i=0; i<avctx->thread_count; i++){
2581                     PutBitContext *pb= &s->thread_context[i]->pb;
2582                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
2583                 }
2584                 goto vbv_retry;
2585             }
2586
2587             assert(s->avctx->rc_max_rate);
2588         }
2589
2590         if(s->flags&CODEC_FLAG_PASS1)
2591             ff_write_pass1_stats(s);
2592
2593         for(i=0; i<4; i++){
2594             s->current_picture_ptr->error[i]= s->current_picture.error[i];
2595             avctx->error[i] += s->current_picture_ptr->error[i];
2596         }
2597
2598         if(s->flags&CODEC_FLAG_PASS1)
2599             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits + avctx->i_tex_bits + avctx->p_tex_bits == put_bits_count(&s->pb));
2600         flush_put_bits(&s->pb);
2601         s->frame_bits  = put_bits_count(&s->pb);
2602
2603         stuffing_count= ff_vbv_update(s, s->frame_bits);
2604         if(stuffing_count){
2605             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < stuffing_count + 50){
2606                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
2607                 return -1;
2608             }
2609
2610             switch(s->codec_id){
2611             case CODEC_ID_MPEG1VIDEO:
2612             case CODEC_ID_MPEG2VIDEO:
2613                 while(stuffing_count--){
2614                     put_bits(&s->pb, 8, 0);
2615                 }
2616             break;
2617             case CODEC_ID_MPEG4:
2618                 put_bits(&s->pb, 16, 0);
2619                 put_bits(&s->pb, 16, 0x1C3);
2620                 stuffing_count -= 4;
2621                 while(stuffing_count--){
2622                     put_bits(&s->pb, 8, 0xFF);
2623                 }
2624             break;
2625             default:
2626                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
2627             }
2628             flush_put_bits(&s->pb);
2629             s->frame_bits  = put_bits_count(&s->pb);
2630         }
2631
2632         /* update mpeg1/2 vbv_delay for CBR */
2633         if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && s->out_format == FMT_MPEG1
2634            && 90000LL * (avctx->rc_buffer_size-1) <= s->avctx->rc_max_rate*0xFFFFLL){
2635             int vbv_delay;
2636
2637             assert(s->repeat_first_field==0);
2638
2639             vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate);
2640             assert(vbv_delay < 0xFFFF);
2641
2642             s->vbv_delay_ptr[0] &= 0xF8;
2643             s->vbv_delay_ptr[0] |= vbv_delay>>13;
2644             s->vbv_delay_ptr[1]  = vbv_delay>>5;
2645             s->vbv_delay_ptr[2] &= 0x07;
2646             s->vbv_delay_ptr[2] |= vbv_delay<<3;
2647         }
2648         s->total_bits += s->frame_bits;
2649         avctx->frame_bits  = s->frame_bits;
2650     }else{
2651         assert((pbBufPtr(&s->pb) == s->pb.buf));
2652         s->frame_bits=0;
2653     }
2654     assert((s->frame_bits&7)==0);
2655
2656     return s->frame_bits/8;
2657 }
2658
2659 #endif //CONFIG_ENCODERS
2660
2661 static inline void gmc1_motion(MpegEncContext *s,
2662                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2663                                uint8_t **ref_picture)
2664 {
2665     uint8_t *ptr;
2666     int offset, src_x, src_y, linesize, uvlinesize;
2667     int motion_x, motion_y;
2668     int emu=0;
2669
2670     motion_x= s->sprite_offset[0][0];
2671     motion_y= s->sprite_offset[0][1];
2672     src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
2673     src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
2674     motion_x<<=(3-s->sprite_warping_accuracy);
2675     motion_y<<=(3-s->sprite_warping_accuracy);
2676     src_x = av_clip(src_x, -16, s->width);
2677     if (src_x == s->width)
2678         motion_x =0;
2679     src_y = av_clip(src_y, -16, s->height);
2680     if (src_y == s->height)
2681         motion_y =0;
2682
2683     linesize = s->linesize;
2684     uvlinesize = s->uvlinesize;
2685
2686     ptr = ref_picture[0] + (src_y * linesize) + src_x;
2687
2688     if(s->flags&CODEC_FLAG_EMU_EDGE){
2689         if(   (unsigned)src_x >= s->h_edge_pos - 17
2690            || (unsigned)src_y >= s->v_edge_pos - 17){
2691             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
2692             ptr= s->edge_emu_buffer;
2693         }
2694     }
2695
2696     if((motion_x|motion_y)&7){
2697         s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
2698         s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
2699     }else{
2700         int dxy;
2701
2702         dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
2703         if (s->no_rounding){
2704             s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
2705         }else{
2706             s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
2707         }
2708     }
2709
2710     if(s->flags&CODEC_FLAG_GRAY) return;
2711
2712     motion_x= s->sprite_offset[1][0];
2713     motion_y= s->sprite_offset[1][1];
2714     src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
2715     src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
2716     motion_x<<=(3-s->sprite_warping_accuracy);
2717     motion_y<<=(3-s->sprite_warping_accuracy);
2718     src_x = av_clip(src_x, -8, s->width>>1);
2719     if (src_x == s->width>>1)
2720         motion_x =0;
2721     src_y = av_clip(src_y, -8, s->height>>1);
2722     if (src_y == s->height>>1)
2723         motion_y =0;
2724
2725     offset = (src_y * uvlinesize) + src_x;
2726     ptr = ref_picture[1] + offset;
2727     if(s->flags&CODEC_FLAG_EMU_EDGE){
2728         if(   (unsigned)src_x >= (s->h_edge_pos>>1) - 9
2729            || (unsigned)src_y >= (s->v_edge_pos>>1) - 9){
2730             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2731             ptr= s->edge_emu_buffer;
2732             emu=1;
2733         }
2734     }
2735     s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
2736
2737     ptr = ref_picture[2] + offset;
2738     if(emu){
2739         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2740         ptr= s->edge_emu_buffer;
2741     }
2742     s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
2743
2744     return;
2745 }
2746
2747 static inline void gmc_motion(MpegEncContext *s,
2748                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2749                                uint8_t **ref_picture)
2750 {
2751     uint8_t *ptr;
2752     int linesize, uvlinesize;
2753     const int a= s->sprite_warping_accuracy;
2754     int ox, oy;
2755
2756     linesize = s->linesize;
2757     uvlinesize = s->uvlinesize;
2758
2759     ptr = ref_picture[0];
2760
2761     ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
2762     oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
2763
2764     s->dsp.gmc(dest_y, ptr, linesize, 16,
2765            ox,
2766            oy,
2767            s->sprite_delta[0][0], s->sprite_delta[0][1],
2768            s->sprite_delta[1][0], s->sprite_delta[1][1],
2769            a+1, (1<<(2*a+1)) - s->no_rounding,
2770            s->h_edge_pos, s->v_edge_pos);
2771     s->dsp.gmc(dest_y+8, ptr, linesize, 16,
2772            ox + s->sprite_delta[0][0]*8,
2773            oy + s->sprite_delta[1][0]*8,
2774            s->sprite_delta[0][0], s->sprite_delta[0][1],
2775            s->sprite_delta[1][0], s->sprite_delta[1][1],
2776            a+1, (1<<(2*a+1)) - s->no_rounding,
2777            s->h_edge_pos, s->v_edge_pos);
2778
2779     if(s->flags&CODEC_FLAG_GRAY) return;
2780
2781     ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
2782     oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
2783
2784     ptr = ref_picture[1];
2785     s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
2786            ox,
2787            oy,
2788            s->sprite_delta[0][0], s->sprite_delta[0][1],
2789            s->sprite_delta[1][0], s->sprite_delta[1][1],
2790            a+1, (1<<(2*a+1)) - s->no_rounding,
2791            s->h_edge_pos>>1, s->v_edge_pos>>1);
2792
2793     ptr = ref_picture[2];
2794     s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
2795            ox,
2796            oy,
2797            s->sprite_delta[0][0], s->sprite_delta[0][1],
2798            s->sprite_delta[1][0], s->sprite_delta[1][1],
2799            a+1, (1<<(2*a+1)) - s->no_rounding,
2800            s->h_edge_pos>>1, s->v_edge_pos>>1);
2801 }
2802
/**
 * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
 * Nothing is written if the source plane is empty (w or h not positive), as
 * there is no sample that could be replicated.
 * @param buf destination buffer
 * @param src source buffer
 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
 * @param block_w width of block
 * @param block_h height of block
 * @param src_x x coordinate of the top left sample of the block in the source buffer
 * @param src_y y coordinate of the top left sample of the block in the source buffer
 * @param w width of the source buffer
 * @param h height of the source buffer
 */
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
                                    int src_x, int src_y, int w, int h){
    int x, y;
    int start_y, start_x, end_y, end_x;

    /* with an empty plane the region computed below would be empty and the
       replication loops would spread uninitialized bytes of buf */
    if(w <= 0 || h <= 0)
        return;

    /* a block lying entirely outside the plane is moved until it overlaps
       the plane by exactly one row / column */
    if(src_y>= h){
        src+= (h-1-src_y)*linesize;
        src_y=h-1;
    }else if(src_y<=-block_h){
        src+= (1-block_h-src_y)*linesize;
        src_y=1-block_h;
    }
    if(src_x>= w){
        src+= (w-1-src_x);
        src_x=w-1;
    }else if(src_x<=-block_w){
        src+= (1-block_w-src_x);
        src_x=1-block_w;
    }

    /* part of the block that is backed by real samples, in block coordinates */
    start_y= src_y < 0 ? -src_y : 0;
    start_x= src_x < 0 ? -src_x : 0;
    end_y= h-src_y < block_h ? h-src_y : block_h;
    end_x= w-src_x < block_w ? w-src_x : block_w;

    // copy existing part
    for(y=start_y; y<end_y; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= src[x + y*linesize];
        }
    }

    //top
    for(y=0; y<start_y; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= buf[x + start_y*linesize];
        }
    }

    //bottom
    for(y=end_y; y<block_h; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
        }
    }

    /* the rows are complete between start_x and end_x now, extend them sideways */
    for(y=0; y<block_h; y++){
       //left
        for(x=0; x<start_x; x++){
            buf[x + y*linesize]= buf[start_x + y*linesize];
        }

       //right
        for(x=end_x; x<block_w; x++){
            buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
        }
    }
}
2873
2874 static inline int hpel_motion(MpegEncContext *s,
2875                                   uint8_t *dest, uint8_t *src,
2876                                   int field_based, int field_select,
2877                                   int src_x, int src_y,
2878                                   int width, int height, int stride,
2879                                   int h_edge_pos, int v_edge_pos,
2880                                   int w, int h, op_pixels_func *pix_op,
2881                                   int motion_x, int motion_y)
2882 {
2883     int dxy;
2884     int emu=0;
2885
2886     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2887     src_x += motion_x >> 1;
2888     src_y += motion_y >> 1;
2889
2890     /* WARNING: do no forget half pels */
2891     src_x = av_clip(src_x, -16, width); //FIXME unneeded for emu?
2892     if (src_x == width)
2893         dxy &= ~1;
2894     src_y = av_clip(src_y, -16, height);
2895     if (src_y == height)
2896         dxy &= ~2;
2897     src += src_y * stride + src_x;
2898
2899     if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
2900         if(   (unsigned)src_x > h_edge_pos - (motion_x&1) - w
2901            || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){
2902             ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2903                              src_x, src_y<<field_based, h_edge_pos, s->v_edge_pos);
2904             src= s->edge_emu_buffer;
2905             emu=1;
2906         }
2907     }
2908     if(field_select)
2909         src += s->linesize;
2910     pix_op[dxy](dest, src, stride, h);
2911     return emu;
2912 }
2913
2914 static inline int hpel_motion_lowres(MpegEncContext *s,
2915                                   uint8_t *dest, uint8_t *src,
2916                                   int field_based, int field_select,
2917                                   int src_x, int src_y,
2918                                   int width, int height, int stride,
2919                                   int h_edge_pos, int v_edge_pos,
2920                                   int w, int h, h264_chroma_mc_func *pix_op,
2921                                   int motion_x, int motion_y)
2922 {
2923     const int lowres= s->avctx->lowres;
2924     const int s_mask= (2<<lowres)-1;
2925     int emu=0;
2926     int sx, sy;
2927
2928     if(s->quarter_sample){
2929         motion_x/=2;
2930         motion_y/=2;
2931     }
2932
2933     sx= motion_x & s_mask;
2934     sy= motion_y & s_mask;
2935     src_x += motion_x >> (lowres+1);
2936     src_y += motion_y >> (lowres+1);
2937
2938     src += src_y * stride + src_x;
2939
2940     if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - w
2941        || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
2942         ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2943                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
2944         src= s->edge_emu_buffer;
2945         emu=1;
2946     }
2947
2948     sx <<= 2 - lowres;
2949     sy <<= 2 - lowres;
2950     if(field_select)
2951         src += s->linesize;
2952     pix_op[lowres](dest, src, stride, h, sx, sy);
2953     return emu;
2954 }
2955
2956 /* apply one mpeg motion vector to the three components */
2957 static av_always_inline void mpeg_motion(MpegEncContext *s,
2958                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2959                                int field_based, int bottom_field, int field_select,
2960                                uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
2961                                int motion_x, int motion_y, int h)
2962 {
2963     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2964     int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, uvlinesize, linesize;
2965
2966 #if 0
2967 if(s->quarter_sample)
2968 {
2969     motion_x>>=1;
2970     motion_y>>=1;
2971 }
2972 #endif
2973
2974     v_edge_pos = s->v_edge_pos >> field_based;
2975     linesize   = s->current_picture.linesize[0] << field_based;
2976     uvlinesize = s->current_picture.linesize[1] << field_based;
2977
2978     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2979     src_x = s->mb_x* 16               + (motion_x >> 1);
2980     src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1);
2981
2982     if (s->out_format == FMT_H263) {
2983         if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){
2984             mx = (motion_x>>1)|(motion_x&1);
2985             my = motion_y >>1;
2986             uvdxy = ((my & 1) << 1) | (mx & 1);
2987             uvsrc_x = s->mb_x* 8               + (mx >> 1);
2988             uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
2989         }else{
2990             uvdxy = dxy | (motion_y & 2) | ((motion_x & 2) >> 1);
2991             uvsrc_x = src_x>>1;
2992             uvsrc_y = src_y>>1;
2993         }
2994     }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
2995         mx = motion_x / 4;
2996         my = motion_y / 4;
2997         uvdxy = 0;
2998         uvsrc_x = s->mb_x*8 + mx;
2999         uvsrc_y = s->mb_y*8 + my;
3000     } else {
3001         if(s->chroma_y_shift){
3002             mx = motion_x / 2;
3003             my = motion_y / 2;
3004             uvdxy = ((my & 1) << 1) | (mx & 1);
3005             uvsrc_x = s->mb_x* 8               + (mx >> 1);
3006             uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
3007         } else {
3008             if(s->chroma_x_shift){
3009             //Chroma422
3010                 mx = motion_x / 2;
3011                 uvdxy = ((motion_y & 1) << 1) | (mx & 1);
3012                 uvsrc_x = s->mb_x* 8           + (mx >> 1);
3013                 uvsrc_y = src_y;
3014             } else {
3015             //Chroma444
3016                 uvdxy = dxy;
3017                 uvsrc_x = src_x;
3018                 uvsrc_y = src_y;
3019             }
3020         }
3021     }
3022
3023     ptr_y  = ref_picture[0] + src_y * linesize + src_x;
3024     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
3025     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
3026
3027     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
3028        || (unsigned)src_y >    v_edge_pos - (motion_y&1) - h){
3029             if(s->codec_id == CODEC_ID_MPEG2VIDEO ||
3030                s->codec_id == CODEC_ID_MPEG1VIDEO){
3031                 av_log(s->avctx,AV_LOG_DEBUG,"MPEG motion vector out of boundary\n");
3032                 return ;
3033             }
3034             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
3035                              src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
3036             ptr_y = s->edge_emu_buffer;
3037             if(!(s->flags&CODEC_FLAG_GRAY)){
3038                 uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
3039                 ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
3040                                  uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3041                 ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
3042                                  uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3043                 ptr_cb= uvbuf;
3044                 ptr_cr= uvbuf+16;
3045             }
3046     }
3047
3048     if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
3049         dest_y += s->linesize;
3050         dest_cb+= s->uvlinesize;
3051         dest_cr+= s->uvlinesize;
3052     }
3053
3054     if(field_select){
3055         ptr_y += s->linesize;
3056         ptr_cb+= s->uvlinesize;
3057         ptr_cr+= s->uvlinesize;
3058     }
3059
3060     pix_op[0][dxy](dest_y, ptr_y, linesize, h);
3061
3062     if(!(s->flags&CODEC_FLAG_GRAY)){
3063         pix_op[s->chroma_x_shift][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
3064         pix_op[s->chroma_x_shift][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
3065     }
3066 #if defined(CONFIG_H261_ENCODER) || defined(CONFIG_H261_DECODER)
3067     if(s->out_format == FMT_H261){
3068         ff_h261_loop_filter(s);
3069     }
3070 #endif
3071 }
3072
3073 /* apply one mpeg motion vector to the three components (lowres variant: block size and edge positions are scaled down by s->avctx->lowres) */
3074 static av_always_inline void mpeg_motion_lowres(MpegEncContext *s,
3075                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3076                                int field_based, int bottom_field, int field_select,
3077                                uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
3078                                int motion_x, int motion_y, int h)
3079 {
3080     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
3081     int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
3082     const int lowres= s->avctx->lowres;
3083     const int block_s= 8>>lowres; // side of an 8x8 block after downscaling
3084     const int s_mask= (2<<lowres)-1; // selects the lowres+1 fractional bits of a vector component
3085     const int h_edge_pos = s->h_edge_pos >> lowres;
3086     const int v_edge_pos = s->v_edge_pos >> lowres;
3087     linesize   = s->current_picture.linesize[0] << field_based; // stride is doubled when field_based is set
3088     uvlinesize = s->current_picture.linesize[1] << field_based;
3089
3090     if(s->quarter_sample){ //FIXME obviously not perfect but qpel wont work in lowres anyway
3091         motion_x/=2;
3092         motion_y/=2;
3093     }
3094
3095     if(field_based){
3096         motion_y += (bottom_field - field_select)*((1<<lowres)-1);
3097     }
3098     // split the luma vector into a fractional part (sx, sy) and an integer sample position (src_x, src_y)
3099     sx= motion_x & s_mask;
3100     sy= motion_y & s_mask;
3101     src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
3102     src_y =(s->mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));
3103     // chroma vector and position; the derivation depends on the output format
3104     if (s->out_format == FMT_H263) {
3105         uvsx = ((motion_x>>1) & s_mask) | (sx&1);
3106         uvsy = ((motion_y>>1) & s_mask) | (sy&1);
3107         uvsrc_x = src_x>>1;
3108         uvsrc_y = src_y>>1;
3109     }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
3110         mx = motion_x / 4;
3111         my = motion_y / 4;
3112         uvsx = (2*mx) & s_mask;
3113         uvsy = (2*my) & s_mask;
3114         uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
3115         uvsrc_y = s->mb_y*block_s               + (my >> lowres);
3116     } else {
3117         mx = motion_x / 2;
3118         my = motion_y / 2;
3119         uvsx = mx & s_mask;
3120         uvsy = my & s_mask;
3121         uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
3122         uvsrc_y =(s->mb_y*block_s>>field_based) + (my >> (lowres+1));
3123     }
3124
3125     ptr_y  = ref_picture[0] + src_y * linesize + src_x;
3126     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
3127     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
3128     // luma block is not fully inside the edge limits: read all planes through ff_emulated_edge_mc into edge_emu_buffer
3129     if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
3130        || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
3131             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
3132                              src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
3133             ptr_y = s->edge_emu_buffer;
3134             if(!(s->flags&CODEC_FLAG_GRAY)){
3135                 uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize; // chroma scratch area starts 18 luma lines into the buffer; cr sits 16 bytes after cb
3136                 ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
3137                                  uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
3138                 ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
3139                                  uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
3140                 ptr_cb= uvbuf;
3141                 ptr_cr= uvbuf+16;
3142             }
3143     }
3144
3145     if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
3146         dest_y += s->linesize;
3147         dest_cb+= s->uvlinesize;
3148         dest_cr+= s->uvlinesize;
3149     }
3150     // field_select: start one line lower in the reference planes
3151     if(field_select){
3152         ptr_y += s->linesize;
3153         ptr_cb+= s->uvlinesize;
3154         ptr_cr+= s->uvlinesize;
3155     }
3156     // scale the fractional offsets for pix_op; same result as the former "<<= 2 - lowres" for lowres <= 2, but no negative shift count for larger lowres
3157     sx = (sx << 2) >> lowres;
3158     sy = (sy << 2) >> lowres;
3159     pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy); // NOTE(review): assumes lowres >= 1 -- TODO confirm against callers
3160
3161     if(!(s->flags&CODEC_FLAG_GRAY)){
3162         uvsx = (uvsx << 2) >> lowres;
3163         uvsy = (uvsy << 2) >> lowres;
3164         pix_op[lowres](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy); // NOTE(review): pix_op[lowres] must exist in the caller's table -- confirm its size
3165         pix_op[lowres](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
3166     }
3167     //FIXME h261 lowres loop filter
3168 }
3169
3170 //FIXME move to dsputil, avg variant, 16x16 version
3171 static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){ // blend five 8x8 predictions into dst; every weight set below sums to 8
3172     int x;
3173     uint8_t * const top   = src[1]; // src[] order: 0 = mid, 1 = top, 2 = left, 3 = right, 4 = bottom
3174     uint8_t * const left  = src[2];
3175     uint8_t * const mid   = src[0];
3176     uint8_t * const right = src[3];
3177     uint8_t * const bottom= src[4];
3178 #define OBMC_FILTER(x, t, l, m, r, b)\
3179     dst[x]= (t*top[x] + l*left[x] + m*mid[x] + r*right[x] + b*bottom[x] + 4)>>3
3180 #define OBMC_FILTER4(x, t, l, m, r, b)\
3181     OBMC_FILTER(x         , t, l, m, r, b);\
3182     OBMC_FILTER(x+1       , t, l, m, r, b);\
3183     OBMC_FILTER(x  +stride, t, l, m, r, b);\
3184     OBMC_FILTER(x+1+stride, t, l, m, r, b);
3185     // OBMC_FILTER writes one rounded weighted pixel; OBMC_FILTER4 applies one weight set to a 2x2 pixel group
3186     x=0; // row 0 (the OBMC_FILTER4 calls also cover columns 2-5 of row 1)
3187     OBMC_FILTER (x  , 2, 2, 4, 0, 0);
3188     OBMC_FILTER (x+1, 2, 1, 5, 0, 0);
3189     OBMC_FILTER4(x+2, 2, 1, 5, 0, 0);
3190     OBMC_FILTER4(x+4, 2, 0, 5, 1, 0);
3191     OBMC_FILTER (x+6, 2, 0, 5, 1, 0);
3192     OBMC_FILTER (x+7, 2, 0, 4, 2, 0);
3193     x+= stride; // row 1: only the two outer column pairs remain
3194     OBMC_FILTER (x  , 1, 2, 5, 0, 0);
3195     OBMC_FILTER (x+1, 1, 2, 5, 0, 0);
3196     OBMC_FILTER (x+6, 1, 0, 5, 2, 0);
3197     OBMC_FILTER (x+7, 1, 0, 5, 2, 0);
3198     x+= stride; // rows 2-3
3199     OBMC_FILTER4(x  , 1, 2, 5, 0, 0);
3200     OBMC_FILTER4(x+2, 1, 1, 6, 0, 0);
3201     OBMC_FILTER4(x+4, 1, 0, 6, 1, 0);
3202     OBMC_FILTER4(x+6, 1, 0, 5, 2, 0);
3203     x+= 2*stride; // rows 4-5
3204     OBMC_FILTER4(x  , 0, 2, 5, 0, 1);
3205     OBMC_FILTER4(x+2, 0, 1, 6, 0, 1);
3206     OBMC_FILTER4(x+4, 0, 0, 6, 1, 1);
3207     OBMC_FILTER4(x+6, 0, 0, 5, 2, 1);
3208     x+= 2*stride; // row 6 (the OBMC_FILTER4 calls also cover columns 2-5 of row 7)
3209     OBMC_FILTER (x  , 0, 2, 5, 0, 1);
3210     OBMC_FILTER (x+1, 0, 2, 5, 0, 1);
3211     OBMC_FILTER4(x+2, 0, 1, 5, 0, 2);
3212     OBMC_FILTER4(x+4, 0, 0, 5, 1, 2);
3213     OBMC_FILTER (x+6, 0, 0, 5, 2, 1);
3214     OBMC_FILTER (x+7, 0, 0, 5, 2, 1);
3215     x+= stride; // row 7: only the two outer column pairs remain
3216     OBMC_FILTER (x  , 0, 2, 4, 0, 2);
3217     OBMC_FILTER (x+1, 0, 1, 5, 0, 2);
3218     OBMC_FILTER (x+6, 0, 0, 5, 1, 2);
3219     OBMC_FILTER (x+7, 0, 0, 4, 2, 2);
3220 }
3221
3222 /* obmc for 1 8x8 luma block: builds one prediction per vector in mv[] with hpel_motion (into s->obmc_scratchpad) and blends them with put_obmc */
3223 static inline void obmc_motion(MpegEncContext *s,
3224                                uint8_t *dest, uint8_t *src,
3225                                int src_x, int src_y,
3226                                op_pixels_func *pix_op,
3227                                int16_t mv[5][2]/* mid top left right bottom*/)
3228 #define MID    0 /* index of the centre vector in mv[] and ptr[] */
3229 {
3230     int i;
3231     uint8_t *ptr[5];
3232
3233     assert(s->quarter_sample==0);
3234
3235     for(i=0; i<5; i++){
3236         if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){ // neighbour vector equals the centre one: reuse the centre prediction
3237             ptr[i]= ptr[MID];
3238         }else{
3239             ptr[i]= s->obmc_scratchpad + 8*(i&1) + s->linesize*8*(i>>1); // one 8x8 slot per vector, two slots side by side per 8-line band
3240             hpel_motion(s, ptr[i], src, 0, 0,
3241                         src_x, src_y,
3242                         s->width, s->height, s->linesize,
3243                         s->h_edge_pos, s->v_edge_pos,
3244                         8, 8, pix_op,
3245                         mv[i][0], mv[i][1]);
3246         }
3247     }
3248
3249     put_obmc(dest, ptr, s->linesize);
3250 }
3251
3252 static inline void qpel_motion(MpegEncContext *s,
3253                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3254                                int field_based, int bottom_field, int field_select,
3255                                uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
3256                                qpel_mc_func (*qpix_op)[16],
3257                                int motion_x, int motion_y, int h)
3258 {
3259     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
3260     int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, linesize, uvlinesize;
3261     // luma vector is in quarter samples: the low 2 bits of each component form dxy (qpix_op index), the rest is the integer offset
3262     dxy = ((motion_y & 3) << 2) | (motion_x & 3);
3263     src_x = s->mb_x *  16                 + (motion_x >> 2);
3264     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);
3265     // field_based: half the vertical extent, twice the stride
3266     v_edge_pos = s->v_edge_pos >> field_based;
3267     linesize = s->linesize << field_based;
3268     uvlinesize = s->uvlinesize << field_based;
3269     // derive the chroma vector from the luma one; the workaround_bugs flags select alternative roundings
3270     if(field_based){
3271         mx= motion_x/2;
3272         my= motion_y>>1;
3273     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
3274         static const int rtab[8]= {0,0,1,1,0,0,0,1};
3275         mx= (motion_x>>1) + rtab[motion_x&7];
3276         my= (motion_y>>1) + rtab[motion_y&7];
3277     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
3278         mx= (motion_x>>1)|(motion_x&1);
3279         my= (motion_y>>1)|(motion_y&1);
3280     }else{
3281         mx= motion_x/2;
3282         my= motion_y/2;
3283     }
3284     mx= (mx>>1)|(mx&1); // halve again, keeping any fractional bit set
3285     my= (my>>1)|(my&1);
3286     // low bit of each component becomes the chroma half-sample flag (uvdxy), the rest the integer chroma offset
3287     uvdxy= (mx&1) | ((my&1)<<1);
3288     mx>>=1;
3289     my>>=1;
3290
3291     uvsrc_x = s->mb_x *  8                 + mx;
3292     uvsrc_y = s->mb_y * (8 >> field_based) + my;
3293
3294     ptr_y  = ref_picture[0] +   src_y *   linesize +   src_x;
3295     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
3296     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
3297     // luma block is not fully inside the edge limits: read all planes through ff_emulated_edge_mc into edge_emu_buffer
3298     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16
3299        || (unsigned)src_y >    v_edge_pos - (motion_y&3) - h  ){
3300         ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
3301                          src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
3302         ptr_y= s->edge_emu_buffer;
3303         if(!(s->flags&CODEC_FLAG_GRAY)){
3304             uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize; // chroma scratch area; cr sits 16 bytes after cb
3305             ff_emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize, 9, 9 + field_based,
3306                              uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3307             ff_emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize, 9, 9 + field_based,
3308                              uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3309             ptr_cb= uvbuf;
3310             ptr_cr= uvbuf + 16;
3311         }
3312     }
3313     // luma: one qpix_op[0] call for a frame block, two qpix_op[1] calls 8 columns apart for a field (presumably 16x16 and 8x8 tables -- TODO confirm)
3314     if(!field_based)
3315         qpix_op[0][dxy](dest_y, ptr_y, linesize);
3316     else{
3317         if(bottom_field){
3318             dest_y += s->linesize;
3319             dest_cb+= s->uvlinesize;
3320             dest_cr+= s->uvlinesize;
3321         }
3322
3323         if(field_select){
3324             ptr_y  += s->linesize;
3325             ptr_cb += s->uvlinesize;
3326             ptr_cr += s->uvlinesize;
3327         }
3328         //damn interlaced mode
3329         //FIXME boundary mirroring is not exactly correct here
3330         qpix_op[1][dxy](dest_y  , ptr_y  , linesize);
3331         qpix_op[1][dxy](dest_y+8, ptr_y+8, linesize);
3332     }
3333     if(!(s->flags&CODEC_FLAG_GRAY)){ // chroma uses the half-sample functions with half the luma height
3334         pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1);
3335         pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1);
3336     }
3337 }
3338
3339 inline int ff_h263_round_chroma(int x){
3340     /* round the magnitude through the table, then restore the sign */
3341     const int negative  = x < 0;
3342     const int magnitude = negative ? -x : x;
3343     const int rounded   = h263_chroma_roundtab[magnitude & 0xf] + ((magnitude >> 3) & ~1);
3344
3345     return negative ? -rounded : rounded;
3346 }
3347
3348 /**
3349  * h263 chroma 4mv motion compensation: a single chroma vector is derived from mx/my (summed luma vectors supplied by the caller).
3350  */
3351 static inline void chroma_4mv_motion(MpegEncContext *s,
3352                                      uint8_t *dest_cb, uint8_t *dest_cr,
3353                                      uint8_t **ref_picture,
3354                                      op_pixels_func *pix_op,
3355                                      int mx, int my){
3356     int dxy, emu=0, src_x, src_y, offset;
3357     uint8_t *ptr;
3358
3359     /* In case of 8X8, we construct a single chroma motion vector
3360        with a special rounding */
3361     mx= ff_h263_round_chroma(mx);
3362     my= ff_h263_round_chroma(my);
3363     // low bit of each component = half-sample flag (dxy), remaining bits = integer offset
3364     dxy = ((my & 1) << 1) | (mx & 1);
3365     mx >>= 1;
3366     my >>= 1;
3367
3368     src_x = s->mb_x * 8 + mx;
3369     src_y = s->mb_y * 8 + my;
3370     src_x = av_clip(src_x, -8, s->width/2);
3371     if (src_x == s->width/2)
3372         dxy &= ~1; // at the right limit: drop the horizontal half-sample flag
3373     src_y = av_clip(src_y, -8, s->height/2);
3374     if (src_y == s->height/2)
3375         dxy &= ~2; // at the bottom limit: drop the vertical half-sample flag
3376
3377     offset = (src_y * (s->uvlinesize)) + src_x;
3378     ptr = ref_picture[1] + offset;
3379     if(s->flags&CODEC_FLAG_EMU_EDGE){ // edge emulation is only attempted when this flag is set
3380         if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
3381            || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
3382             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
3383             ptr= s->edge_emu_buffer;
3384             emu=1;
3385         }
3386     }
3387     pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);
3388     // second chroma plane: same offset; the shared edge_emu_buffer is refilled if emulation was needed
3389     ptr = ref_picture[2] + offset;
3390     if(emu){
3391         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
3392         ptr= s->edge_emu_buffer;
3393     }
3394     pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
3395 }
3396
3397 static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
3398                                      uint8_t *dest_cb, uint8_t *dest_cr,
3399                                      uint8_t **ref_picture,
3400                                      h264_chroma_mc_func *pix_op,
3401                                      int mx, int my){
3402     const int lowres= s->avctx->lowres;
3403     const int block_s= 8>>lowres; // side of an 8x8 block after downscaling
3404     const int s_mask= (2<<lowres)-1; // selects the lowres+1 fractional bits of a vector component
3405     const int h_edge_pos = s->h_edge_pos >> (lowres+1); // chroma edge limits: luma limits halved, then scaled by lowres
3406     const int v_edge_pos = s->v_edge_pos >> (lowres+1);
3407     int emu=0, src_x, src_y, offset, sx, sy;
3408     uint8_t *ptr;
3409     // lowres counterpart of chroma_4mv_motion: one chroma vector is derived from mx/my (summed luma vectors supplied by the caller)
3410     if(s->quarter_sample){
3411         mx/=2;
3412         my/=2;
3413     }
3414
3415     /* In case of 8X8, we construct a single chroma motion vector
3416        with a special rounding */
3417     mx= ff_h263_round_chroma(mx);
3418     my= ff_h263_round_chroma(my);
3419     // split the vector into a fractional part (sx, sy) and an integer sample position (src_x, src_y)
3420     sx= mx & s_mask;
3421     sy= my & s_mask;
3422     src_x = s->mb_x*block_s + (mx >> (lowres+1));
3423     src_y = s->mb_y*block_s + (my >> (lowres+1));
3424
3425     offset = src_y * s->uvlinesize + src_x;
3426     ptr = ref_picture[1] + offset;
3427     if(s->flags&CODEC_FLAG_EMU_EDGE){ // edge emulation is only attempted when this flag is set
3428         if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
3429            || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
3430             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
3431             ptr= s->edge_emu_buffer;
3432             emu=1;
3433         }
3434     }
3435     sx = (sx << 2) >> lowres; // same result as the former "<<= 2 - lowres" for lowres <= 2, but no negative shift count for larger lowres
3436     sy = (sy << 2) >> lowres;
3437     pix_op[lowres](dest_cb, ptr, s->uvlinesize, block_s, sx, sy); // NOTE(review): pix_op[lowres] must exist in the caller's table -- confirm its size
3438     // second chroma plane: same offset; the shared edge_emu_buffer is refilled if emulation was needed
3439     ptr = ref_picture[2] + offset;
3440     if(emu){
3441         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
3442         ptr= s->edge_emu_buffer;
3443     }
3444     pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
3445 }
3446
3447 static inline void prefetch_motion(MpegEncContext *s, uint8_t **pix, int dir){
3448     /* fetch pixels for estimated mv 4 macroblocks ahead (the "+ 64" terms)
3449      * optimized for 64byte cache lines */
3450     const int frac_bits  = s->quarter_sample ? 2 : 1;
3451     const int pos_x      = (s->mv[dir][0][0] >> frac_bits) + 16*s->mb_x + 8;
3452     const int pos_y      = (s->mv[dir][0][1] >> frac_bits) + 16*s->mb_y;
3453     const int luma_off   = pos_x + (pos_y + (s->mb_x & 3)*4)*s->linesize + 64;
3454     const int chroma_off = (pos_x >> 1) + ((pos_y >> 1) + (s->mb_x & 7))*s->uvlinesize + 64;
3455     s->dsp.prefetch(pix[0] + luma_off, s->linesize, 4);
3456     s->dsp.prefetch(pix[1] + chroma_off, pix[2] - pix[1], 2);
3457 }
3458
3459 /**
3460  * motion compensation of a single macroblock
3461  * @param s context
3462  * @param dest_y luma destination pointer
3463  * @param dest_cb chroma cb/u destination pointer
3464  * @param dest_cr chroma cr/v destination pointer
3465  * @param dir direction (0->forward, 1->backward)
3466  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
3467  * @param pix_op halfpel motion compensation function (average or put normally)
3468  * @param qpix_op qpel motion compensation function (average or put normally)
3469  * the motion vectors are taken from s->mv and the MV type from s->mv_type
3470  */
3471 static inline void MPV_motion(MpegEncContext *s,
3472                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3473                               int dir, uint8_t **ref_picture,
3474                               op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
3475 {
3476     int dxy, mx, my, src_x, src_y, motion_x, motion_y;
3477     int mb_x, mb_y, i;
3478     uint8_t *ptr, *dest;
3479
3480     mb_x = s->mb_x;
3481     mb_y = s->mb_y;
3482
3483     prefetch_motion(s, ref_picture, dir);
3484
3485     if(s->obmc && s->pict_type != B_TYPE){ // overlapped block MC path, not taken for B pictures
3486         int16_t mv_cache[4][4][2]; // [row][col][x/y]: this MB's 2x2 block vectors at rows/cols 1-2, neighbours around them
3487         const int xy= s->mb_x + s->mb_y*s->mb_stride;
3488         const int mot_stride= s->b8_stride;
3489         const int mot_xy= mb_x*2 + mb_y*2*mot_stride;
3490
3491         assert(!s->mb_skipped);
3492         // rows 1-2: this macroblock's vectors; row 3 (below) repeats this macroblock's bottom row
3493         memcpy(mv_cache[1][1], s->current_picture.motion_val[0][mot_xy           ], sizeof(int16_t)*4);
3494         memcpy(mv_cache[2][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
3495         memcpy(mv_cache[3][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
3496         // row 0: vectors from the row above, or a copy of row 1 at the top edge / above an intra macroblock
3497         if(mb_y==0 || IS_INTRA(s->current_picture.mb_type[xy-s->mb_stride])){
3498             memcpy(mv_cache[0][1], mv_cache[1][1], sizeof(int16_t)*4);
3499         }else{
3500             memcpy(mv_cache[0][1], s->current_picture.motion_val[0][mot_xy-mot_stride], sizeof(int16_t)*4);
3501         }
3502         // column 0: vectors from the left, or a copy of column 1 at the left edge / next to an intra macroblock
3503         if(mb_x==0 || IS_INTRA(s->current_picture.mb_type[xy-1])){
3504             *(int32_t*)mv_cache[1][0]= *(int32_t*)mv_cache[1][1];
3505             *(int32_t*)mv_cache[2][0]= *(int32_t*)mv_cache[2][1];
3506         }else{
3507             *(int32_t*)mv_cache[1][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1];
3508             *(int32_t*)mv_cache[2][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1+mot_stride];
3509         }
3510         // column 3: vectors from the right, or a copy of column 2 at the right edge / next to an intra macroblock
3511         if(mb_x+1>=s->mb_width || IS_INTRA(s->current_picture.mb_type[xy+1])){
3512             *(int32_t*)mv_cache[1][3]= *(int32_t*)mv_cache[1][2];
3513             *(int32_t*)mv_cache[2][3]= *(int32_t*)mv_cache[2][2];
3514         }else{
3515             *(int32_t*)mv_cache[1][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2];
3516             *(int32_t*)mv_cache[2][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2+mot_stride];
3517         }
3518         // predict the four 8x8 luma blocks and sum their centre vectors for the chroma vector
3519         mx = 0;
3520         my = 0;
3521         for(i=0;i<4;i++) {
3522             const int x= (i&1)+1;
3523             const int y= (i>>1)+1;
3524             int16_t mv[5][2]= {
3525                 {mv_cache[y][x  ][0], mv_cache[y][x  ][1]},
3526                 {mv_cache[y-1][x][0], mv_cache[y-1][x][1]},
3527                 {mv_cache[y][x-1][0], mv_cache[y][x-1][1]},
3528                 {mv_cache[y][x+1][0], mv_cache[y][x+1][1]},
3529                 {mv_cache[y+1][x][0], mv_cache[y+1][x][1]}};
3530             //FIXME cleanup
3531             obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
3532                         ref_picture[0],
3533                         mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
3534                         pix_op[1],
3535                         mv);
3536
3537             mx += mv[0][0];
3538             my += mv[0][1];
3539         }
3540         if(!(s->flags&CODEC_FLAG_GRAY))
3541             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
3542
3543         return;
3544     }
3545     // non-OBMC path: dispatch on the macroblock's motion vector type
3546     switch(s->mv_type) {
3547     case MV_TYPE_16X16: // one vector per macroblock; the helper depends on mcsel / quarter_sample / mspel
3548         if(s->mcsel){
3549             if(s->real_sprite_warping_points==1){
3550                 gmc1_motion(s, dest_y, dest_cb, dest_cr,
3551                             ref_picture);
3552             }else{
3553                 gmc_motion(s, dest_y, dest_cb, dest_cr,
3554                             ref_picture);
3555             }
3556         }else if(s->quarter_sample){
3557             qpel_motion(s, dest_y, dest_cb, dest_cr,
3558                         0, 0, 0,
3559                         ref_picture, pix_op, qpix_op,
3560                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3561         }else if(s->mspel){
3562             ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
3563                         ref_picture, pix_op,
3564                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3565         }else
3566         {
3567             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3568                         0, 0, 0,
3569                         ref_picture, pix_op,
3570                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3571         }
3572         break;
3573     case MV_TYPE_8X8: // one vector per 8x8 luma block, one derived vector for chroma
3574         mx = 0;
3575         my = 0;
3576         if(s->quarter_sample){
3577             for(i=0;i<4;i++) {
3578                 motion_x = s->mv[dir][i][0];
3579                 motion_y = s->mv[dir][i][1];
3580
3581                 dxy = ((motion_y & 3) << 2) | (motion_x & 3);
3582                 src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
3583                 src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;
3584
3585                 /* WARNING: do not forget half pels */
3586                 src_x = av_clip(src_x, -16, s->width);
3587                 if (src_x == s->width)
3588                     dxy &= ~3;
3589                 src_y = av_clip(src_y, -16, s->height);
3590                 if (src_y == s->height)
3591                     dxy &= ~12;
3592
3593                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
3594                 if(s->flags&CODEC_FLAG_EMU_EDGE){
3595                     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8
3596                        || (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 8 ){
3597                         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
3598                         ptr= s->edge_emu_buffer;
3599                     }
3600                 }
3601                 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
3602                 qpix_op[1][dxy](dest, ptr, s->linesize);
3603                 // accumulate the halved vectors for the chroma vector
3604                 mx += s->mv[dir][i][0]/2;
3605                 my += s->mv[dir][i][1]/2;
3606             }
3607         }else{
3608             for(i=0;i<4;i++) {
3609                 hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
3610                             ref_picture[0], 0, 0,
3611                             mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
3612                             s->width, s->height, s->linesize,
3613                             s->h_edge_pos, s->v_edge_pos,
3614                             8, 8, pix_op[1],
3615                             s->mv[dir][i][0], s->mv[dir][i][1]);
3616
3617                 mx += s->mv[dir][i][0];
3618                 my += s->mv[dir][i][1];
3619             }
3620         }
3621
3622         if(!(s->flags&CODEC_FLAG_GRAY))
3623             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
3624         break;
3625     case MV_TYPE_FIELD:
3626         if (s->picture_structure == PICT_FRAME) { // frame picture: one vector per field, 8 lines each
3627             if(s->quarter_sample){
3628                 for(i=0; i<2; i++){
3629                     qpel_motion(s, dest_y, dest_cb, dest_cr,
3630                                 1, i, s->field_select[dir][i],
3631                                 ref_picture, pix_op, qpix_op,
3632                                 s->mv[dir][i][0], s->mv[dir][i][1], 8);
3633                 }
3634             }else{
3635                 /* top field */
3636                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3637                             1, 0, s->field_select[dir][0],
3638                             ref_picture, pix_op,
3639                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
3640                 /* bottom field */
3641                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3642                             1, 1, s->field_select[dir][1],
3643                             ref_picture, pix_op,
3644                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
3645             }
3646         } else {
3647             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
3648                 ref_picture= s->current_picture_ptr->data; // predict from the planes of the current picture instead
3649             }
3650
3651             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3652                         0, 0, s->field_select[dir][0],
3653                         ref_picture, pix_op,
3654                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3655         }
3656         break;
3657     case MV_TYPE_16X8: // two vectors, one per 8-line half
3658         for(i=0; i<2; i++){
3659             uint8_t ** ref2picture;
3660
3661             if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
3662                 ref2picture= ref_picture;
3663             }else{
3664                 ref2picture= s->current_picture_ptr->data;
3665             }
3666
3667             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3668                         0, 0, s->field_select[dir][i],
3669                         ref2picture, pix_op,
3670                         s->mv[dir][i][0], s->mv[dir][i][1] + 16*i, 8);
3671             // advance the destination pointers for the second half
3672             dest_y += 16*s->linesize;
3673             dest_cb+= (16>>s->chroma_y_shift)*s->uvlinesize;
3674             dest_cr+= (16>>s->chroma_y_shift)*s->uvlinesize;
3675         }
3676         break;
3677     case MV_TYPE_DMV:
3678         if(s->picture_structure == PICT_FRAME){
3679             for(i=0; i<2; i++){
3680                 int j;
3681                 for(j=0; j<2; j++){
3682                     mpeg_motion(s, dest_y, dest_cb, dest_cr,
3683                                 1, j, j^i,
3684                                 ref_picture, pix_op,
3685                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], 8);
3686                 }
3687                 pix_op = s->dsp.avg_pixels_tab; // second pass (i==1) uses the avg table on the same destination
3688             }
3689         }else{
3690             for(i=0; i<2; i++){
3691                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3692                             0, 0, s->picture_structure != i+1,
3693                             ref_picture, pix_op,
3694                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],16);
3695
3696                 // after put we make avg of the same block
3697                 pix_op=s->dsp.avg_pixels_tab;
3698
3699                 //opposite parity is always in the same frame if this is second field
3700                 if(!s->first_field){
3701                     ref_picture = s->current_picture_ptr->data;
3702                 }
3703             }
3704         }
3705     break;
3706     default: assert(0);
3707     }
3708 }
3709
3710 /**
3711  * motion compensation of a single macroblock at reduced resolution (s->avctx->lowres)
3712  * @param s context
3713  * @param dest_y luma destination pointer
3714  * @param dest_cb chroma cb/u destination pointer
3715  * @param dest_cr chroma cr/v destination pointer
3716  * @param dir direction (0->forward, 1->backward)
3717  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
3718  * @param pix_op motion compensation function table of type h264_chroma_mc_func (average or put normally)
3719  * the motion vectors are taken from s->mv and the MV type from s->mv_type
3720  */
3721 static inline void MPV_motion_lowres(MpegEncContext *s,
3722                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3723                               int dir, uint8_t **ref_picture,
3724                               h264_chroma_mc_func *pix_op)
3725 {
3726     int mx, my;
3727     int mb_x, mb_y, i;
3728     const int lowres= s->avctx->lowres;
3729     const int block_s= 8>>lowres; // side of an 8x8 block after downscaling
3730
3731     mb_x = s->mb_x;
3732     mb_y = s->mb_y;
3733     // dispatch on the macroblock's motion vector type; block heights are given in units of block_s
3734     switch(s->mv_type) {
3735     case MV_TYPE_16X16:
3736         mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3737                     0, 0, 0,
3738                     ref_picture, pix_op,
3739                     s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
3740         break;
3741     case MV_TYPE_8X8: // one vector per luma block, one derived vector for chroma
3742         mx = 0;
3743         my = 0;
3744             for(i=0;i<4;i++) {
3745                 hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
3746                             ref_picture[0], 0, 0,
3747                             (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
3748                             s->width, s->height, s->linesize,
3749                             s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
3750                             block_s, block_s, pix_op,
3751                             s->mv[dir][i][0], s->mv[dir][i][1]);
3752                 // sum the four vectors for the chroma vector
3753                 mx += s->mv[dir][i][0];
3754                 my += s->mv[dir][i][1];
3755             }
3756
3757         if(!(s->flags&CODEC_FLAG_GRAY))
3758             chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
3759         break;
3760     case MV_TYPE_FIELD:
3761         if (s->picture_structure == PICT_FRAME) { // frame picture: one vector per field
3762             /* top field */
3763             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3764                         1, 0, s->field_select[dir][0],
3765                         ref_picture, pix_op,
3766                         s->mv[dir][0][0], s->mv[dir][0][1], block_s);
3767             /* bottom field */
3768             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3769                         1, 1, s->field_select[dir][1],
3770                         ref_picture, pix_op,
3771                         s->mv[dir][1][0], s->mv[dir][1][1], block_s);
3772         } else {
3773             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
3774                 ref_picture= s->current_picture_ptr->data; // predict from the planes of the current picture instead
3775             }
3776
3777             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3778                         0, 0, s->field_select[dir][0],
3779                         ref_picture, pix_op,
3780                         s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
3781         }
3782         break;
3783     case MV_TYPE_16X8: // two vectors, one per half
3784         for(i=0; i<2; i++){
3785             uint8_t ** ref2picture;
3786
3787             if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
3788                 ref2picture= ref_picture;
3789             }else{
3790                 ref2picture= s->current_picture_ptr->data;
3791             }
3792
3793             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3794                         0, 0, s->field_select[dir][i],
3795                         ref2picture, pix_op,
3796                         s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s);
3797             // advance the destination pointers for the second half
3798             dest_y += 2*block_s*s->linesize;
3799             dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
3800             dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
3801         }
3802         break;
3803     case MV_TYPE_DMV:
3804         if(s->picture_structure == PICT_FRAME){
3805             for(i=0; i<2; i++){
3806                 int j;
3807                 for(j=0; j<2; j++){
3808                     mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3809                                 1, j, j^i,
3810                                 ref_picture, pix_op,
3811                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s);
3812                 }
3813                 pix_op = s->dsp.avg_h264_chroma_pixels_tab; // second pass (i==1) uses the avg table on the same destination
3814             }
3815         }else{
3816             for(i=0; i<2; i++){
3817                 mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3818                             0, 0, s->picture_structure != i+1,
3819                             ref_picture, pix_op,
3820                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s);
3821
3822                 // after put we make avg of the same block
3823                 pix_op = s->dsp.avg_h264_chroma_pixels_tab;
3824
3825                 //opposite parity is always in the same frame if this is second field
3826                 if(!s->first_field){
3827                     ref_picture = s->current_picture_ptr->data;
3828                 }
3829             }
3830         }
3831     break;
3832     default: assert(0);
3833     }
3834 }
3835
3836 /* put block[] to dest[] */
3837 static inline void put_dct(MpegEncContext *s,
3838                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3839 {
3840     s->dct_unquantize_intra(s, block, i, qscale);
3841     s->dsp.idct_put (dest, line_size, block);
3842 }
3843
3844 /* add block[] to dest[] */
3845 static inline void add_dct(MpegEncContext *s,
3846                            DCTELEM *block, int i, uint8_t *dest, int line_size)
3847 {
3848     if (s->block_last_index[i] >= 0) {
3849         s->dsp.idct_add (dest, line_size, block);
3850     }
3851 }
3852
3853 static inline void add_dequant_dct(MpegEncContext *s,
3854                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3855 {
3856     if (s->block_last_index[i] >= 0) {
3857         s->dct_unquantize_inter(s, block, i, qscale);
3858
3859         s->dsp.idct_add (dest, line_size, block);
3860     }
3861 }
3862
3863 /**
3864  * cleans dc, ac, coded_block for the current non intra MB
3865  */
3866 void ff_clean_intra_table_entries(MpegEncContext *s)
3867 {
3868     int wrap = s->b8_stride;
3869     int xy = s->block_index[0];
3870
3871     s->dc_val[0][xy           ] =
3872     s->dc_val[0][xy + 1       ] =
3873     s->dc_val[0][xy     + wrap] =
3874     s->dc_val[0][xy + 1 + wrap] = 1024;
3875     /* ac pred */
3876     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
3877     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
3878     if (s->msmpeg4_version>=3) {
3879         s->coded_block[xy           ] =
3880         s->coded_block[xy + 1       ] =
3881         s->coded_block[xy     + wrap] =
3882         s->coded_block[xy + 1 + wrap] = 0;
3883     }
3884     /* chroma */
3885     wrap = s->mb_stride;
3886     xy = s->mb_x + s->mb_y * wrap;
3887     s->dc_val[1][xy] =
3888     s->dc_val[2][xy] = 1024;
3889     /* ac pred */
3890     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
3891     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
3892
3893     s->mbintra_table[xy]= 0;
3894 }
3895
3896 /* generic function called after a macroblock has been parsed by the
3897    decoder or after it has been encoded by the encoder.
3898
3899    Important variables used:
3900    s->mb_intra : true if intra macroblock
3901    s->mv_dir   : motion vector direction
3902    s->mv_type  : motion vector type
3903    s->mv       : motion vector
3904    s->interlaced_dct : true if interlaced dct used (mpeg2)

        Parameters:
        block       : coefficient blocks of the macroblock; blocks 0-3 are
                      written to the luma plane, blocks 4 and up to the
                      chroma planes
        lowres_flag : nonzero selects the reduced resolution path (block size
                      8>>lowres and MPV_motion_lowres() for prediction);
                      MPV_decode_mb() passes it as a literal 0 or 1

        Steps, in order: optional dump of the coefficients, qscale_table and
        DC predictor bookkeeping, skipped-macroblock shortcut, motion
        compensation (decoder only), then dequantization/IDCT of the
        residual or intra blocks into the destination planes.
3905  */
3906 static av_always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
3907 {
3908     int mb_x, mb_y;
3909     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
3910 #ifdef HAVE_XVMC
3911     if(s->avctx->xvmc_acceleration){
3912         XVMC_decode_mb(s);//xvmc uses pblocks
3913         return;
3914     }
3915 #endif
3916
3917     mb_x = s->mb_x;
3918     mb_y = s->mb_y;
3919
3920     if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
3921        /* save DCT coefficients */
            /* 6 blocks of 64 coefficients per macroblock are stored, read
               through idct_permutation[] */
3922        int i,j;
3923        DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
3924        for(i=0; i<6; i++)
3925            for(j=0; j<64; j++)
3926                *dct++ = block[i][s->dsp.idct_permutation[j]];
3927     }
3928
          /* record the quantizer used for this macroblock */
3929     s->current_picture.qscale_table[mb_xy]= s->qscale;
3930
3931     /* update DC predictors for P macroblocks */
3932     if (!s->mb_intra) {
3933         if (s->h263_pred || s->h263_aic) {
                  /* only clean the tables if this position was marked intra */
3934             if(s->mbintra_table[mb_xy])
3935                 ff_clean_intra_table_entries(s);
3936         } else {
3937             s->last_dc[0] =
3938             s->last_dc[1] =
3939             s->last_dc[2] = 128 << s->intra_dc_precision;
3940         }
3941     }
3942     else if (s->h263_pred || s->h263_aic)
3943         s->mbintra_table[mb_xy]=1;
3944
          /* pixel reconstruction is skipped only when encoding an intra-only
             or B picture without RD macroblock decision and without PSNR
             reporting */
3945     if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE) && s->avctx->mb_decision != FF_MB_DECISION_RD)) { //FIXME precalc
3946         uint8_t *dest_y, *dest_cb, *dest_cr;
3947         int dct_linesize, dct_offset;
3948         op_pixels_func (*op_pix)[4];
3949         qpel_mc_func (*op_qpix)[16];
3950         const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
3951         const int uvlinesize= s->current_picture.linesize[1];
              /* readable is 0 only for a B picture decoded at full resolution
                 with no draw_horiz_band callback; in that case the macroblock
                 is built in b_scratchpad and copied to s->dest[] at skip_idct */
3952         const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
3953         const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;
3954
3955         /* avoid copy if macroblock skipped in last frame too */
3956         /* skip only during decoding as we might trash the buffers during encoding a bit */
3957         if(!s->encoding){
3958             uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
3959             const int age= s->current_picture.age;
3960
3961             assert(age);
3962
3963             if (s->mb_skipped) {
3964                 s->mb_skipped= 0;
3965                 assert(s->pict_type!=I_TYPE);
3966
3967                 (*mbskip_ptr) ++; /* indicate that this time we skipped it */
3968                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3969
3970                 /* if previous was skipped too, then nothing to do !  */
3971                 if (*mbskip_ptr >= age && s->current_picture.reference){
3972                     return;
3973                 }
3974             } else if(!s->current_picture.reference){
3975                 (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
3976                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3977             } else{
3978                 *mbskip_ptr = 0; /* not skipped */
3979             }
3980         }
3981
              /* with interlaced DCT the stride is doubled and the lower pair
                 of luma blocks starts one line (not block_size lines) below
                 the upper pair */
3982         dct_linesize = linesize << s->interlaced_dct;
3983         dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;
3984
3985         if(readable){
3986             dest_y=  s->dest[0];
3987             dest_cb= s->dest[1];
3988             dest_cr= s->dest[2];
3989         }else{
                  /* the three planes are laid out 16 lines apart in the
                     scratchpad, using the luma stride */
3990             dest_y = s->b_scratchpad;
3991             dest_cb= s->b_scratchpad+16*linesize;
3992             dest_cr= s->b_scratchpad+32*linesize;
3993         }
3994
3995         if (!s->mb_intra) {
3996             /* motion handling */
3997             /* decoding or more than one mb_type (MC was already done otherwise) */
3998             if(!s->encoding){
3999                 if(lowres_flag){
                          /* this local op_pix shadows the outer one; the
                             lowres path uses the h264 chroma MC tables */
4000                     h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;
4001
4002                     if (s->mv_dir & MV_DIR_FORWARD) {
4003                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
                              /* a following backward prediction is averaged
                                 with the forward one */
4004                         op_pix = s->dsp.avg_h264_chroma_pixels_tab;
4005                     }
4006                     if (s->mv_dir & MV_DIR_BACKWARD) {
4007                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
4008                     }
4009                 }else{
4010                     op_qpix= s->me.qpel_put;
4011                     if ((!s->no_rounding) || s->pict_type==B_TYPE){
4012                         op_pix = s->dsp.put_pixels_tab;
4013                     }else{
4014                         op_pix = s->dsp.put_no_rnd_pixels_tab;
4015                     }
4016                     if (s->mv_dir & MV_DIR_FORWARD) {
4017                         MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
                              /* a following backward prediction is averaged
                                 with the forward one */
4018                         op_pix = s->dsp.avg_pixels_tab;
4019                         op_qpix= s->me.qpel_avg;
4020                     }
4021                     if (s->mv_dir & MV_DIR_BACKWARD) {
4022                         MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
4023                     }
4024                 }
4025             }
4026
4027             /* skip dequant / idct if we are really late ;) */
4028             if(s->hurry_up>1) goto skip_idct;
                  /* user requested IDCT skipping, by picture type */
4029             if(s->avctx->skip_idct){
4030                 if(  (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == B_TYPE)
4031                    ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != I_TYPE)
4032                    || s->avctx->skip_idct >= AVDISCARD_ALL)
4033                     goto skip_idct;
4034             }
4035
4036             /* add dct residue */
                  /* the codecs excluded below take the add_dct()/wmv2 path,
                     which does not dequantize here -- presumably their
                     coefficients were already dequantized while parsing
                     (TODO confirm against the individual decoders) */
4037             if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
4038                                 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
4039                 add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
4040                 add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
4041                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
4042                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
4043
4044                 if(!(s->flags&CODEC_FLAG_GRAY)){
4045                     if (s->chroma_y_shift){
4046                         add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
4047                         add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
4048                     }else{
                              /* NOTE(review): chroma stride/offset are derived
                                 here by halving the luma values, whereas the
                                 add_dct() branch below recomputes them from
                                 uvlinesize -- confirm both agree */
4049                         dct_linesize >>= 1;
4050                         dct_offset >>=1;
4051                         add_dequant_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
4052                         add_dequant_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
4053                         add_dequant_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
4054                         add_dequant_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
4055                     }
4056                 }
4057             } else if(s->codec_id != CODEC_ID_WMV2){
4058                 add_dct(s, block[0], 0, dest_y                          , dct_linesize);
4059                 add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
4060                 add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
4061                 add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);
4062
4063                 if(!(s->flags&CODEC_FLAG_GRAY)){
4064                     if(s->chroma_y_shift){//Chroma420
4065                         add_dct(s, block[4], 4, dest_cb, uvlinesize);
4066                         add_dct(s, block[5], 5, dest_cr, uvlinesize);
4067                     }else{
4068                         //chroma422
4069                         dct_linesize = uvlinesize << s->interlaced_dct;
4070                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
4071
4072                         add_dct(s, block[4], 4, dest_cb, dct_linesize);
4073                         add_dct(s, block[5], 5, dest_cr, dct_linesize);
4074                         add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
4075                         add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
4076                         if(!s->chroma_x_shift){//Chroma444
4077                             add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
4078                             add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
4079                             add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
4080                             add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
4081                         }
4082                     }
4083                 }//fi gray
4084             }
4085             else{
                      /* WMV2 adds the whole macroblock with its own routine */
4086                 ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
4087             }
4088         } else {
4089             /* dct only in intra block */
                  /* MPEG-1/2 decoding goes straight to idct_put below without
                     dequantizing here; everything else uses put_dct() */
4090             if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
4091                 put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
4092                 put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
4093                 put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
4094                 put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
4095
4096                 if(!(s->flags&CODEC_FLAG_GRAY)){
4097                     if(s->chroma_y_shift){
4098                         put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
4099                         put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
4100                     }else{
4101                         dct_offset >>=1;
4102                         dct_linesize >>=1;
4103                         put_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
4104                         put_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
4105                         put_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
4106                         put_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
4107                     }
4108                 }
4109             }else{
4110                 s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
4111                 s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
4112                 s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
4113                 s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);
4114
4115                 if(!(s->flags&CODEC_FLAG_GRAY)){
4116                     if(s->chroma_y_shift){
4117                         s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
4118                         s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
4119                     }else{
4120
4121                         dct_linesize = uvlinesize << s->interlaced_dct;
4122                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
4123
4124                         s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
4125                         s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
4126                         s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
4127                         s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
4128                         if(!s->chroma_x_shift){//Chroma444
4129                             s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
4130                             s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
4131                             s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
4132                             s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
4133                         }
4134                     }
4135                 }//gray
4136             }
4137         }
4138 skip_idct:
              /* the macroblock was built in the scratchpad: copy it to the
                 real destination planes */
4139         if(!readable){
4140             s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
4141             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
4142             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
4143         }
4144     }
4145 }
4146
4147 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
4148     if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1);
4149     else                  MPV_decode_mb_internal(s, block, 0);
4150 }
4151
4152 #ifdef CONFIG_ENCODERS
4153
4154 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
4155 {
4156     static const char tab[64]=
4157         {3,2,2,1,1,1,1,1,
4158          1,1,1,1,1,1,1,1,
4159          1,1,1,1,1,1,1,1,
4160          0,0,0,0,0,0,0,0,
4161          0,0,0,0,0,0,0,0,
4162          0,0,0,0,0,0,0,0,
4163          0,0,0,0,0,0,0,0,
4164          0,0,0,0,0,0,0,0};
4165     int score=0;
4166     int run=0;
4167     int i;
4168     DCTELEM *block= s->block[n];
4169     const int last_index= s->block_last_index[n];
4170     int skip_dc;
4171
4172     if(threshold<0){
4173         skip_dc=0;
4174         threshold= -threshold;
4175     }else
4176         skip_dc=1;
4177
4178     /* are all which we could set to zero are allready zero? */
4179     if(last_index<=skip_dc - 1) return;
4180
4181     for(i=0; i<=last_index; i++){
4182         const int j = s->intra_scantable.permutated[i];
4183         const int level = FFABS(block[j]);
4184         if(level==1){
4185             if(skip_dc && i==0) continue;
4186             score+= tab[run];
4187             run=0;
4188         }else if(level>1){
4189             return;
4190         }else{
4191             run++;
4192         }
4193     }
4194     if(score >= threshold) return;
4195     for(i=skip_dc; i<=last_index; i++){
4196         const int j = s->intra_scantable.permutated[i];
4197         block[j]=0;
4198     }
4199     if(block[0]) s->block_last_index[n]= 0;
4200     else         s->block_last_index[n]= -1;
4201 }
4202
4203 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
4204 {
4205     int i;
4206     const int maxlevel= s->max_qcoeff;
4207     const int minlevel= s->min_qcoeff;
4208     int overflow=0;
4209
4210     if(s->mb_intra){
4211         i=1; //skip clipping of intra dc
4212     }else
4213         i=0;
4214
4215     for(;i<=last_index; i++){
4216         const int j= s->intra_scantable.permutated[i];
4217         int level = block[j];
4218
4219         if     (level>maxlevel){
4220             level=maxlevel;
4221             overflow++;
4222         }else if(level<minlevel){
4223             level=minlevel;
4224             overflow++;
4225         }
4226
4227         block[j]= level;
4228     }
4229
4230     if(overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
4231         av_log(s->avctx, AV_LOG_INFO, "warning, clipping %d dct coefficients to %d..%d\n", overflow, minlevel, maxlevel);
4232 }
4233
4234 #endif //CONFIG_ENCODERS
4235
4236 /**
4237  *
4238  * @param h is the normal height, this will be reduced automatically if needed for the last row
4239  */
4240 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
4241     if (s->avctx->draw_horiz_band) {
4242         AVFrame *src;
4243         int offset[4];
4244
4245         if(s->picture_structure != PICT_FRAME){
4246             h <<= 1;
4247             y <<= 1;
4248             if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
4249         }
4250
4251         h= FFMIN(h, s->avctx->height - y);
4252
4253         if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
4254             src= (AVFrame*)s->current_picture_ptr;
4255         else if(s->last_picture_ptr)
4256             src= (AVFrame*)s->last_picture_ptr;
4257         else
4258             return;
4259
4260         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
4261             offset[0]=
4262             offset[1]=
4263             offset[2]=
4264             offset[3]= 0;
4265         }else{
4266             offset[0]= y * s->linesize;;
4267             offset[1]=
4268             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
4269             offset[3]= 0;
4270         }
4271
4272         emms_c();
4273
4274         s->avctx->draw_horiz_band(s->avctx, src, offset,
4275                                   y, s->picture_structure, h);
4276     }
4277 }
4278
4279 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
4280     const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
4281     const int uvlinesize= s->current_picture.linesize[1];
4282     const int mb_size= 4 - s->avctx->lowres;
4283
4284     s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
4285     s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
4286     s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
4287     s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
4288     s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
4289     s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
4290     //block_index is not used by mpeg2, so it is not affected by chroma_format
4291
4292     s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
4293     s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
4294     s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
4295
4296     if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
4297     {
4298         s->dest[0] += s->mb_y *   linesize << mb_size;
4299         s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
4300         s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
4301     }
4302 }
4303
4304 #ifdef CONFIG_ENCODERS
4305
/**
 * Fill weight[0..63] for one 8x8 block of pixels.
 *
 * For every pixel the up to 3x3 neighbourhood (clipped to the 8x8 block)
 * is examined; with N pixels, sum S and sum of squares Q the weight is
 * 36*ff_sqrt(N*Q - S*S)/N, i.e. proportional to the local standard
 * deviation (assuming ff_sqrt is an integer square root).
 *
 * @param weight output, 64 entries in row-major order
 * @param ptr    top-left pixel of the block
 * @param stride distance between two rows of ptr
 */
static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){
    int row, col;
//FIXME optimize
    for(row=0; row<8; row++){
        const int top   = FFMAX(row-1, 0);
        const int bottom= FFMIN(8, row+2);

        for(col=0; col<8; col++){
            const int left = FFMAX(col-1, 0);
            const int right= FFMIN(8, col+2);
            const int count= (bottom - top) * (right - left);
            int sum=0;
            int sqr=0;
            int ny, nx;

            for(ny=top; ny<bottom; ny++){
                const uint8_t *line= ptr + ny*stride;

                for(nx=left; nx<right; nx++){
                    const int v= line[nx];
                    sum += v;
                    sqr += v*v;
                }
            }
            weight[col + 8*row]= (36*ff_sqrt(count*sqr - sum*sum)) / count;
        }
    }
}
4328
4329 static av_always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, int motion_y, int mb_block_height, int mb_block_count)
4330 {
4331     int16_t weight[8][64];
4332     DCTELEM orig[8][64];
4333     const int mb_x= s->mb_x;
4334     const int mb_y= s->mb_y;
4335     int i;
4336     int skip_dct[8];
4337     int dct_offset   = s->linesize*8; //default for progressive frames
4338     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
4339     int wrap_y, wrap_c;
4340
4341     for(i=0; i<mb_block_count; i++) skip_dct[i]=s->skipdct;
4342
4343     if(s->adaptive_quant){
4344         const int last_qp= s->qscale;
4345         const int mb_xy= mb_x + mb_y*s->mb_stride;
4346
4347         s->lambda= s->lambda_table[mb_xy];
4348         update_qscale(s);
4349
4350         if(!(s->flags&CODEC_FLAG_QP_RD)){
4351             s->qscale= s->current_picture_ptr->qscale_table[mb_xy];
4352             s->dquant= s->qscale - last_qp;
4353
4354             if(s->out_format==FMT_H263){
4355                 s->dquant= av_clip(s->dquant, -2, 2);
4356
4357                 if(s->codec_id==CODEC_ID_MPEG4){
4358                     if(!s->mb_intra){
4359                         if(s->pict_type == B_TYPE){
4360                             if(s->dquant&1 || s->mv_dir&MV_DIRECT)
4361                                 s->dquant= 0;
4362                         }
4363                         if(s->mv_type==MV_TYPE_8X8)
4364                             s->dquant=0;
4365                     }
4366                 }
4367             }
4368         }
4369         ff_set_qscale(s, last_qp + s->dquant);
4370     }else if(s->flags&CODEC_FLAG_QP_RD)
4371         ff_set_qscale(s, s->qscale + s->dquant);
4372
4373     wrap_y = s->linesize;
4374     wrap_c = s->uvlinesize;
4375     ptr_y = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
4376     ptr_cb = s->new_picture.data[1] + (mb_y * mb_block_height * wrap_c) + mb_x * 8;
4377     ptr_cr = s->new_picture.data[2] + (mb_y * mb_block_height * wrap_c) + mb_x * 8;
4378
4379     if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
4380         uint8_t *ebuf= s->edge_emu_buffer + 32;
4381         ff_emulated_edge_mc(ebuf            , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width   , s->height);
4382         ptr_y= ebuf;
4383         ff_emulated_edge_mc(ebuf+18*wrap_y  , ptr_cb, wrap_c, 8, mb_block_height, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
4384         ptr_cb= ebuf+18*wrap_y;
4385         ff_emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, mb_block_height, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
4386         ptr_cr= ebuf+18*wrap_y+8;
4387     }
4388
4389     if (s->mb_intra) {
4390         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
4391             int progressive_score, interlaced_score;
4392
4393             s->interlaced_dct=0;
4394             progressive_score= s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y, 8)
4395                               +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400;
4396
4397             if(progressive_score > 0){
4398                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y*2, 8)
4399                                   +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y  , NULL, wrap_y*2, 8);
4400                 if(progressive_score > interlaced_score){
4401                     s->interlaced_dct=1;
4402
4403                     dct_offset= wrap_y;
4404                     wrap_y<<=1;
4405                     if (s->chroma_format == CHROMA_422)
4406                         wrap_c<<=1;
4407                 }
4408             }
4409         }
4410
4411         s->dsp.get_pixels(s->block[0], ptr_y                 , wrap_y);
4412         s->dsp.get_pixels(s->block[1], ptr_y              + 8, wrap_y);
4413         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset    , wrap_y);
4414         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
4415
4416         if(s->flags&CODEC_FLAG_GRAY){
4417             skip_dct[4]= 1;
4418             skip_dct[5]= 1;
4419         }else{
4420             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
4421             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
4422             if(!s->chroma_y_shift){ /* 422 */
4423                 s->dsp.get_pixels(s->block[6], ptr_cb + (dct_offset>>1), wrap_c);
4424                 s->dsp.get_pixels(s->block[7], ptr_cr + (dct_offset>>1), wrap_c);
4425             }
4426         }
4427     }else{
4428         op_pixels_func (*op_pix)[4];
4429         qpel_mc_func (*op_qpix)[16];
4430         uint8_t *dest_y, *dest_cb, *dest_cr;
4431
4432         dest_y  = s->dest[0];
4433         dest_cb = s->dest[1];
4434         dest_cr = s->dest[2];
4435
4436         if ((!s->no_rounding) || s->pict_type==B_TYPE){
4437             op_pix = s->dsp.put_pixels_tab;
4438             op_qpix= s->dsp.put_qpel_pixels_tab;
4439         }else{
4440             op_pix = s->dsp.put_no_rnd_pixels_tab;
4441             op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
4442         }
4443
4444         if (s->mv_dir & MV_DIR_FORWARD) {
4445             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
4446             op_pix = s->dsp.avg_pixels_tab;
4447             op_qpix= s->dsp.avg_qpel_pixels_tab;
4448         }
4449         if (s->mv_dir & MV_DIR_BACKWARD) {
4450             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
4451         }
4452
4453         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
4454             int progressive_score, interlaced_score;
4455
4456             s->interlaced_dct=0;
4457             progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8)
4458                               +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;
4459
4460             if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;
4461
4462             if(progressive_score>0){
4463                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8)
4464                                   +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);
4465
4466                 if(progressive_score > interlaced_score){
4467                     s->interlaced_dct=1;
4468
4469                     dct_offset= wrap_y;
4470                     wrap_y<<=1;
4471                     if (s->chroma_format == CHROMA_422)
4472                         wrap_c<<=1;
4473                 }
4474             }
4475         }
4476
4477         s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
4478         s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
4479         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
4480         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
4481
4482         if(s->flags&CODEC_FLAG_GRAY){
4483             skip_dct[4]= 1;
4484             skip_dct[5]= 1;
4485         }else{
4486             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
4487             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
4488             if(!s->chroma_y_shift){ /* 422 */
4489                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset>>1), dest_cb + (dct_offset>>1), wrap_c);
4490                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset>>1), dest_cr + (dct_offset>>1), wrap_c);
4491             }
4492         }
4493         /* pre quantization */
4494         if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
4495             //FIXME optimize
4496             if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
4497             if(s->dsp.sad[1](NULL, ptr_y            + 8, dest_y            + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
4498             if(s->dsp.sad[1](NULL, ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
4499             if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
4500             if(s->dsp.sad[1](NULL, ptr_cb              , dest_cb              , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
4501             if(s->dsp.sad[1](NULL, ptr_cr              , dest_cr              , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
4502             if(!s->chroma_y_shift){ /* 422 */
4503                 if(s->dsp.sad[1](NULL, ptr_cb +(dct_offset>>1), dest_cb +(dct_offset>>1), wrap_c, 8) < 20*s->qscale) skip_dct[6]= 1;
4504                 if(s->dsp.sad[1](NULL, ptr_cr +(dct_offset>>1), dest_cr +(dct_offset>>1), wrap_c, 8) < 20*s->qscale) skip_dct[7]= 1;
4505             }
4506         }
4507     }
4508
4509     if(s->avctx->quantizer_noise_shaping){
4510         if(!skip_dct[0]) get_vissual_weight(weight[0], ptr_y                 , wrap_y);
4511         if(!skip_dct[1]) get_vissual_weight(weight[1], ptr_y              + 8, wrap_y);
4512         if(!skip_dct[2]) get_vissual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
4513         if(!skip_dct[3]) get_vissual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
4514         if(!skip_dct[4]) get_vissual_weight(weight[4], ptr_cb                , wrap_c);
4515         if(!skip_dct[5]) get_vissual_weight(weight[5], ptr_cr                , wrap_c);
4516         if(!s->chroma_y_shift){ /* 422 */
4517             if(!skip_dct[6]) get_vissual_weight(weight[6], ptr_cb + (dct_offset>>1), wrap_c);
4518             if(!skip_dct[7]) get_vissual_weight(weight[7], ptr_cr + (dct_offset>>1), wrap_c);
4519         }
4520         memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*mb_block_count);
4521     }
4522
4523     /* DCT & quantize */
4524     assert(s->out_format!=FMT_MJPEG || s->qscale==8);
4525     {
4526         for(i=0;i<mb_block_count;i++) {
4527             if(!skip_dct[i]){
4528                 int overflow;
4529                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
4530             // FIXME we could decide to change to quantizer instead of clipping
4531             // JS: I don't think that would be a good idea it could lower quality instead
4532             //     of improve it. Just INTRADC clipping deserves changes in quantizer
4533                 if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
4534             }else
4535                 s->block_last_index[i]= -1;
4536         }
4537         if(s->avctx->quantizer_noise_shaping){
4538             for(i=0;i<mb_block_count;i++) {
4539                 if(!skip_dct[i]){
4540                     s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale);
4541                 }
4542             }
4543         }
4544
4545         if(s->luma_elim_threshold && !s->mb_intra)
4546             for(i=0; i<4; i++)
4547                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
4548         if(s->chroma_elim_threshold && !s->mb_intra)
4549             for(i=4; i<mb_block_count; i++)
4550                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
4551
4552         if(s->flags & CODEC_FLAG_CBP_RD){
4553             for(i=0;i<mb_block_count;i++) {
4554                 if(s->block_last_index[i] == -1)
4555                     s->coded_score[i]= INT_MAX/256;
4556             }
4557         }
4558     }
4559
4560     if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
4561         s->block_last_index[4]=
4562         s->block_last_index[5]= 0;
4563         s->block[4][0]=
4564         s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
4565     }
4566
4567     //non c quantize code returns incorrect block_last_index FIXME
4568     if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
4569         for(i=0; i<mb_block_count; i++){
4570             int j;
4571             if(s->block_last_index[i]>0){
4572                 for(j=63; j>0; j--){
4573                     if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
4574                 }
4575                 s->block_last_index[i]= j;
4576             }
4577         }
4578     }
4579
4580     /* huffman encode */
4581     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
4582     case CODEC_ID_MPEG1VIDEO:
4583     case CODEC_ID_MPEG2VIDEO:
4584         mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
4585     case CODEC_ID_MPEG4:
4586         mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
4587     case CODEC_ID_MSMPEG4V2:
4588     case CODEC_ID_MSMPEG4V3:
4589     case CODEC_ID_WMV1:
4590         msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
4591     case CODEC_ID_WMV2:
4592          ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
4593 #ifdef CONFIG_H261_ENCODER
4594     case CODEC_ID_H261:
4595         ff_h261_encode_mb(s, s->block, motion_x, motion_y); break;
4596 #endif
4597     case CODEC_ID_H263:
4598     case CODEC_ID_H263P:
4599     case CODEC_ID_FLV1:
4600     case CODEC_ID_RV10:
4601     case CODEC_ID_RV20:
4602         h263_encode_mb(s, s->block, motion_x, motion_y); break;
4603     case CODEC_ID_MJPEG:
4604         mjpeg_encode_mb(s, s->block); break;
4605     default:
4606         assert(0);
4607     }
4608 }
4609
4610 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
4611 {
4612     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
4613     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
4614 }
4615
4616 #endif //CONFIG_ENCODERS
4617
4618 void ff_mpeg_flush(AVCodecContext *avctx){
4619     int i;
4620     MpegEncContext *s = avctx->priv_data;
4621
4622     if(s==NULL || s->picture==NULL)
4623         return;
4624
4625     for(i=0; i<MAX_PICTURE_COUNT; i++){
4626        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
4627                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
4628         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
4629     }
4630     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
4631
4632     s->mb_x= s->mb_y= 0;
4633
4634     s->parse_context.state= -1;
4635     s->parse_context.frame_start_found= 0;
4636     s->parse_context.overread= 0;
4637     s->parse_context.overread_index= 0;
4638     s->parse_context.index= 0;
4639     s->parse_context.last_index= 0;
4640     s->bitstream_buffer_size=0;
4641     s->pp_time=0;
4642 }
4643
4644 #ifdef CONFIG_ENCODERS
4645 void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
4646 {
4647     const uint16_t *srcw= (uint16_t*)src;
4648     int words= length>>4;
4649     int bits= length&15;
4650     int i;
4651
4652     if(length==0) return;
4653
4654     if(words < 16){
4655         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4656     }else if(put_bits_count(pb)&7){
4657         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4658     }else{
4659         for(i=0; put_bits_count(pb)&31; i++)
4660             put_bits(pb, 8, src[i]);
4661         flush_put_bits(pb);
4662         memcpy(pbBufPtr(pb), src+i, 2*words-i);
4663         skip_put_bytes(pb, 2*words-i);
4664     }
4665
4666     put_bits(pb, bits, be2me_16(srcw[words])>>(16-bits));
4667 }
4668
4669 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
4670     int i;
4671
4672     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
4673
4674     /* mpeg1 */
4675     d->mb_skip_run= s->mb_skip_run;
4676     for(i=0; i<3; i++)
4677         d->last_dc[i]= s->last_dc[i];
4678
4679     /* statistics */
4680     d->mv_bits= s->mv_bits;
4681     d->i_tex_bits= s->i_tex_bits;
4682     d->p_tex_bits= s->p_tex_bits;
4683     d->i_count= s->i_count;
4684     d->f_count= s->f_count;
4685     d->b_count= s->b_count;
4686     d->skip_count= s->skip_count;
4687     d->misc_bits= s->misc_bits;
4688     d->last_bits= 0;
4689
4690     d->mb_skipped= 0;
4691     d->qscale= s->qscale;
4692     d->dquant= s->dquant;
4693 }
4694
4695 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
4696     int i;
4697
4698     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
4699     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
4700
4701     /* mpeg1 */
4702     d->mb_skip_run= s->mb_skip_run;
4703     for(i=0; i<3; i++)
4704         d->last_dc[i]= s->last_dc[i];
4705
4706     /* statistics */
4707     d->mv_bits= s->mv_bits;
4708     d->i_tex_bits= s->i_tex_bits;
4709     d->p_tex_bits= s->p_tex_bits;
4710     d->i_count= s->i_count;
4711     d->f_count= s->f_count;
4712     d->b_count= s->b_count;
4713     d->skip_count= s->skip_count;
4714     d->misc_bits= s->misc_bits;
4715
4716     d->mb_intra= s->mb_intra;
4717     d->mb_skipped= s->mb_skipped;
4718     d->mv_type= s->mv_type;
4719     d->mv_dir= s->mv_dir;
4720     d->pb= s->pb;
4721     if(s->data_partitioning){
4722         d->pb2= s->pb2;
4723         d->tex_pb= s->tex_pb;
4724     }
4725     d->block= s->block;
4726     for(i=0; i<8; i++)
4727         d->block_last_index[i]= s->block_last_index[i];
4728     d->interlaced_dct= s->interlaced_dct;
4729     d->qscale= s->qscale;
4730 }
4731
4732 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
4733                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
4734                            int *dmin, int *next_block, int motion_x, int motion_y)
4735 {
4736     int score;
4737     uint8_t *dest_backup[3];
4738
4739     copy_context_before_encode(s, backup, type);
4740
4741     s->block= s->blocks[*next_block];
4742     s->pb= pb[*next_block];
4743     if(s->data_partitioning){
4744         s->pb2   = pb2   [*next_block];
4745         s->tex_pb= tex_pb[*next_block];
4746     }
4747
4748     if(*next_block){
4749         memcpy(dest_backup, s->dest, sizeof(s->dest));
4750         s->dest[0] = s->rd_scratchpad;
4751         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
4752         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
4753         assert(s->linesize >= 32); //FIXME
4754     }
4755
4756     encode_mb(s, motion_x, motion_y);
4757
4758     score= put_bits_count(&s->pb);
4759     if(s->data_partitioning){
4760         score+= put_bits_count(&s->pb2);
4761         score+= put_bits_count(&s->tex_pb);
4762     }
4763
4764     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
4765         MPV_decode_mb(s, s->block);
4766
4767         score *= s->lambda2;
4768         score += sse_mb(s) << FF_LAMBDA_SHIFT;
4769     }
4770
4771     if(*next_block){
4772         memcpy(s->dest, dest_backup, sizeof(s->dest));
4773     }
4774
4775     if(score<*dmin){
4776         *dmin= score;
4777         *next_block^=1;
4778
4779         copy_context_after_encode(best, s, type);
4780     }
4781 }
4782
4783 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
4784     uint32_t *sq = ff_squareTbl + 256;
4785     int acc=0;
4786     int x,y;
4787
4788     if(w==16 && h==16)
4789         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
4790     else if(w==8 && h==8)
4791         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
4792
4793     for(y=0; y<h; y++){
4794         for(x=0; x<w; x++){
4795             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
4796         }
4797     }
4798
4799     assert(acc>=0);
4800
4801     return acc;
4802 }
4803
4804 static int sse_mb(MpegEncContext *s){
4805     int w= 16;
4806     int h= 16;
4807
4808     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
4809     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
4810
4811     if(w==16 && h==16)
4812       if(s->avctx->mb_cmp == FF_CMP_NSSE){
4813         return  s->dsp.nsse[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4814                +s->dsp.nsse[1](s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4815                +s->dsp.nsse[1](s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4816       }else{
4817         return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4818                +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4819                +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4820       }
4821     else
4822         return  sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
4823                +sse(s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
4824                +sse(s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
4825 }
4826
4827 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
4828     MpegEncContext *s= arg;
4829
4830
4831     s->me.pre_pass=1;
4832     s->me.dia_size= s->avctx->pre_dia_size;
4833     s->first_slice_line=1;
4834     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
4835         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
4836             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4837         }
4838         s->first_slice_line=0;
4839     }
4840
4841     s->me.pre_pass=0;
4842
4843     return 0;
4844 }
4845
4846 static int estimate_motion_thread(AVCodecContext *c, void *arg){
4847     MpegEncContext *s= arg;
4848
4849     ff_check_alignment();
4850
4851     s->me.dia_size= s->avctx->dia_size;
4852     s->first_slice_line=1;
4853     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
4854         s->mb_x=0; //for block init below
4855         ff_init_block_index(s);
4856         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
4857             s->block_index[0]+=2;
4858             s->block_index[1]+=2;
4859             s->block_index[2]+=2;
4860             s->block_index[3]+=2;
4861
4862             /* compute motion vector & mb_type and store in context */
4863             if(s->pict_type==B_TYPE)
4864                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
4865             else
4866                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4867         }
4868         s->first_slice_line=0;
4869     }
4870     return 0;
4871 }
4872
4873 static int mb_var_thread(AVCodecContext *c, void *arg){
4874     MpegEncContext *s= arg;
4875     int mb_x, mb_y;
4876
4877     ff_check_alignment();
4878
4879     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4880         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4881             int xx = mb_x * 16;
4882             int yy = mb_y * 16;
4883             uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
4884             int varc;
4885             int sum = s->dsp.pix_sum(pix, s->linesize);
4886
4887             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
4888
4889             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
4890             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
4891             s->me.mb_var_sum_temp    += varc;
4892         }
4893     }
4894     return 0;
4895 }
4896
4897 static void write_slice_end(MpegEncContext *s){
4898     if(s->codec_id==CODEC_ID_MPEG4){
4899         if(s->partitioned_frame){
4900             ff_mpeg4_merge_partitions(s);
4901         }
4902
4903         ff_mpeg4_stuffing(&s->pb);
4904     }else if(s->out_format == FMT_MJPEG){
4905         ff_mjpeg_stuffing(&s->pb);
4906     }
4907
4908     align_put_bits(&s->pb);
4909     flush_put_bits(&s->pb);
4910
4911     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
4912         s->misc_bits+= get_bits_diff(s);
4913 }
4914
4915 static int encode_thread(AVCodecContext *c, void *arg){
4916     MpegEncContext *s= arg;
4917     int mb_x, mb_y, pdif = 0;
4918     int i, j;
4919     MpegEncContext best_s, backup_s;
4920     uint8_t bit_buf[2][MAX_MB_BYTES];
4921     uint8_t bit_buf2[2][MAX_MB_BYTES];
4922     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
4923     PutBitContext pb[2], pb2[2], tex_pb[2];
4924 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
4925
4926     ff_check_alignment();
4927
4928     for(i=0; i<2; i++){
4929         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
4930         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
4931         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
4932     }
4933
4934     s->last_bits= put_bits_count(&s->pb);
4935     s->mv_bits=0;
4936     s->misc_bits=0;
4937     s->i_tex_bits=0;
4938     s->p_tex_bits=0;
4939     s->i_count=0;
4940     s->f_count=0;
4941     s->b_count=0;
4942     s->skip_count=0;
4943
4944     for(i=0; i<3; i++){
4945         /* init last dc values */
4946         /* note: quant matrix value (8) is implied here */
4947         s->last_dc[i] = 128 << s->intra_dc_precision;
4948
4949         s->current_picture.error[i] = 0;
4950     }
4951     s->mb_skip_run = 0;
4952     memset(s->last_mv, 0, sizeof(s->last_mv));
4953
4954     s->last_mv_dir = 0;
4955
4956     switch(s->codec_id){
4957     case CODEC_ID_H263:
4958     case CODEC_ID_H263P:
4959     case CODEC_ID_FLV1:
4960         s->gob_index = ff_h263_get_gob_height(s);
4961         break;
4962     case CODEC_ID_MPEG4:
4963         if(s->partitioned_frame)
4964             ff_mpeg4_init_partitions(s);
4965         break;
4966     }
4967
4968     s->resync_mb_x=0;
4969     s->resync_mb_y=0;
4970     s->first_slice_line = 1;
4971     s->ptr_lastgob = s->pb.buf;
4972     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4973 //    printf("row %d at %X\n", s->mb_y, (int)s);
4974         s->mb_x=0;
4975         s->mb_y= mb_y;
4976
4977         ff_set_qscale(s, s->qscale);
4978         ff_init_block_index(s);
4979
4980         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4981             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
4982             int mb_type= s->mb_type[xy];
4983 //            int d;
4984             int dmin= INT_MAX;
4985             int dir;
4986
4987             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
4988                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4989                 return -1;
4990             }
4991             if(s->data_partitioning){
4992                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
4993                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
4994                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4995                     return -1;
4996                 }
4997             }
4998
4999             s->mb_x = mb_x;
5000             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
5001             ff_update_block_index(s);
5002
5003 #ifdef CONFIG_H261_ENCODER
5004             if(s->codec_id == CODEC_ID_H261){
5005                 ff_h261_reorder_mb_index(s);
5006                 xy= s->mb_y*s->mb_stride + s->mb_x;
5007                 mb_type= s->mb_type[xy];
5008             }
5009 #endif
5010
5011             /* write gob / video packet header  */
5012             if(s->rtp_mode){
5013                 int current_packet_size, is_gob_start;
5014
5015                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
5016
5017                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
5018
5019                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
5020
5021                 switch(s->codec_id){
5022                 case CODEC_ID_H263:
5023                 case CODEC_ID_H263P:
5024                     if(!s->h263_slice_structured)
5025                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
5026                     break;
5027                 case CODEC_ID_MPEG2VIDEO:
5028                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
5029                 case CODEC_ID_MPEG1VIDEO:
5030                     if(s->mb_skip_run) is_gob_start=0;
5031                     break;
5032                 }
5033
5034                 if(is_gob_start){
5035                     if(s->start_mb_y != mb_y || mb_x!=0){
5036                         write_slice_end(s);
5037
5038                         if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
5039                             ff_mpeg4_init_partitions(s);
5040                         }
5041                     }
5042
5043                     assert((put_bits_count(&s->pb)&7) == 0);
5044                     current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
5045
5046                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
5047                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
5048                         int d= 100 / s->avctx->error_rate;
5049                         if(r % d == 0){
5050                             current_packet_size=0;
5051 #ifndef ALT_BITSTREAM_WRITER
5052                             s->pb.buf_ptr= s->ptr_lastgob;
5053 #endif
5054                             assert(pbBufPtr(&s->pb) == s->ptr_lastgob);
5055                         }
5056                     }
5057
5058                     if (s->avctx->rtp_callback){
5059                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
5060                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
5061                     }
5062
5063                     switch(s->codec_id){
5064                     case CODEC_ID_MPEG4:
5065                         ff_mpeg4_encode_video_packet_header(s);
5066                         ff_mpeg4_clean_buffers(s);
5067                     break;
5068                     case CODEC_ID_MPEG1VIDEO:
5069                     case CODEC_ID_MPEG2VIDEO:
5070                         ff_mpeg1_encode_slice_header(s);
5071                         ff_mpeg1_clean_buffers(s);
5072                     break;
5073                     case CODEC_ID_H263:
5074                     case CODEC_ID_H263P:
5075                         h263_encode_gob_header(s, mb_y);
5076                     break;
5077                     }
5078
5079                     if(s->flags&CODEC_FLAG_PASS1){
5080                         int bits= put_bits_count(&s->pb);
5081                         s->misc_bits+= bits - s->last_bits;
5082                         s->last_bits= bits;
5083                     }
5084
5085                     s->ptr_lastgob += current_packet_size;
5086                     s->first_slice_line=1;
5087                     s->resync_mb_x=mb_x;
5088                     s->resync_mb_y=mb_y;
5089                 }
5090             }
5091
5092             if(  (s->resync_mb_x   == s->mb_x)
5093                && s->resync_mb_y+1 == s->mb_y){
5094                 s->first_slice_line=0;
5095             }
5096
5097             s->mb_skipped=0;
5098             s->dquant=0; //only for QP_RD
5099
5100             if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
5101                 int next_block=0;
5102                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
5103
5104                 copy_context_before_encode(&backup_s, s, -1);
5105                 backup_s.pb= s->pb;
5106                 best_s.data_partitioning= s->data_partitioning;
5107                 best_s.partitioned_frame= s->partitioned_frame;
5108                 if(s->data_partitioning){
5109                     backup_s.pb2= s->pb2;
5110                     backup_s.tex_pb= s->tex_pb;
5111                 }
5112
5113                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
5114                     s->mv_dir = MV_DIR_FORWARD;
5115                     s->mv_type = MV_TYPE_16X16;
5116                     s->mb_intra= 0;
5117                     s->mv[0][0][0] = s->p_mv_table[xy][0];
5118                     s->mv[0][0][1] = s->p_mv_table[xy][1];
5119                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
5120                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5121                 }
5122                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
5123                     s->mv_dir = MV_DIR_FORWARD;
5124                     s->mv_type = MV_TYPE_FIELD;
5125                     s->mb_intra= 0;
5126                     for(i=0; i<2; i++){
5127                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
5128                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
5129                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
5130                     }
5131                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
5132                                  &dmin, &next_block, 0, 0);
5133                 }
5134                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
5135                     s->mv_dir = MV_DIR_FORWARD;
5136                     s->mv_type = MV_TYPE_16X16;
5137                     s->mb_intra= 0;
5138                     s->mv[0][0][0] = 0;
5139                     s->mv[0][0][1] = 0;
5140                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
5141                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5142                 }
5143                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
5144                     s->mv_dir = MV_DIR_FORWARD;
5145                     s->mv_type = MV_TYPE_8X8;
5146                     s->mb_intra= 0;
5147                     for(i=0; i<4; i++){
5148                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5149                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5150                     }
5151                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
5152                                  &dmin, &next_block, 0, 0);
5153                 }
5154                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
5155                     s->mv_dir = MV_DIR_FORWARD;
5156                     s->mv_type = MV_TYPE_16X16;
5157                     s->mb_intra= 0;
5158                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5159                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5160                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
5161                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5162                 }
5163                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
5164                     s->mv_dir = MV_DIR_BACKWARD;
5165                     s->mv_type = MV_TYPE_16X16;
5166                     s->mb_intra= 0;
5167                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5168                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5169                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
5170                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
5171                 }
5172                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
5173                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5174                     s->mv_type = MV_TYPE_16X16;
5175                     s->mb_intra= 0;
5176                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5177                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5178                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5179                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5180                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
5181                                  &dmin, &next_block, 0, 0);
5182                 }
5183                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
5184                     s->mv_dir = MV_DIR_FORWARD;
5185                     s->mv_type = MV_TYPE_FIELD;
5186                     s->mb_intra= 0;
5187                     for(i=0; i<2; i++){
5188                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5189                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5190                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5191                     }
5192                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
5193                                  &dmin, &next_block, 0, 0);
5194                 }
5195                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
5196                     s->mv_dir = MV_DIR_BACKWARD;
5197                     s->mv_type = MV_TYPE_FIELD;
5198                     s->mb_intra= 0;
5199                     for(i=0; i<2; i++){
5200                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5201                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5202                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5203                     }
5204                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
5205                                  &dmin, &next_block, 0, 0);
5206                 }
5207                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
5208                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5209                     s->mv_type = MV_TYPE_FIELD;
5210                     s->mb_intra= 0;
5211                     for(dir=0; dir<2; dir++){
5212                         for(i=0; i<2; i++){
5213                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5214                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5215                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5216                         }
5217                     }
5218                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
5219                                  &dmin, &next_block, 0, 0);
5220                 }
5221                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
5222                     s->mv_dir = 0;
5223                     s->mv_type = MV_TYPE_16X16;
5224                     s->mb_intra= 1;
5225                     s->mv[0][0][0] = 0;
5226                     s->mv[0][0][1] = 0;
5227                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
5228                                  &dmin, &next_block, 0, 0);
5229                     if(s->h263_pred || s->h263_aic){
5230                         if(best_s.mb_intra)
5231                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
5232                         else
5233                             ff_clean_intra_table_entries(s); //old mode?
5234                     }
5235                 }
5236
5237                 if((s->flags & CODEC_FLAG_QP_RD) && dmin < INT_MAX){
5238                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
5239                         const int last_qp= backup_s.qscale;
5240                         int qpi, qp, dc[6];
5241                         DCTELEM ac[6][16];
5242                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
5243                         static const int dquant_tab[4]={-1,1,-2,2};
5244
5245                         assert(backup_s.dquant == 0);
5246
5247                         //FIXME intra
5248                         s->mv_dir= best_s.mv_dir;
5249                         s->mv_type = MV_TYPE_16X16;
5250                         s->mb_intra= best_s.mb_intra;
5251                         s->mv[0][0][0] = best_s.mv[0][0][0];
5252                         s->mv[0][0][1] = best_s.mv[0][0][1];
5253                         s->mv[1][0][0] = best_s.mv[1][0][0];
5254                         s->mv[1][0][1] = best_s.mv[1][0][1];
5255
5256                         qpi = s->pict_type == B_TYPE ? 2 : 0;
5257                         for(; qpi<4; qpi++){
5258                             int dquant= dquant_tab[qpi];
5259                             qp= last_qp + dquant;
5260                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
5261                                 continue;
5262                             backup_s.dquant= dquant;
5263                             if(s->mb_intra && s->dc_val[0]){
5264                                 for(i=0; i<6; i++){
5265                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
5266                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
5267                                 }
5268                             }
5269
5270                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
5271                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
5272                             if(best_s.qscale != qp){
5273                                 if(s->mb_intra && s->dc_val[0]){
5274                                     for(i=0; i<6; i++){
5275                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
5276                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
5277                                     }
5278                                 }
5279                             }
5280                         }
5281                     }
5282                 }
5283                 if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
5284                     int mx= s->b_direct_mv_table[xy][0];
5285                     int my= s->b_direct_mv_table[xy][1];
5286
5287                     backup_s.dquant = 0;
5288                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5289                     s->mb_intra= 0;
5290                     ff_mpeg4_set_direct_mv(s, mx, my);
5291                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
5292                                  &dmin, &next_block, mx, my);
5293                 }
5294                 if(mb_type&CANDIDATE_MB_TYPE_DIRECT0){
5295                     backup_s.dquant = 0;
5296                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5297                     s->mb_intra= 0;
5298                     ff_mpeg4_set_direct_mv(s, 0, 0);
5299                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
5300                                  &dmin, &next_block, 0, 0);
5301                 }
5302                 if(!best_s.mb_intra && s->flags2&CODEC_FLAG2_SKIP_RD){
5303                     int coded=0;
5304                     for(i=0; i<6; i++)
5305                         coded |= s->block_last_index[i];
5306                     if(coded){
5307                         int mx,my;
5308                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
5309                         if(best_s.mv_dir & MV_DIRECT){
5310                             mx=my=0; //FIXME find the one we actually used
5311                             ff_mpeg4_set_direct_mv(s, mx, my);
5312                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
5313                             mx= s->mv[1][0][0];
5314                             my= s->mv[1][0][1];
5315                         }else{
5316                             mx= s->mv[0][0][0];
5317                             my= s->mv[0][0][1];
5318                         }
5319
5320                         s->mv_dir= best_s.mv_dir;
5321                         s->mv_type = best_s.mv_type;
5322                         s->mb_intra= 0;
5323 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
5324                         s->mv[0][0][1] = best_s.mv[0][0][1];
5325                         s->mv[1][0][0] = best_s.mv[1][0][0];
5326                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
5327                         backup_s.dquant= 0;
5328                         s->skipdct=1;
5329                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
5330                                         &dmin, &next_block, mx, my);
5331                         s->skipdct=0;
5332                     }
5333                 }
5334
5335                 s->current_picture.qscale_table[xy]= best_s.qscale;
5336
5337                 copy_context_after_encode(s, &best_s, -1);
5338
5339                 pb_bits_count= put_bits_count(&s->pb);
5340                 flush_put_bits(&s->pb);
5341                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
5342                 s->pb= backup_s.pb;
5343
5344                 if(s->data_partitioning){
5345                     pb2_bits_count= put_bits_count(&s->pb2);
5346                     flush_put_bits(&s->pb2);
5347                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
5348                     s->pb2= backup_s.pb2;
5349
5350                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
5351                     flush_put_bits(&s->tex_pb);
5352                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
5353                     s->tex_pb= backup_s.tex_pb;
5354                 }
5355                 s->last_bits= put_bits_count(&s->pb);
5356
5357                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5358                     ff_h263_update_motion_val(s);
5359
5360                 if(next_block==0){ //FIXME 16 vs linesize16
5361                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
5362                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
5363                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
5364                 }
5365
5366                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
5367                     MPV_decode_mb(s, s->block);
5368             } else {
5369                 int motion_x = 0, motion_y = 0;
5370                 s->mv_type=MV_TYPE_16X16;
5371                 // only one MB-Type possible
5372
5373                 switch(mb_type){
5374                 case CANDIDATE_MB_TYPE_INTRA:
5375                     s->mv_dir = 0;
5376                     s->mb_intra= 1;
5377                     motion_x= s->mv[0][0][0] = 0;
5378                     motion_y= s->mv[0][0][1] = 0;
5379                     break;
5380                 case CANDIDATE_MB_TYPE_INTER:
5381                     s->mv_dir = MV_DIR_FORWARD;
5382                     s->mb_intra= 0;
5383                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
5384                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
5385                     break;
5386                 case CANDIDATE_MB_TYPE_INTER_I:
5387                     s->mv_dir = MV_DIR_FORWARD;
5388                     s->mv_type = MV_TYPE_FIELD;
5389                     s->mb_intra= 0;
5390                     for(i=0; i<2; i++){
5391                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
5392                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
5393                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
5394                     }
5395                     break;
5396                 case CANDIDATE_MB_TYPE_INTER4V:
5397                     s->mv_dir = MV_DIR_FORWARD;
5398                     s->mv_type = MV_TYPE_8X8;
5399                     s->mb_intra= 0;
5400                     for(i=0; i<4; i++){
5401                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5402                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5403                     }
5404                     break;
5405                 case CANDIDATE_MB_TYPE_DIRECT:
5406                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5407                     s->mb_intra= 0;
5408                     motion_x=s->b_direct_mv_table[xy][0];
5409                     motion_y=s->b_direct_mv_table[xy][1];
5410                     ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
5411                     break;
5412                 case CANDIDATE_MB_TYPE_DIRECT0:
5413                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5414                     s->mb_intra= 0;
5415                     ff_mpeg4_set_direct_mv(s, 0, 0);
5416                     break;
5417                 case CANDIDATE_MB_TYPE_BIDIR:
5418                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5419                     s->mb_intra= 0;
5420                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5421                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5422                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5423                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5424                     break;
5425                 case CANDIDATE_MB_TYPE_BACKWARD:
5426                     s->mv_dir = MV_DIR_BACKWARD;
5427                     s->mb_intra= 0;
5428                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5429                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5430                     break;
5431                 case CANDIDATE_MB_TYPE_FORWARD:
5432                     s->mv_dir = MV_DIR_FORWARD;
5433                     s->mb_intra= 0;
5434                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5435                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5436 //                    printf(" %d %d ", motion_x, motion_y);
5437                     break;
5438                 case CANDIDATE_MB_TYPE_FORWARD_I:
5439                     s->mv_dir = MV_DIR_FORWARD;
5440                     s->mv_type = MV_TYPE_FIELD;
5441                     s->mb_intra= 0;
5442                     for(i=0; i<2; i++){
5443                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5444                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5445                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5446                     }
5447                     break;
5448                 case CANDIDATE_MB_TYPE_BACKWARD_I:
5449                     s->mv_dir = MV_DIR_BACKWARD;
5450                     s->mv_type = MV_TYPE_FIELD;
5451                     s->mb_intra= 0;
5452                     for(i=0; i<2; i++){
5453                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5454                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5455                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5456                     }
5457                     break;
5458                 case CANDIDATE_MB_TYPE_BIDIR_I:
5459                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5460                     s->mv_type = MV_TYPE_FIELD;
5461                     s->mb_intra= 0;
5462                     for(dir=0; dir<2; dir++){
5463                         for(i=0; i<2; i++){
5464                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5465                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5466                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5467                         }
5468                     }
5469                     break;
5470                 default:
5471                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
5472                 }
5473
5474                 encode_mb(s, motion_x, motion_y);
5475
5476                 // RAL: Update last macroblock type
5477                 s->last_mv_dir = s->mv_dir;
5478
5479                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5480                     ff_h263_update_motion_val(s);
5481
5482                 MPV_decode_mb(s, s->block);
5483             }
5484
5485             /* clean the MV table in IPS frames for direct mode in B frames */
5486             if(s->mb_intra /* && I,P,S_TYPE */){
5487                 s->p_mv_table[xy][0]=0;
5488                 s->p_mv_table[xy][1]=0;
5489             }
5490
5491             if(s->flags&CODEC_FLAG_PSNR){
5492                 int w= 16;
5493                 int h= 16;
5494
5495                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
5496                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
5497
5498                 s->current_picture.error[0] += sse(
5499                     s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
5500                     s->dest[0], w, h, s->linesize);
5501                 s->current_picture.error[1] += sse(
5502                     s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5503                     s->dest[1], w>>1, h>>1, s->uvlinesize);
5504                 s->current_picture.error[2] += sse(
5505                     s, s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5506                     s->dest[2], w>>1, h>>1, s->uvlinesize);
5507             }
5508             if(s->loop_filter){
5509                 if(s->out_format == FMT_H263)
5510                     ff_h263_loop_filter(s);
5511             }
5512 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
5513         }
5514     }
5515
5516     //not beautiful here but we must write it before flushing so it has to be here
5517     if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
5518         msmpeg4_encode_ext_header(s);
5519
5520     write_slice_end(s);
5521
5522     /* Send the last GOB if RTP */
5523     if (s->avctx->rtp_callback) {
5524         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
5525         pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
5526         /* Call the RTP callback to send the last GOB */
5527         emms_c();
5528         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
5529     }
5530
5531     return 0;
5532 }
5533
/**
 * Add one accumulator field of the context pointed to by src to the same
 * field of the context pointed to by dst, then reset that field in src.
 * Variables named dst and src must be in scope where the macro is used.
 * The do { } while (0) wrapper keeps both statements together, so the macro
 * behaves as a single statement even as the body of an unbraced if/else/for.
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
5535 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
5536     MERGE(me.scene_change_score);
5537     MERGE(me.mc_mb_var_sum_temp);
5538     MERGE(me.mb_var_sum_temp);
5539 }
5540
5541 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
5542     int i;
5543
5544     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
5545     MERGE(dct_count[1]);
5546     MERGE(mv_bits);
5547     MERGE(i_tex_bits);
5548     MERGE(p_tex_bits);
5549     MERGE(i_count);
5550     MERGE(f_count);
5551     MERGE(b_count);
5552     MERGE(skip_count);
5553     MERGE(misc_bits);
5554     MERGE(error_count);
5555     MERGE(padding_bug_score);
5556     MERGE(current_picture.error[0]);
5557     MERGE(current_picture.error[1]);
5558     MERGE(current_picture.error[2]);
5559
5560     if(dst->avctx->noise_reduction){
5561         for(i=0; i<64; i++){
5562             MERGE(dct_error_sum[0][i]);
5563             MERGE(dct_error_sum[1][i]);
5564         }
5565     }
5566
5567     assert(put_bits_count(&src->pb) % 8 ==0);
5568     assert(put_bits_count(&dst->pb) % 8 ==0);
5569     ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
5570     flush_put_bits(&dst->pb);
5571 }
5572
5573 static int estimate_qp(MpegEncContext *s, int dry_run){
5574     if (s->next_lambda){
5575         s->current_picture_ptr->quality=
5576         s->current_picture.quality = s->next_lambda;
5577         if(!dry_run) s->next_lambda= 0;
5578     } else if (!s->fixed_qscale) {
5579         s->current_picture_ptr->quality=
5580         s->current_picture.quality = ff_rate_estimate_qscale(s, dry_run);
5581         if (s->current_picture.quality < 0)
5582             return -1;
5583     }
5584
5585     if(s->adaptive_quant){
5586         switch(s->codec_id){
5587         case CODEC_ID_MPEG4:
5588             ff_clean_mpeg4_qscales(s);
5589             break;
5590         case CODEC_ID_H263:
5591         case CODEC_ID_H263P:
5592         case CODEC_ID_FLV1:
5593             ff_clean_h263_qscales(s);
5594             break;
5595         }
5596
5597         s->lambda= s->lambda_table[0];
5598         //FIXME broken
5599     }else
5600         s->lambda= s->current_picture.quality;
5601 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
5602     update_qscale(s);
5603     return 0;
5604 }
5605
/**
 * Encode one picture.
 *
 * Sets up rounding and the initial lambda, runs motion estimation on all
 * thread contexts (for I pictures every macroblock is marked intra instead),
 * selects f_code/b_code, calls estimate_qp(), writes the picture header that
 * matches s->out_format and finally runs encode_thread on all thread
 * contexts, merging their results back into s.
 *
 * @param s              main encoder context
 * @param picture_number stored in s->picture_number and handed to the
 *                       picture header writers
 * @return 0 on success, -1 if estimate_qp() fails
 */
5606 static int encode_picture(MpegEncContext *s, int picture_number)
5607 {
5608     int i;
5609     int bits;
5610
5611     s->picture_number = picture_number;
5612
5613     /* Reset the average MB variance */
5614     s->me.mb_var_sum_temp    =
5615     s->me.mc_mb_var_sum_temp = 0;
5616
5617     /* we need to initialize some time vars before we can encode b-frames */
5618     // RAL: Condition added for MPEG1VIDEO
5619     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
5620         ff_set_mpeg4_time(s, s->picture_number);  //FIXME rename and use has_b_frames or similar
5621
5622     s->me.scene_change_score=0;
5623
5624 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME ratedistoration
5625
         /* I pictures set no_rounding from msmpeg4_version; other non-B pictures
          * toggle it when flipflop_rounding is set or the codec is H263P/MPEG4;
          * B pictures leave it untouched */
5626     if(s->pict_type==I_TYPE){
5627         if(s->msmpeg4_version >= 3) s->no_rounding=1;
5628         else                        s->no_rounding=0;
5629     }else if(s->pict_type!=B_TYPE){
5630         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
5631             s->no_rounding ^= 1;
5632     }
5633
         /* pass 2: dry-run estimate_qp() followed by ff_get_2pass_fcode();
          * otherwise, unless CODEC_FLAG_QSCALE is set, start from the lambda stored
          * in last_lambda_for[] (indexed by pict_type for B pictures and by
          * last_non_b_pict_type for the others) */
5634     if(s->flags & CODEC_FLAG_PASS2){
5635         if (estimate_qp(s,1) < 0)
5636             return -1;
5637         ff_get_2pass_fcode(s);
5638     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
5639         if(s->pict_type==B_TYPE)
5640             s->lambda= s->last_lambda_for[s->pict_type];
5641         else
5642             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
5643         update_qscale(s);
5644     }
5645
5646     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* update thread contexts 1..thread_count-1 from s */
5647     for(i=1; i<s->avctx->thread_count; i++){
5648         ff_update_duplicate_context(s->thread_context[i], s);
5649     }
5650
5651     ff_init_me(s);
5652
5653     /* Estimate motion for every MB */
5654     if(s->pict_type != I_TYPE){
             /* scale lambda and lambda2 by me_penalty_compensation/256, rounded */
5655         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
5656         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
             /* the pre-pass runs only for non-B pictures with me_threshold==0, and only
              * when pre_me==2 or pre_me is set and the last non-B picture was an I picture */
5657         if(s->pict_type != B_TYPE && s->avctx->me_threshold==0){
5658             if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
5659                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5660             }
5661         }
5662
5663         s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5664     }else /* if(s->pict_type == I_TYPE) */{
5665         /* I-Frame */
5666         for(i=0; i<s->mb_stride*s->mb_height; i++)
5667             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
5668
5669         if(!s->fixed_qscale){
5670             /* finding spatial complexity for I-frame rate control */
5671             s->avctx->execute(s->avctx, mb_var_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5672         }
5673     }
         /* add the ME statistics of thread contexts 1..thread_count-1 to s */
5674     for(i=1; i<s->avctx->thread_count; i++){
5675         merge_context_after_me(s, s->thread_context[i]);
5676     }
5677     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
5678     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
5679     emms_c();
5680
         /* a P picture whose scene change score exceeds scenechange_threshold is
          * re-typed as an I picture with all macroblocks marked intra */
5681     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){
5682         s->pict_type= I_TYPE;
5683         for(i=0; i<s->mb_stride*s->mb_height; i++)
5684             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
5685 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
5686     }
5687
         /* unless umvplus is set: pick f_code (and b_code for B pictures) with
          * ff_get_best_fcode() and hand the MV tables to ff_fix_long_p_mvs() /
          * ff_fix_long_mvs() together with the chosen codes */
5688     if(!s->umvplus){
5689         if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
5690             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
5691
5692             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5693                 int a,b;
5694                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
5695                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
5696                 s->f_code= FFMAX(s->f_code, FFMAX(a,b));
5697             }
5698
5699             ff_fix_long_p_mvs(s);
5700             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
5701             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5702                 int j;
5703                 for(i=0; i<2; i++){
5704                     for(j=0; j<2; j++)
5705                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
5706                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
5707                 }
5708             }
5709         }
5710
5711         if(s->pict_type==B_TYPE){
5712             int a, b;
5713
                 /* f_code covers the forward and bidir-forward tables, b_code the
                  * backward and bidir-backward tables */
5714             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
5715             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
5716             s->f_code = FFMAX(a, b);
5717
5718             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
5719             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
5720             s->b_code = FFMAX(a, b);
5721
5722             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
5723             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
5724             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
5725             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
5726             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5727                 int dir, j;
5728                 for(dir=0; dir<2; dir++){
5729                     for(i=0; i<2; i++){
5730                         for(j=0; j<2; j++){
5731                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
5732                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
5733                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
5734                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
5735                         }
5736                     }
5737                 }
5738             }
5739         }
5740     }
5741
         /* final (non dry-run) quality / qscale selection */
5742     if (estimate_qp(s, 0) < 0)
5743         return -1;
5744
5745     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE))
5746         s->qscale= 3; //reduce clipping problems
5747
5748     if (s->out_format == FMT_MJPEG) {
5749         /* for mjpeg, we do include qscale in the matrix */
5750         s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
5751         for(i=1;i<64;i++){
5752             int j= s->dsp.idct_permutation[i];
5753
5754             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
5755         }
5756         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
5757                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
             /* qscale is already folded into the matrix above, so it is pinned to 8 */
5758         s->qscale= 8;
5759     }
5760
5761     //FIXME var duplication
5762     s->current_picture_ptr->key_frame=
5763     s->current_picture.key_frame= s->pict_type == I_TYPE; //FIXME pic_ptr
5764     s->current_picture_ptr->pict_type=
5765     s->current_picture.pict_type= s->pict_type;
5766
5767     if(s->current_picture.key_frame)
5768         s->picture_in_gop_number=0;
5769
         /* write the picture header for the output format; last_bits remembers the
          * bit position before the header so header_bits can be computed below */
5770     s->last_bits= put_bits_count(&s->pb);
5771     switch(s->out_format) {
5772     case FMT_MJPEG:
5773         mjpeg_picture_header(s);
5774         break;
5775 #ifdef CONFIG_H261_ENCODER
5776     case FMT_H261:
5777         ff_h261_encode_picture_header(s, picture_number);
5778         break;
5779 #endif
5780     case FMT_H263:
5781         if (s->codec_id == CODEC_ID_WMV2)
5782             ff_wmv2_encode_picture_header(s, picture_number);
5783         else if (s->h263_msmpeg4)
5784             msmpeg4_encode_picture_header(s, picture_number);
5785         else if (s->h263_pred)
5786             mpeg4_encode_picture_header(s, picture_number);
5787 #ifdef CONFIG_RV10_ENCODER
5788         else if (s->codec_id == CODEC_ID_RV10)
5789             rv10_encode_picture_header(s, picture_number);
5790 #endif
5791 #ifdef CONFIG_RV20_ENCODER
5792         else if (s->codec_id == CODEC_ID_RV20)
5793             rv20_encode_picture_header(s, picture_number);
5794 #endif
5795         else if (s->codec_id == CODEC_ID_FLV1)
5796             ff_flv_encode_picture_header(s, picture_number);
5797         else
5798             h263_encode_picture_header(s, picture_number);
5799         break;
5800     case FMT_MPEG1:
5801         mpeg1_encode_picture_header(s, picture_number);
5802         break;
5803     case FMT_H264:
5804         break;
5805     default:
5806         assert(0);
5807     }
5808     bits= put_bits_count(&s->pb);
5809     s->header_bits= bits - s->last_bits;
5810
         /* update thread contexts 1..thread_count-1 from s, run encode_thread on
          * all thread contexts, then merge the results of contexts
          * 1..thread_count-1 back into s */
5811     for(i=1; i<s->avctx->thread_count; i++){
5812         update_duplicate_context_after_me(s->thread_context[i], s);
5813     }
5814     s->avctx->execute(s->avctx, encode_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5815     for(i=1; i<s->avctx->thread_count; i++){
5816         merge_context_after_encode(s, s->thread_context[i]);
5817     }
5818     emms_c();
5819     return 0;
5820 }
5821
5822 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block){
5823     const int intra= s->mb_intra;
5824     int i;
5825
5826     s->dct_count[intra]++;
5827
5828     for(i=0; i<64; i++){
5829         int level= block[i];
5830
5831         if(level){
5832             if(level>0){
5833                 s->dct_error_sum[intra][i] += level;
5834                 level -= s->dct_offset[intra][i];
5835                 if(level<0) level=0;
5836             }else{
5837                 s->dct_error_sum[intra][i] -= level;
5838                 level += s->dct_offset[intra][i];
5839                 if(level>0) level=0;
5840             }
5841             block[i]= level;
5842         }
5843     }
5844 }
5845
5846 static int dct_quantize_trellis_c(MpegEncContext *s,
5847                         DCTELEM *block, int n,
5848                         int qscale, int *overflow){
5849     const int *qmat;
5850     const uint8_t *scantable= s->intra_scantable.scantable;
5851     const uint8_t *perm_scantable= s->intra_scantable.permutated;
5852     int max=0;
5853     unsigned int threshold1, threshold2;
5854     int bias=0;
5855     int run_tab[65];
5856     int level_tab[65];
5857     int score_tab[65];
5858     int survivor[65];
5859     int survivor_count;
5860     int last_run=0;
5861     int last_level=0;
5862     int last_score= 0;
5863     int last_i;
5864     int coeff[2][64];
5865     int coeff_count[64];
5866     int qmul, qadd, start_i, last_non_zero, i, dc;
5867     const int esc_length= s->ac_esc_length;
5868     uint8_t * length;
5869     uint8_t * last_length;
5870     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
5871
5872     s->dsp.fdct (block);
5873
5874     if(s->dct_error_sum)
5875         s->denoise_dct(s, block);
5876     qmul= qscale*16;
5877     qadd= ((qscale-1)|1)*8;
5878
5879     if (s->mb_intra) {
5880         int q;
5881         if (!s->h263_aic) {
5882             if (n < 4)
5883                 q = s->y_dc_scale;
5884             else
5885                 q = s->c_dc_scale;
5886             q = q << 3;
5887         } else{
5888             /* For AIC we skip quant/dequant of INTRADC */
5889             q = 1 << 3;
5890             qadd=0;
5891         }
5892
5893         /* note: block[0] is assumed to be positive */
5894         block[0] = (block[0] + (q >> 1)) / q;
5895         start_i = 1;
5896         last_non_zero = 0;
5897         qmat = s->q_intra_matrix[qscale];
5898         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
5899             bias= 1<<(QMAT_SHIFT-1);
5900         length     = s->intra_ac_vlc_length;
5901         last_length= s->intra_ac_vlc_last_length;
5902     } else {
5903         start_i = 0;
5904         last_non_zero = -1;
5905         qmat = s->q_inter_matrix[qscale];
5906         length     = s->inter_ac_vlc_length;
5907         last_length= s->inter_ac_vlc_last_length;
5908     }
5909     last_i= start_i;
5910
5911     threshold1= (1<<QMAT_SHIFT) - bias - 1;
5912     threshold2= (threshold1<<1);
5913
5914     for(i=63; i>=start_i; i--) {
5915         const int j = scantable[i];
5916         int level = block[j] * qmat[j];
5917
5918         if(((unsigned)(level+threshold1))>threshold2){
5919             last_non_zero = i;
5920             break;
5921         }
5922     }
5923
5924     for(i=start_i; i<=last_non_zero; i++) {
5925         const int j = scantable[i];
5926         int level = block[j] * qmat[j];
5927
5928 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
5929 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
5930         if(((unsigned)(level+threshold1))>threshold2){
5931             if(level>0){
5932                 level= (bias + level)>>QMAT_SHIFT;
5933                 coeff[0][i]= level;
5934                 coeff[1][i]= level-1;
5935 //                coeff[2][k]= level-2;
5936             }else{
5937                 level= (bias - level)>>QMAT_SHIFT;
5938                 coeff[0][i]= -level;
5939                 coeff[1][i]= -level+1;
5940 //                coeff[2][k]= -level+2;
5941             }
5942             coeff_count[i]= FFMIN(level, 2);
5943             assert(coeff_count[i]);
5944             max |=level;
5945         }else{
5946             coeff[0][i]= (level>>31)|1;
5947             coeff_count[i]= 1;
5948         }
5949     }
5950
5951     *overflow= s->max_qcoeff < max; //overflow might have happened
5952
5953     if(last_non_zero < start_i){
5954         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
5955         return last_non_zero;
5956     }
5957
5958     score_tab[start_i]= 0;
5959     survivor[0]= start_i;
5960     survivor_count= 1;
5961
5962     for(i=start_i; i<=last_non_zero; i++){
5963         int level_index, j;
5964         const int dct_coeff= FFABS(block[ scantable[i] ]);
5965         const int zero_distoration= dct_coeff*dct_coeff;
5966         int best_score=256*256*256*120;
5967         for(level_index=0; level_index < coeff_count[i]; level_index++){
5968             int distoration;
5969             int level= coeff[level_index][i];
5970             const int alevel= FFABS(level);
5971             int unquant_coeff;
5972
5973             assert(level);
5974
5975             if(s->out_format == FMT_H263){
5976                 unquant_coeff= alevel*qmul + qadd;
5977             }else{ //MPEG1
5978                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
5979                 if(s->mb_intra){
5980                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
5981                         unquant_coeff =   (unquant_coeff - 1) | 1;
5982                 }else{
5983                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
5984                         unquant_coeff =   (unquant_coeff - 1) | 1;
5985                 }
5986                 unquant_coeff<<= 3;
5987             }
5988
5989             distoration= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distoration;
5990             level+=64;
5991             if((level&(~127)) == 0){
5992                 for(j=survivor_count-1; j>=0; j--){
5993                     int run= i - survivor[j];
5994                     int score= distoration + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
5995                     score += score_tab[i-run];
5996
5997                     if(score < best_score){
5998                         best_score= score;
5999                         run_tab[i+1]= run;
6000                         level_tab[i+1]= level-64;
6001                     }
6002                 }
6003
6004                 if(s->out_format == FMT_H263){
6005                     for(j=survivor_count-1; j>=0; j--){
6006                         int run= i - survivor[j];
6007                         int score= distoration + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
6008                         score += score_tab[i-run];
6009                         if(score < last_score){
6010                             last_score= score;
6011                             last_run= run;
6012                             last_level= level-64;
6013                             last_i= i+1;
6014                         }
6015                     }
6016                 }
6017             }else{
6018                 distoration += esc_length*lambda;
6019                 for(j=survivor_count-1; j>=0; j--){
6020                     int run= i - survivor[j];
6021                     int score= distoration + score_tab[i-run];
6022
6023                     if(score < best_score){
6024                         best_score= score;
6025                         run_tab[i+1]= run;
6026                         level_tab[i+1]= level-64;
6027                     }
6028                 }
6029
6030                 if(s->out_format == FMT_H263){
6031                   for(j=survivor_count-1; j>=0; j--){
6032                         int run= i - survivor[j];
6033                         int score= distoration + score_tab[i-run];
6034                         if(score < last_score){
6035                             last_score= score;
6036                             last_run= run;
6037                             last_level= level-64;
6038                             last_i= i+1;
6039                         }
6040                     }
6041                 }
6042             }
6043         }
6044
6045         score_tab[i+1]= best_score;
6046
6047         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
6048         if(last_non_zero <= 27){
6049             for(; survivor_count; survivor_count--){
6050                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
6051                     break;
6052             }
6053         }else{
6054             for(; survivor_count; survivor_count--){
6055                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
6056                     break;
6057             }
6058         }
6059
6060         survivor[ survivor_count++ ]= i+1;
6061     }
6062
6063     if(s->out_format != FMT_H263){
6064         last_score= 256*256*256*120;
6065         for(i= survivor[0]; i<=last_non_zero + 1; i++){
6066             int score= score_tab[i];
6067             if(i) score += lambda*2; //FIXME exacter?
6068
6069             if(score < last_score){
6070                 last_score= score;
6071                 last_i= i;
6072                 last_level= level_tab[i];
6073                 last_run= run_tab[i];
6074             }
6075         }
6076     }
6077
6078     s->coded_score[n] = last_score;
6079
6080     dc= FFABS(block[0]);
6081     last_non_zero= last_i - 1;
6082     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
6083
6084     if(last_non_zero < start_i)
6085         return last_non_zero;
6086
6087     if(last_non_zero == 0 && start_i == 0){
6088         int best_level= 0;
6089         int best_score= dc * dc;
6090
6091         for(i=0; i<coeff_count[0]; i++){
6092             int level= coeff[i][0];
6093             int alevel= FFABS(level);
6094             int unquant_coeff, score, distortion;
6095
6096             if(s->out_format == FMT_H263){
6097                     unquant_coeff= (alevel*qmul + qadd)>>3;
6098             }else{ //MPEG1
6099                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
6100                     unquant_coeff =   (unquant_coeff - 1) | 1;
6101             }
6102             unquant_coeff = (unquant_coeff + 4) >> 3;
6103             unquant_coeff<<= 3 + 3;
6104
6105             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
6106             level+=64;
6107             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
6108             else                    score= distortion + esc_length*lambda;
6109
6110             if(score < best_score){
6111                 best_score= score;
6112                 best_level= level - 64;
6113             }
6114         }
6115         block[0]= best_level;
6116         s->coded_score[n] = best_score - dc*dc;
6117         if(best_level == 0) return -1;
6118         else                return last_non_zero;
6119     }
6120
6121     i= last_i;
6122     assert(last_level);
6123
6124     block[ perm_scantable[last_non_zero] ]= last_level;
6125     i -= last_run + 1;
6126
6127     for(; i>start_i; i -= run_tab[i] + 1){
6128         block[ perm_scantable[i-1] ]= level_tab[i];
6129     }
6130
6131     return last_non_zero;
6132 }
6133
6134 //#define REFINE_STATS 1
6135 static int16_t basis[64][64];
6136
6137 static void build_basis(uint8_t *perm){
6138     int i, j, x, y;
6139     emms_c();
6140     for(i=0; i<8; i++){
6141         for(j=0; j<8; j++){
6142             for(y=0; y<8; y++){
6143                 for(x=0; x<8; x++){
6144                     double s= 0.25*(1<<BASIS_SHIFT);
6145                     int index= 8*i + j;
6146                     int perm_index= perm[index];
6147                     if(i==0) s*= sqrt(0.5);
6148                     if(j==0) s*= sqrt(0.5);
6149                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
6150                 }
6151             }
6152         }
6153     }
6154 }
6155
/**
 * Iteratively refine an already quantized 8x8 block.
 *
 * Starting from the levels in block[], every pass tries changing a single
 * coefficient by +1 or -1, scores each candidate as
 * (VLC length delta) * lambda + s->dsp.try_8x8basis(...), commits the
 * best-scoring change and repeats; the search ends as soon as no candidate
 * scores below the unchanged block.
 *
 * @param s      encoder context; reads mb_intra, h263_aic, y_dc_scale,
 *               c_dc_scale, the q_*_matrix and *_ac_vlc_*length tables,
 *               lambda2, block_last_index[n], the intra scantable and
 *               avctx->quantizer_noise_shaping
 * @param block  quantized levels, addressed through the permutated scantable;
 *               modified in place
 * @param weight 64 weights; overwritten in place with remapped values
 *               (asserted to be > 0 and < 64)
 * @param orig   64 samples that the reconstruction is measured against
 * @param n      block index; n < 4 selects y_dc_scale, otherwise c_dc_scale
 * @param qscale quantizer scale (qmul = 2*qscale, qadd = (qscale-1)|1)
 * @return scan position of the last non-zero coefficient after refinement
 */
6156 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
6157                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
6158                         int n, int qscale){
6159     int16_t rem[64];
6160     DECLARE_ALIGNED_16(DCTELEM, d1[64]);
6161     const int *qmat;
6162     const uint8_t *scantable= s->intra_scantable.scantable;
6163     const uint8_t *perm_scantable= s->intra_scantable.permutated;
6164 //    unsigned int threshold1, threshold2;
6165 //    int bias=0;
6166     int run_tab[65];
6167     int prev_run=0;
6168     int prev_level=0;
6169     int qmul, qadd, start_i, last_non_zero, i, dc;
6170     uint8_t * length;
6171     uint8_t * last_length;
6172     int lambda;
6173     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
6174 #ifdef REFINE_STATS
6175 static int count=0;
6176 static int after_last=0;
6177 static int to_zero=0;
6178 static int from_zero=0;
6179 static int raise=0;
6180 static int lower=0;
6181 static int messed_sign=0;
6182 #endif
6183
         /* fill the static basis[] table on first use; basis[0][0]==0 serves
          * as the "not built yet" marker */
6184     if(basis[0][0] == 0)
6185         build_basis(s->dsp.idct_permutation);
6186
6187     qmul= qscale*2;
6188     qadd= (qscale-1)|1;
6189     if (s->mb_intra) {
6190         if (!s->h263_aic) {
6191             if (n < 4)
6192                 q = s->y_dc_scale;
6193             else
6194                 q = s->c_dc_scale;
6195         } else{
6196             /* For AIC we skip quant/dequant of INTRADC */
6197             q = 1;
6198             qadd=0;
6199         }
6200         q <<= RECON_SHIFT-3;
6201         /* note: block[0] is assumed to be positive */
6202         dc= block[0]*q;
6203 //        block[0] = (block[0] + (q >> 1)) / q;
6204         start_i = 1;
6205         qmat = s->q_intra_matrix[qscale];
6206 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
6207 //            bias= 1<<(QMAT_SHIFT-1);
6208         length     = s->intra_ac_vlc_length;
6209         last_length= s->intra_ac_vlc_last_length;
6210     } else {
6211         dc= 0;
6212         start_i = 0;
6213         qmat = s->q_inter_matrix[qscale];
6214         length     = s->inter_ac_vlc_length;
6215         last_length= s->inter_ac_vlc_last_length;
6216     }
6217     last_non_zero = s->block_last_index[n];
6218
6219 #ifdef REFINE_STATS
6220 {START_TIMER
6221 #endif
         /* rem[] starts as dc plus a rounding offset minus the original
          * samples, all scaled by 1<<RECON_SHIFT */
6222     dc += (1<<(RECON_SHIFT-1));
6223     for(i=0; i<64; i++){
6224         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig directly instead of copying to rem[]
6225     }
6226 #ifdef REFINE_STATS
6227 STOP_TIMER("memset rem[]")}
6228 #endif
         /* remap weight[] in place and accumulate the sum of the squared
          * remapped weights */
6229     sum=0;
6230     for(i=0; i<64; i++){
6231         int one= 36;
6232         int qns=4;
6233         int w;
6234
6235         w= FFABS(weight[i]) + qns*one;
6236         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
6237
6238         weight[i] = w;
6239 //        w=weight[i] = (63*qns + (w/2)) / w;
6240
6241         assert(w>0);
6242         assert(w<(1<<6));
6243         sum += w*w;
6244     }
         /* rate multiplier: lambda2 scaled by the summed squared weights */
6245     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
6246 #ifdef REFINE_STATS
6247 {START_TIMER
6248 #endif
         /* record the zero run in front of every non-zero level and add that
          * level's dequantized value (qmul*level +- qadd) times its basis
          * function to rem[] */
6249     run=0;
6250     rle_index=0;
6251     for(i=start_i; i<=last_non_zero; i++){
6252         int j= perm_scantable[i];
6253         const int level= block[j];
6254         int coeff;
6255
6256         if(level){
6257             if(level<0) coeff= qmul*level - qadd;
6258             else        coeff= qmul*level + qadd;
6259             run_tab[rle_index++]=run;
6260             run=0;
6261
6262             s->dsp.add_8x8basis(rem, basis[j], coeff);
6263         }else{
6264             run++;
6265         }
6266     }
6267 #ifdef REFINE_STATS
6268 if(last_non_zero>0){
6269 STOP_TIMER("init rem[]")
6270 }
6271 }
6272
6273 {START_TIMER
6274 #endif
         /* each pass picks the single best +-1 change; best_score starts at
          * the score of leaving the block unchanged */
6275     for(;;){
6276         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
6277         int best_coeff=0;
6278         int best_change=0;
6279         int run2, best_unquant_change=0, analyze_gradient;
6280 #ifdef REFINE_STATS
6281 {START_TIMER
6282 #endif
6283         analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;
6284
             /* d1[] = forward DCT of the weighted remainder; used further
              * down to skip turning a zero into +-1 when the new level has
              * the same sign as d1[] at that position */
6285         if(analyze_gradient){
6286 #ifdef REFINE_STATS
6287 {START_TIMER
6288 #endif
6289             for(i=0; i<64; i++){
6290                 int w= weight[i];
6291
6292                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
6293             }
6294 #ifdef REFINE_STATS
6295 STOP_TIMER("rem*w*w")}
6296 {START_TIMER
6297 #endif
6298             s->dsp.fdct(d1);
6299 #ifdef REFINE_STATS
6300 STOP_TIMER("dct")}
6301 #endif
6302         }
6303
             /* intra blocks: try DC +-1, rejecting candidates whose scaled
              * value q*new_level falls outside [0, 2048) */
6304         if(start_i){
6305             const int level= block[0];
6306             int change, old_coeff;
6307
6308             assert(s->mb_intra);
6309
6310             old_coeff= q*level;
6311
6312             for(change=-1; change<=1; change+=2){
6313                 int new_level= level + change;
6314                 int score, new_coeff;
6315
6316                 new_coeff= q*new_level;
6317                 if(new_coeff >= 2048 || new_coeff < 0)
6318                     continue;
6319
6320                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
6321                 if(score<best_score){
6322                     best_score= score;
6323                     best_coeff= 0;
6324                     best_change= change;
6325                     best_unquant_change= new_coeff - old_coeff;
6326                 }
6327             }
6328         }
6329
6330         run=0;
6331         rle_index=0;
6332         run2= run_tab[rle_index++];
6333         prev_level=0;
6334         prev_run=0;
6335
             /* AC coefficients in scan order; with quantizer_noise_shaping
              * below 3 the scan stops one position past last_non_zero */
6336         for(i=start_i; i<64; i++){
6337             int j= perm_scantable[i];
6338             const int level= block[j];
6339             int change, old_coeff;
6340
6341             if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
6342                 break;
6343
6344             if(level){
6345                 if(level<0) old_coeff= qmul*level - qadd;
6346                 else        old_coeff= qmul*level + qadd;
6347                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
6348             }else{
6349                 old_coeff=0;
6350                 run2--;
6351                 assert(run2>=0 || i >= last_non_zero );
6352             }
6353
6354             for(change=-1; change<=1; change+=2){
6355                 int new_level= level + change;
6356                 int score, new_coeff, unquant_change;
6357
6358                 score=0;
                     /* below noise-shaping level 2 only magnitude reductions
                      * are considered */
6359                 if(s->avctx->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
6360                    continue;
6361
6362                 if(new_level){
6363                     if(new_level<0) new_coeff= qmul*new_level - qadd;
6364                     else            new_coeff= qmul*new_level + qadd;
6365                     if(new_coeff >= 2048 || new_coeff <= -2048)
6366                         continue;
6367                     //FIXME check for overflow
6368
6369                     if(level){
                             /* non-zero stays non-zero: only this code's
                              * length changes */
6370                         if(level < 63 && level > -63){
6371                             if(i < last_non_zero)
6372                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
6373                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
6374                             else
6375                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
6376                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
6377                         }
6378                     }else{
                             /* zero becomes +-1: a new code is inserted and
                              * the run of the following code is split */
6379                         assert(FFABS(new_level)==1);
6380
6381                         if(analyze_gradient){
6382                             int g= d1[ scantable[i] ];
6383                             if(g && (g^new_level) >= 0)
6384                                 continue;
6385                         }
6386
6387                         if(i < last_non_zero){
6388                             int next_i= i + run2 + 1;
6389                             int next_level= block[ perm_scantable[next_i] ] + 64;
6390
6391                             if(next_level&(~127))
6392                                 next_level= 0;
6393
6394                             if(next_i < last_non_zero)
6395                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
6396                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
6397                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6398                             else
6399                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
6400                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6401                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6402                         }else{
6403                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
6404                             if(prev_level){
6405                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6406                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6407                             }
6408                         }
6409                     }
6410                 }else{
                         /* +-1 becomes zero: this code disappears and its run
                          * merges into the following code */
6411                     new_coeff=0;
6412                     assert(FFABS(level)==1);
6413
6414                     if(i < last_non_zero){
6415                         int next_i= i + run2 + 1;
6416                         int next_level= block[ perm_scantable[next_i] ] + 64;
6417
6418                         if(next_level&(~127))
6419                             next_level= 0;
6420
6421                         if(next_i < last_non_zero)
6422                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6423                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
6424                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6425                         else
6426                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6427                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6428                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6429                     }else{
6430                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
6431                         if(prev_level){
6432                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6433                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6434                         }
6435                     }
6436                 }
6437
                     /* total candidate score = length delta * lambda plus the
                      * value returned by try_8x8basis for this change */
6438                 score *= lambda;
6439
6440                 unquant_change= new_coeff - old_coeff;
6441                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
6442
6443                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
6444                 if(score<best_score){
6445                     best_score= score;
6446                     best_coeff= i;
6447                     best_change= change;
6448                     best_unquant_change= unquant_change;
6449                 }
6450             }
6451             if(level){
6452                 prev_level= level + 64;
6453                 if(prev_level&(~127))
6454                     prev_level= 0;
6455                 prev_run= run;
6456                 run=0;
6457             }else{
6458                 run++;
6459             }
6460         }
6461 #ifdef REFINE_STATS
6462 STOP_TIMER("iterative step")}
6463 #endif
6464
             /* commit the winning change, then refresh last_non_zero,
              * run_tab[] and rem[]; with no winner the search is finished */
6465         if(best_change){
6466             int j= perm_scantable[ best_coeff ];
6467
6468             block[j] += best_change;
6469
6470             if(best_coeff > last_non_zero){
6471                 last_non_zero= best_coeff;
6472                 assert(block[j]);
6473 #ifdef REFINE_STATS
6474 after_last++;
6475 #endif
6476             }else{
6477 #ifdef REFINE_STATS
6478 if(block[j]){
6479     if(block[j] - best_change){
6480         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
6481             raise++;
6482         }else{
6483             lower++;
6484         }
6485     }else{
6486         from_zero++;
6487     }
6488 }else{
6489     to_zero++;
6490 }
6491 #endif
6492                 for(; last_non_zero>=start_i; last_non_zero--){
6493                     if(block[perm_scantable[last_non_zero]])
6494                         break;
6495                 }
6496             }
6497 #ifdef REFINE_STATS
6498 count++;
6499 if(256*256*256*64 % count == 0){
6500     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
6501 }
6502 #endif
6503             run=0;
6504             rle_index=0;
6505             for(i=start_i; i<=last_non_zero; i++){
6506                 int j= perm_scantable[i];
6507                 const int level= block[j];
6508
6509                  if(level){
6510                      run_tab[rle_index++]=run;
6511                      run=0;
6512                  }else{
6513                      run++;
6514                  }
6515             }
6516
                 /* j here is the changed coefficient's position declared at
                  * the top of this branch; the loop above used its own j */
6517             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
6518         }else{
6519             break;
6520         }
6521     }
6522 #ifdef REFINE_STATS
6523 if(last_non_zero>0){
6524 STOP_TIMER("iterative search")
6525 }
6526 }
6527 #endif
6528
6529     return last_non_zero;
6530 }
6531
6532 static int dct_quantize_c(MpegEncContext *s,
6533                         DCTELEM *block, int n,
6534                         int qscale, int *overflow)
6535 {
6536     int i, j, level, last_non_zero, q, start_i;
6537     const int *qmat;
6538     const uint8_t *scantable= s->intra_scantable.scantable;
6539     int bias;
6540     int max=0;
6541     unsigned int threshold1, threshold2;
6542
6543     s->dsp.fdct (block);
6544
6545     if(s->dct_error_sum)
6546         s->denoise_dct(s, block);
6547
6548     if (s->mb_intra) {
6549         if (!s->h263_aic) {
6550             if (n < 4)
6551                 q = s->y_dc_scale;
6552             else
6553                 q = s->c_dc_scale;
6554             q = q << 3;
6555         } else
6556             /* For AIC we skip quant/dequant of INTRADC */
6557             q = 1 << 3;
6558
6559         /* note: block[0] is assumed to be positive */
6560         block[0] = (block[0] + (q >> 1)) / q;
6561         start_i = 1;
6562         last_non_zero = 0;
6563         qmat = s->q_intra_matrix[qscale];
6564         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6565     } else {
6566         start_i = 0;
6567         last_non_zero = -1;
6568         qmat = s->q_inter_matrix[qscale];
6569         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6570     }
6571     threshold1= (1<<QMAT_SHIFT) - bias - 1;
6572     threshold2= (threshold1<<1);
6573     for(i=63;i>=start_i;i--) {
6574         j = scantable[i];
6575         level = block[j] * qmat[j];
6576
6577         if(((unsigned)(level+threshold1))>threshold2){
6578             last_non_zero = i;
6579             break;
6580         }else{
6581             block[j]=0;
6582         }
6583     }
6584     for(i=start_i; i<=last_non_zero; i++) {
6585         j = scantable[i];
6586         level = block[j] * qmat[j];
6587
6588 //        if(   bias+level >= (1<<QMAT_SHIFT)
6589 //           || bias-level >= (1<<QMAT_SHIFT)){
6590         if(((unsigned)(level+threshold1))>threshold2){
6591             if(level>0){
6592                 level= (bias + level)>>QMAT_SHIFT;
6593                 block[j]= level;
6594             }else{
6595                 level= (bias - level)>>QMAT_SHIFT;
6596                 block[j]= -level;
6597             }
6598             max |=level;
6599         }else{
6600             block[j]=0;
6601         }
6602     }
6603     *overflow= s->max_qcoeff < max; //overflow might have happened
6604
6605     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
6606     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
6607         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
6608
6609     return last_non_zero;
6610 }
6611
6612 #endif //CONFIG_ENCODERS
6613
6614 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
6615                                    DCTELEM *block, int n, int qscale)
6616 {
6617     int i, level, nCoeffs;
6618     const uint16_t *quant_matrix;
6619
6620     nCoeffs= s->block_last_index[n];
6621
6622     if (n < 4)
6623         block[0] = block[0] * s->y_dc_scale;
6624     else
6625         block[0] = block[0] * s->c_dc_scale;
6626     /* XXX: only mpeg1 */
6627     quant_matrix = s->intra_matrix;
6628     for(i=1;i<=nCoeffs;i++) {
6629         int j= s->intra_scantable.permutated[i];
6630         level = block[j];
6631         if (level) {
6632             if (level < 0) {
6633                 level = -level;
6634                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6635                 level = (level - 1) | 1;
6636                 level = -level;
6637             } else {
6638                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6639                 level = (level - 1) | 1;
6640             }
6641             block[j] = level;
6642         }
6643     }
6644 }
6645
6646 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
6647                                    DCTELEM *block, int n, int qscale)
6648 {
6649     int i, level, nCoeffs;
6650     const uint16_t *quant_matrix;
6651
6652     nCoeffs= s->block_last_index[n];
6653
6654     quant_matrix = s->inter_matrix;
6655     for(i=0; i<=nCoeffs; i++) {
6656         int j= s->intra_scantable.permutated[i];
6657         level = block[j];
6658         if (level) {
6659             if (level < 0) {
6660                 level = -level;
6661                 level = (((level << 1) + 1) * qscale *
6662                          ((int) (quant_matrix[j]))) >> 4;
6663                 level = (level - 1) | 1;
6664                 level = -level;
6665             } else {
6666                 level = (((level << 1) + 1) * qscale *
6667                          ((int) (quant_matrix[j]))) >> 4;
6668                 level = (level - 1) | 1;
6669             }
6670             block[j] = level;
6671         }
6672     }
6673 }
6674
6675 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
6676                                    DCTELEM *block, int n, int qscale)
6677 {
6678     int i, level, nCoeffs;
6679     const uint16_t *quant_matrix;
6680
6681     if(s->alternate_scan) nCoeffs= 63;
6682     else nCoeffs= s->block_last_index[n];
6683
6684     if (n < 4)
6685         block[0] = block[0] * s->y_dc_scale;
6686     else
6687         block[0] = block[0] * s->c_dc_scale;
6688     quant_matrix = s->intra_matrix;
6689     for(i=1;i<=nCoeffs;i++) {
6690         int j= s->intra_scantable.permutated[i];
6691         level = block[j];
6692         if (level) {
6693             if (level < 0) {
6694                 level = -level;
6695                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6696                 level = -level;
6697             } else {
6698                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6699             }
6700             block[j] = level;
6701         }
6702     }
6703 }
6704
6705 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
6706                                    DCTELEM *block, int n, int qscale)
6707 {
6708     int i, level, nCoeffs;
6709     const uint16_t *quant_matrix;
6710     int sum=-1;
6711
6712     if(s->alternate_scan) nCoeffs= 63;
6713     else nCoeffs= s->block_last_index[n];
6714
6715     if (n < 4)
6716         block[0] = block[0] * s->y_dc_scale;
6717     else
6718         block[0] = block[0] * s->c_dc_scale;
6719     quant_matrix = s->intra_matrix;
6720     for(i=1;i<=nCoeffs;i++) {
6721         int j= s->intra_scantable.permutated[i];
6722         level = block[j];
6723         if (level) {
6724             if (level < 0) {
6725                 level = -level;
6726                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6727                 level = -level;
6728             } else {
6729                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6730             }
6731             block[j] = level;
6732             sum+=level;
6733         }
6734     }
6735     block[63]^=sum&1;
6736 }
6737
6738 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
6739                                    DCTELEM *block, int n, int qscale)
6740 {
6741     int i, level, nCoeffs;
6742     const uint16_t *quant_matrix;
6743     int sum=-1;
6744
6745     if(s->alternate_scan) nCoeffs= 63;
6746     else nCoeffs= s->block_last_index[n];
6747
6748     quant_matrix = s->inter_matrix;
6749     for(i=0; i<=nCoeffs; i++) {
6750         int j= s->intra_scantable.permutated[i];
6751         level = block[j];
6752         if (level) {
6753             if (level < 0) {
6754                 level = -level;
6755                 level = (((level << 1) + 1) * qscale *
6756                          ((int) (quant_matrix[j]))) >> 4;
6757                 level = -level;
6758             } else {
6759                 level = (((level << 1) + 1) * qscale *
6760                          ((int) (quant_matrix[j]))) >> 4;
6761             }
6762             block[j] = level;
6763             sum+=level;
6764         }
6765     }
6766     block[63]^=sum&1;
6767 }
6768
6769 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
6770                                   DCTELEM *block, int n, int qscale)
6771 {
6772     int i, level, qmul, qadd;
6773     int nCoeffs;
6774
6775     assert(s->block_last_index[n]>=0);
6776
6777     qmul = qscale << 1;
6778
6779     if (!s->h263_aic) {
6780         if (n < 4)
6781             block[0] = block[0] * s->y_dc_scale;
6782         else
6783             block[0] = block[0] * s->c_dc_scale;
6784         qadd = (qscale - 1) | 1;
6785     }else{
6786         qadd = 0;
6787     }
6788     if(s->ac_pred)
6789         nCoeffs=63;
6790     else
6791         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6792
6793     for(i=1; i<=nCoeffs; i++) {
6794         level = block[i];
6795         if (level) {
6796             if (level < 0) {
6797                 level = level * qmul - qadd;
6798             } else {
6799                 level = level * qmul + qadd;
6800             }
6801             block[i] = level;
6802         }
6803     }
6804 }
6805
6806 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
6807                                   DCTELEM *block, int n, int qscale)
6808 {
6809     int i, level, qmul, qadd;
6810     int nCoeffs;
6811
6812     assert(s->block_last_index[n]>=0);
6813
6814     qadd = (qscale - 1) | 1;
6815     qmul = qscale << 1;
6816
6817     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6818
6819     for(i=0; i<=nCoeffs; i++) {
6820         level = block[i];
6821         if (level) {
6822             if (level < 0) {
6823                 level = level * qmul - qadd;
6824             } else {
6825                 level = level * qmul + qadd;
6826             }
6827             block[i] = level;
6828         }
6829     }
6830 }
6831
6832 #ifdef CONFIG_ENCODERS
/*
 * Encoder descriptors. Every entry below passes sizeof(MpegEncContext) and the
 * same three entry points (MPV_encode_init, MPV_encode_picture,
 * MPV_encode_end); the entries differ only in name string, codec id,
 * accepted pixel formats and, for mpeg4, the capabilities field.
 * NOTE(review): the positional initializers presumably map to name, type, id,
 * private context size, init, encode and close of AVCodec -- confirm against
 * the struct declaration in avcodec.h.
 * Each pix_fmts list is terminated by -1.
 */
6833 AVCodec h263_encoder = {
6834     "h263",
6835     CODEC_TYPE_VIDEO,
6836     CODEC_ID_H263,
6837     sizeof(MpegEncContext),
6838     MPV_encode_init,
6839     MPV_encode_picture,
6840     MPV_encode_end,
6841     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6842 };
6843
6844 AVCodec h263p_encoder = {
6845     "h263p",
6846     CODEC_TYPE_VIDEO,
6847     CODEC_ID_H263P,
6848     sizeof(MpegEncContext),
6849     MPV_encode_init,
6850     MPV_encode_picture,
6851     MPV_encode_end,
6852     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6853 };
6854
6855 AVCodec flv_encoder = {
6856     "flv",
6857     CODEC_TYPE_VIDEO,
6858     CODEC_ID_FLV1,
6859     sizeof(MpegEncContext),
6860     MPV_encode_init,
6861     MPV_encode_picture,
6862     MPV_encode_end,
6863     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6864 };
6865
6866 AVCodec rv10_encoder = {
6867     "rv10",
6868     CODEC_TYPE_VIDEO,
6869     CODEC_ID_RV10,
6870     sizeof(MpegEncContext),
6871     MPV_encode_init,
6872     MPV_encode_picture,
6873     MPV_encode_end,
6874     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6875 };
6876
6877 AVCodec rv20_encoder = {
6878     "rv20",
6879     CODEC_TYPE_VIDEO,
6880     CODEC_ID_RV20,
6881     sizeof(MpegEncContext),
6882     MPV_encode_init,
6883     MPV_encode_picture,
6884     MPV_encode_end,
6885     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6886 };
6887
/* the only entry in this table that sets .capabilities (CODEC_CAP_DELAY) */
6888 AVCodec mpeg4_encoder = {
6889     "mpeg4",
6890     CODEC_TYPE_VIDEO,
6891     CODEC_ID_MPEG4,
6892     sizeof(MpegEncContext),
6893     MPV_encode_init,
6894     MPV_encode_picture,
6895     MPV_encode_end,
6896     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6897     .capabilities= CODEC_CAP_DELAY,
6898 };
6899
6900 AVCodec msmpeg4v1_encoder = {
6901     "msmpeg4v1",
6902     CODEC_TYPE_VIDEO,
6903     CODEC_ID_MSMPEG4V1,
6904     sizeof(MpegEncContext),
6905     MPV_encode_init,
6906     MPV_encode_picture,
6907     MPV_encode_end,
6908     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6909 };
6910
6911 AVCodec msmpeg4v2_encoder = {
6912     "msmpeg4v2",
6913     CODEC_TYPE_VIDEO,
6914     CODEC_ID_MSMPEG4V2,
6915     sizeof(MpegEncContext),
6916     MPV_encode_init,
6917     MPV_encode_picture,
6918     MPV_encode_end,
6919     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6920 };
6921
/* note: the name string is "msmpeg4", without a version suffix */
6922 AVCodec msmpeg4v3_encoder = {
6923     "msmpeg4",
6924     CODEC_TYPE_VIDEO,
6925     CODEC_ID_MSMPEG4V3,
6926     sizeof(MpegEncContext),
6927     MPV_encode_init,
6928     MPV_encode_picture,
6929     MPV_encode_end,
6930     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6931 };
6932
6933 AVCodec wmv1_encoder = {
6934     "wmv1",
6935     CODEC_TYPE_VIDEO,
6936     CODEC_ID_WMV1,
6937     sizeof(MpegEncContext),
6938     MPV_encode_init,
6939     MPV_encode_picture,
6940     MPV_encode_end,
6941     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6942 };
6943
/* the only entry listing the YUVJ420P / YUVJ422P formats instead of YUV420P */
6944 AVCodec mjpeg_encoder = {
6945     "mjpeg",
6946     CODEC_TYPE_VIDEO,
6947     CODEC_ID_MJPEG,
6948     sizeof(MpegEncContext),
6949     MPV_encode_init,
6950     MPV_encode_picture,
6951     MPV_encode_end,
6952     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUVJ420P, PIX_FMT_YUVJ422P, -1},
6953 };
6954
6955 #endif //CONFIG_ENCODERS