1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  *
20  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
21  */
22  
23 /**
24  * @file mpegvideo.c
25  * The simplest mpeg encoder (well, it was the simplest!).
26  */ 
27  
28 #include "avcodec.h"
29 #include "dsputil.h"
30 #include "mpegvideo.h"
31 #include "faandct.h"
32 #include <limits.h>
33
34 #ifdef USE_FASTMEMCPY
35 #include "fastmemcpy.h"
36 #endif
37
38 //#undef NDEBUG
39 //#include <assert.h>
40
41 #ifdef CONFIG_ENCODERS
42 static void encode_picture(MpegEncContext *s, int picture_number);
43 #endif //CONFIG_ENCODERS
44 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s, 
45                                    DCTELEM *block, int n, int qscale);
46 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s, 
47                                    DCTELEM *block, int n, int qscale);
48 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
49                                    DCTELEM *block, int n, int qscale);
50 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
51                                    DCTELEM *block, int n, int qscale);
52 static void dct_unquantize_h263_intra_c(MpegEncContext *s, 
53                                   DCTELEM *block, int n, int qscale);
54 static void dct_unquantize_h263_inter_c(MpegEncContext *s, 
55                                   DCTELEM *block, int n, int qscale);
56 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
57 #ifdef CONFIG_ENCODERS
58 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
59 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
60 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
61 static int sse_mb(MpegEncContext *s);
62 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block);
63 #endif //CONFIG_ENCODERS
64
65 #ifdef HAVE_XVMC
66 extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
67 extern void XVMC_field_end(MpegEncContext *s);
68 extern void XVMC_decode_mb(MpegEncContext *s);
69 #endif
70
71 void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
72
73
74 /* enable all paranoid tests for rounding, overflows, etc... */
75 //#define PARANOID
76
77 //#define DEBUG
78
79
80 /* for jpeg fast DCT */
81 #define CONST_BITS 14
82
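/* Note (sketch of where these constants come from): aanscales[8*u+v] is roughly
   2^14 * s(u)*s(v) with s(0)=1 and s(k)=sqrt(2)*cos(k*pi/16), the post-scale
   factors of the AAN fast DCT; e.g. 16384*sqrt(2)*cos(pi/16) ~= 22725. */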
83 static const uint16_t aanscales[64] = {
84     /* precomputed values scaled up by 14 bits */
85     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
86     22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
87     21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
88     19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
89     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
90     12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
91     8867 , 12299, 11585, 10426,  8867,  6967,  4799,  2446,
92     4520 ,  6270,  5906,  5315,  4520,  3552,  2446,  1247
93 };
94
95 static const uint8_t h263_chroma_roundtab[16] = {
96 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
97     0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
98 };
99
100 static const uint8_t ff_default_chroma_qscale_table[32]={
101 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
102     0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
103 };
104
105 #ifdef CONFIG_ENCODERS
106 static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
107 static uint8_t default_fcode_tab[MAX_MV*2+1];
108
109 enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
110
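/**
 * builds the per-qscale quantizer multiplier tables from a quant matrix.
 * qmat[qscale][i] is roughly (1<<QMAT_SHIFT)/(qscale*quant_matrix[i]); for the
 * ifast/FAAN DCT variants the AAN scale factors are folded in as well, since those
 * DCTs leave their output pre-scaled. qmat16 holds 16-bit multiplier/bias pairs for
 * the MMX quantizer. The final loop only estimates whether max_coeff*qmat can
 * overflow and warns if QMAT_SHIFT looks too large for that.
 */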
111 static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
112                            const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra)
113 {
114     int qscale;
115     int shift=0;
116
117     for(qscale=qmin; qscale<=qmax; qscale++){
118         int i;
119         if (dsp->fdct == ff_jpeg_fdct_islow 
120 #ifdef FAAN_POSTSCALE
121             || dsp->fdct == ff_faandct
122 #endif
123             ) {
124             for(i=0;i<64;i++) {
125                 const int j= dsp->idct_permutation[i];
126                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
127                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
128                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
129                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
130                 
131                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / 
132                                 (qscale * quant_matrix[j]));
133             }
134         } else if (dsp->fdct == fdct_ifast
135 #ifndef FAAN_POSTSCALE
136                    || dsp->fdct == ff_faandct
137 #endif
138                    ) {
139             for(i=0;i<64;i++) {
140                 const int j= dsp->idct_permutation[i];
141                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
142                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
143                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
144                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
145                 
146                 qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) / 
147                                 (aanscales[i] * qscale * quant_matrix[j]));
148             }
149         } else {
150             for(i=0;i<64;i++) {
151                 const int j= dsp->idct_permutation[i];
152                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
153                    So 16           <= qscale * quant_matrix[i]             <= 7905
154                    so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
155                    so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
156                 */
157                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
158 //                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
159                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
160
161                 if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
162                 qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
163             }
164         }
165         
166         for(i=intra; i<64; i++){
167             int64_t max= 8191;
168             if (dsp->fdct == fdct_ifast
169 #ifndef FAAN_POSTSCALE
170                    || dsp->fdct == ff_faandct
171 #endif
172                    ) {
173                 max= (8191LL*aanscales[i]) >> 14;
174             }
175             while(((max * qmat[qscale][i]) >> shift) > INT_MAX){ 
176                 shift++;
177             }
178         }
179     }
180     if(shift){
181         av_log(NULL, AV_LOG_INFO, "Warning, QMAT_SHIFT is larger than %d, overflows possible\n", QMAT_SHIFT - shift);
182     }
183 }
184
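/**
 * derives qscale from the current lambda.
 * 139/2^(FF_LAMBDA_SHIFT+7) = 139/16384 is roughly 1/118 = 1/FF_QP2LAMBDA, so this is
 * approximately qscale = lambda/FF_QP2LAMBDA, rounded and clipped to [qmin,qmax];
 * lambda2 is lambda^2 scaled back down by FF_LAMBDA_SCALE for use in RD decisions.
 */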
185 static inline void update_qscale(MpegEncContext *s){
186     s->qscale= (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
187     s->qscale= clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
188     
189     s->lambda2= (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
190 }
191 #endif //CONFIG_ENCODERS
192
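/**
 * initializes a ScanTable: permutated[] is the scan order reordered for the IDCT's
 * coefficient permutation, inverse[] (PPC only) maps a permuted position back to its
 * scan index, and raster_end[i] is the highest permuted index among the first i+1
 * scan positions, so callers can bound how much of a block is populated given the
 * last nonzero scan position.
 */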
193 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
194     int i;
195     int end;
196     
197     st->scantable= src_scantable;
198
199     for(i=0; i<64; i++){
200         int j;
201         j = src_scantable[i];
202         st->permutated[i] = permutation[j];
203 #ifdef ARCH_POWERPC
204         st->inverse[j] = i;
205 #endif
206     }
207     
208     end=-1;
209     for(i=0; i<64; i++){
210         int j;
211         j = st->permutated[i];
212         if(j>end) end=j;
213         st->raster_end[i]= end;
214     }
215 }
216
217 #ifdef CONFIG_ENCODERS
218 void ff_write_quant_matrix(PutBitContext *pb, int16_t *matrix){
219     int i;
220
221     if(matrix){
222         put_bits(pb, 1, 1);
223         for(i=0;i<64;i++) {
224             put_bits(pb, 8, matrix[ ff_zigzag_direct[i] ]);
225         }
226     }else
227         put_bits(pb, 1, 0);
228 }
229 #endif //CONFIG_ENCODERS
230
231 /* init common dct for both encoder and decoder */
232 int DCT_common_init(MpegEncContext *s)
233 {
234     s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
235     s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
236     s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
237     s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
238     s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
239     s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;
240
241 #ifdef CONFIG_ENCODERS
242     s->dct_quantize= dct_quantize_c;
243     s->denoise_dct= denoise_dct_c;
244 #endif
245         
246 #ifdef HAVE_MMX
247     MPV_common_init_mmx(s);
248 #endif
249 #ifdef ARCH_ALPHA
250     MPV_common_init_axp(s);
251 #endif
252 #ifdef HAVE_MLIB
253     MPV_common_init_mlib(s);
254 #endif
255 #ifdef HAVE_MMI
256     MPV_common_init_mmi(s);
257 #endif
258 #ifdef ARCH_ARMV4L
259     MPV_common_init_armv4l(s);
260 #endif
261 #ifdef ARCH_POWERPC
262     MPV_common_init_ppc(s);
263 #endif
264
265 #ifdef CONFIG_ENCODERS
266     s->fast_dct_quantize= s->dct_quantize;
267
268     if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
269         s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
270     }
271
272 #endif //CONFIG_ENCODERS
273
274     /* load & permute scantables
275        note: only wmv uses different ones 
276     */
277     if(s->alternate_scan){
278         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
279         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
280     }else{
281         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
282         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
283     }
284     ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
285     ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
286
287     return 0;
288 }
289
290 static void copy_picture(Picture *dst, Picture *src){
291     *dst = *src;
292     dst->type= FF_BUFFER_TYPE_COPY;
293 }
294
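/**
 * copies the per-frame metadata (pict_type, quality, pts, interlacing flags,
 * picture numbers) from src to dst; if me_threshold is set (first-pass motion data
 * is being reused) the mb_type, motion_val and ref_index arrays are copied too,
 * after some sanity checks.
 */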
295 static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *src){
296     int i;
297
298     dst->pict_type              = src->pict_type;
299     dst->quality                = src->quality;
300     dst->coded_picture_number   = src->coded_picture_number;
301     dst->display_picture_number = src->display_picture_number;
302 //    dst->reference              = src->reference;
303     dst->pts                    = src->pts;
304     dst->interlaced_frame       = src->interlaced_frame;
305     dst->top_field_first        = src->top_field_first;
306
307     if(s->avctx->me_threshold){
308         if(!src->motion_val[0])
309             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
310         if(!src->mb_type)
311             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
312         if(!src->ref_index[0])
313             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
314         if(src->motion_subsample_log2 != dst->motion_subsample_log2)
315             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
316             src->motion_subsample_log2, dst->motion_subsample_log2);
317
318         memcpy(dst->mb_type, src->mb_type, s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
319         
320         for(i=0; i<2; i++){
321             int stride= ((16*s->mb_width )>>src->motion_subsample_log2) + 1;
322             int height= ((16*s->mb_height)>>src->motion_subsample_log2);
323
324             if(src->motion_val[i] && src->motion_val[i] != dst->motion_val[i]){
325                 memcpy(dst->motion_val[i], src->motion_val[i], 2*stride*height*sizeof(int16_t));
326             }
327             if(src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]){
328                 memcpy(dst->ref_index[i], src->ref_index[i], s->b8_stride*2*s->mb_height*sizeof(int8_t));
329             }
330         }
331     }
332 }
333
334 /**
335  * allocates a Picture
336  * The pixels are allocated/set by calling get_buffer() if shared=0
337  */
338 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
339     const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesn't sig11
340     const int mb_array_size= s->mb_stride*s->mb_height;
341     const int b8_array_size= s->b8_stride*s->mb_height*2;
342     const int b4_array_size= s->b4_stride*s->mb_height*4;
343     int i;
344     
345     if(shared){
346         assert(pic->data[0]);
347         assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
348         pic->type= FF_BUFFER_TYPE_SHARED;
349     }else{
350         int r;
351         
352         assert(!pic->data[0]);
353         
354         r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);
355         
356         if(r<0 || !pic->age || !pic->type || !pic->data[0]){
357             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
358             return -1;
359         }
360
361         if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
362             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
363             return -1;
364         }
365
366         if(pic->linesize[1] != pic->linesize[2]){
367             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
368             return -1;
369         }
370
371         s->linesize  = pic->linesize[0];
372         s->uvlinesize= pic->linesize[1];
373     }
374     
375     if(pic->qscale_table==NULL){
376         if (s->encoding) {        
377             CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
378             CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
379             CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
380         }
381
382         CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
383         CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
384         CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(uint32_t))
385         pic->mb_type= pic->mb_type_base + s->mb_stride+1;
386         if(s->out_format == FMT_H264){
387             for(i=0; i<2; i++){
388                 CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+4)  * sizeof(int16_t))
389                 pic->motion_val[i]= pic->motion_val_base[i]+4;
390                 CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
391             }
392             pic->motion_subsample_log2= 2;
393         }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
394             for(i=0; i<2; i++){
395                 CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t))
396                 pic->motion_val[i]= pic->motion_val_base[i]+4;
397                 CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
398             }
399             pic->motion_subsample_log2= 3;
400         }
401         if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
402             CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
403         }
404         pic->qstride= s->mb_stride;
405         CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
406     }
407
408     //it might be nicer if the application would keep track of these but it would require an API change
409     memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
410     s->prev_pict_types[0]= s->pict_type;
411     if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
412         pic->age= INT_MAX; // skipped MBs in B-frames are quite rare in MPEG-1/2 and it's a bit tricky to skip them anyway
413     
414     return 0;
415 fail: //for the CHECKED_ALLOCZ macro
416     return -1;
417 }
418
419 /**
420  * deallocates a picture
421  */
422 static void free_picture(MpegEncContext *s, Picture *pic){
423     int i;
424
425     if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
426         s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
427     }
428
429     av_freep(&pic->mb_var);
430     av_freep(&pic->mc_mb_var);
431     av_freep(&pic->mb_mean);
432     av_freep(&pic->mbskip_table);
433     av_freep(&pic->qscale_table);
434     av_freep(&pic->mb_type_base);
435     av_freep(&pic->dct_coeff);
436     av_freep(&pic->pan_scan);
437     pic->mb_type= NULL;
438     for(i=0; i<2; i++){
439         av_freep(&pic->motion_val_base[i]);
440         av_freep(&pic->ref_index[i]);
441     }
442     
443     if(pic->type == FF_BUFFER_TYPE_SHARED){
444         for(i=0; i<4; i++){
445             pic->base[i]=
446             pic->data[i]= NULL;
447         }
448         pic->type= 0;        
449     }
450 }
451
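/**
 * allocates the per-thread scratch buffers: the edge emulation buffer for motion
 * compensation over picture borders, the ME/RD/OBMC scratchpads, the ME map and
 * score_map tables, the optional noise-reduction error sums and the DCT block
 * array; pblocks[] is set up to point at the individual 64-coefficient blocks.
 */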
452 static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
453     int i;
454
455     // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264) 
456     CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*17*2); //(width + edge + align)*interlaced*MBsize*tolerance
457     s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*17;
458
459      //FIXME should be linesize instead of s->width*2 but that isn't known before get_buffer()
460     CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t)) 
461     s->rd_scratchpad=   s->me.scratchpad;
462     s->b_scratchpad=    s->me.scratchpad;
463     s->obmc_scratchpad= s->me.scratchpad + 16;
464     if (s->encoding) {
465         CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
466         CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
467         if(s->avctx->noise_reduction){
468             CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
469         }
470     }   
471     CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
472     s->block= s->blocks[0];
473
474     for(i=0;i<12;i++){
475         s->pblocks[i] = (short *)(&s->block[i]);
476     }
477     return 0;
478 fail:
479     return -1; //free() through MPV_common_end()
480 }
481
482 static void free_duplicate_context(MpegEncContext *s){
483     if(s==NULL) return;
484
485     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
486     av_freep(&s->me.scratchpad);
487     s->rd_scratchpad=   
488     s->b_scratchpad=    
489     s->obmc_scratchpad= NULL;
490     
491     av_freep(&s->dct_error_sum);
492     av_freep(&s->me.map);
493     av_freep(&s->me.score_map);
494     av_freep(&s->blocks);
495     s->block= NULL;
496 }
497
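/**
 * saves the pointers and fields that are private to one slice/thread context so
 * ff_update_duplicate_context() can memcpy the whole master context over it and
 * then restore these thread-local parts afterwards.
 */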
498 static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
499 #define COPY(a) bak->a= src->a
500     COPY(allocated_edge_emu_buffer);
501     COPY(edge_emu_buffer);
502     COPY(me.scratchpad);
503     COPY(rd_scratchpad);
504     COPY(b_scratchpad);
505     COPY(obmc_scratchpad);
506     COPY(me.map);
507     COPY(me.score_map);
508     COPY(blocks);
509     COPY(block);
510     COPY(start_mb_y);
511     COPY(end_mb_y);
512     COPY(me.map_generation);
513     COPY(pb);
514     COPY(dct_error_sum);
515     COPY(dct_count[0]);
516     COPY(dct_count[1]);
517 #undef COPY
518 }
519
520 void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
521     MpegEncContext bak;
522     int i;
523     //FIXME copy only needed parts
524 //START_TIMER
525     backup_duplicate_context(&bak, dst);
526     memcpy(dst, src, sizeof(MpegEncContext));
527     backup_duplicate_context(dst, &bak);
528     for(i=0;i<12;i++){
529         dst->pblocks[i] = (short *)(&dst->block[i]);
530     }
531 //STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
532 }
533
534 static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){
535 #define COPY(a) dst->a= src->a
536     COPY(pict_type);
537     COPY(current_picture);
538     COPY(f_code);
539     COPY(b_code);
540     COPY(qscale);
541     COPY(lambda);
542     COPY(lambda2);
543     COPY(picture_in_gop_number);
544     COPY(gop_picture_number);
545     COPY(frame_pred_frame_dct); //FIXME don't set in encode_header
546     COPY(progressive_frame); //FIXME don't set in encode_header
547     COPY(partitioned_frame); //FIXME don't set in encode_header
548 #undef COPY
549 }
550
551 /**
552  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
553  * the changed fields will not depend upon the prior state of the MpegEncContext.
554  */
555 static void MPV_common_defaults(MpegEncContext *s){
556     s->y_dc_scale_table=
557     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
558     s->chroma_qscale_table= ff_default_chroma_qscale_table;
559     s->progressive_frame= 1;
560     s->progressive_sequence= 1;
561     s->picture_structure= PICT_FRAME;
562
563     s->coded_picture_number = 0;
564     s->picture_number = 0;
565     s->input_picture_number = 0;
566
567     s->picture_in_gop_number = 0;
568
569     s->f_code = 1;
570     s->b_code = 1;
571 }
572
573 /**
574  * sets the given MpegEncContext to defaults for decoding.
575  * the changed fields will not depend upon the prior state of the MpegEncContext.
576  */
577 void MPV_decode_defaults(MpegEncContext *s){
578     MPV_common_defaults(s);
579 }
580
581 /**
582  * sets the given MpegEncContext to defaults for encoding.
583  * the changed fields will not depend upon the prior state of the MpegEncContext.
584  */
585
586 #ifdef CONFIG_ENCODERS
587 static void MPV_encode_defaults(MpegEncContext *s){
588     static int done=0;
589     
590     MPV_common_defaults(s);
591     
592     if(!done){
593         int i;
594         done=1;
595
596         default_mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
597         memset(default_fcode_tab , 0, sizeof(uint8_t)*(2*MAX_MV+1));
598
599         for(i=-16; i<16; i++){
600             default_fcode_tab[i + MAX_MV]= 1;
601         }
602     }
603     s->me.mv_penalty= default_mv_penalty;
604     s->fcode_tab= default_fcode_tab;
605 }
606 #endif //CONFIG_ENCODERS
607
608 /** 
609  * init common structure for both encoder and decoder.
610  * this assumes that some variables like width/height are already set
611  */
612 int MPV_common_init(MpegEncContext *s)
613 {
614     int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;
615
616     if(s->avctx->thread_count > MAX_THREADS || (16*s->avctx->thread_count > s->height && s->height)){
617         av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
618         return -1;
619     }
620
621     if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height))
622         return -1;
623
624     dsputil_init(&s->dsp, s->avctx);
625     DCT_common_init(s);
626
627     s->flags= s->avctx->flags;
628     s->flags2= s->avctx->flags2;
629
630     s->mb_width  = (s->width  + 15) / 16;
631     s->mb_height = (s->height + 15) / 16;
632     s->mb_stride = s->mb_width + 1;
633     s->b8_stride = s->mb_width*2 + 1;
634     s->b4_stride = s->mb_width*4 + 1;
635     mb_array_size= s->mb_height * s->mb_stride;
636     mv_table_size= (s->mb_height+2) * s->mb_stride + 1;
637
638     /* set chroma shifts */
639     avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
640                                                     &(s->chroma_y_shift) );
641
642     /* set default edge pos, will be overridden in decode_header if needed */
643     s->h_edge_pos= s->mb_width*16;
644     s->v_edge_pos= s->mb_height*16;
645
646     s->mb_num = s->mb_width * s->mb_height;
647     
648     s->block_wrap[0]=
649     s->block_wrap[1]=
650     s->block_wrap[2]=
651     s->block_wrap[3]= s->b8_stride;
652     s->block_wrap[4]=
653     s->block_wrap[5]= s->mb_stride;
654  
655     y_size = s->b8_stride * (2 * s->mb_height + 1);
656     c_size = s->mb_stride * (s->mb_height + 1);
657     yc_size = y_size + 2 * c_size;
658     
659     /* convert fourcc to upper case */
660     s->avctx->codec_tag=   toupper( s->avctx->codec_tag     &0xFF)          
661                         + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
662                         + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16) 
663                         + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);
664
665     s->avctx->stream_codec_tag=   toupper( s->avctx->stream_codec_tag     &0xFF)          
666                                + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
667                                + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16) 
668                                + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);
669
670     s->avctx->coded_frame= (AVFrame*)&s->current_picture;
671
672     CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error resilience code looks cleaner with this
673     for(y=0; y<s->mb_height; y++){
674         for(x=0; x<s->mb_width; x++){
675             s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
676         }
677     }
678     s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
679     
680     if (s->encoding) {
681         /* Allocate MV tables */
682         CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
683         CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
684         CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
685         CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
686         CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
687         CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
688         s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
689         s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
690         s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
691         s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
692         s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
693         s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;
694
695         if(s->msmpeg4_version){
696             CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
697         }
698         CHECKED_ALLOCZ(s->avctx->stats_out, 256);
699
700         /* Allocate MB type table */
701         CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding
702         
703         CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))
704         
705         CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
706         CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
707         CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
708         CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
709         CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
710         CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
711         
712         if(s->avctx->noise_reduction){
713             CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
714         }
715     }
716     CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))
717
718     CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
719     
720     if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
721         /* interlaced direct mode decoding tables */
722             for(i=0; i<2; i++){
723                 int j, k;
724                 for(j=0; j<2; j++){
725                     for(k=0; k<2; k++){
726                         CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
727                         s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
728                     }
729                     CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
730                     CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
731                     s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
732                 }
733                 CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
734             }
735     }
736     if (s->out_format == FMT_H263) {
737         /* ac values */
738         CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
739         s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
740         s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
741         s->ac_val[2] = s->ac_val[1] + c_size;
742         
743         /* cbp values */
744         CHECKED_ALLOCZ(s->coded_block_base, y_size);
745         s->coded_block= s->coded_block_base + s->b8_stride + 1;
746         
747         /* cbp, ac_pred, pred_dir */
748         CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
749         CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
750     }
751     
752     if (s->h263_pred || s->h263_plus || !s->encoding) {
753         /* dc values */
754         //MN: we need these for error resilience of intra-frames
755         CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
756         s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
757         s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
758         s->dc_val[2] = s->dc_val[1] + c_size;
759         for(i=0;i<yc_size;i++)
760             s->dc_val_base[i] = 1024;
761     }
762
763     /* which mb is an intra block */
764     CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
765     memset(s->mbintra_table, 1, mb_array_size);
766     
767     /* init macroblock skip table */
768     CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
769     //Note the +1 is for a quicker mpeg4 slice_end detection
770     CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
771     
772     s->parse_context.state= -1;
773     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
774        s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
775        s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
776        s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
777     }
778
779     s->context_initialized = 1;
780
781     s->thread_context[0]= s;
782     for(i=1; i<s->avctx->thread_count; i++){
783         s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
784         memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
785     }
786
787     for(i=0; i<s->avctx->thread_count; i++){
788         if(init_duplicate_context(s->thread_context[i], s) < 0)
789            goto fail;
790         s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
791         s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
792     }
793
794     return 0;
795  fail:
796     MPV_common_end(s);
797     return -1;
798 }
799
800 /* init common structure for both encoder and decoder */
801 void MPV_common_end(MpegEncContext *s)
802 {
803     int i, j, k;
804
805     for(i=0; i<s->avctx->thread_count; i++){
806         free_duplicate_context(s->thread_context[i]);
807     }
808     for(i=1; i<s->avctx->thread_count; i++){
809         av_freep(&s->thread_context[i]);
810     }
811
812     av_freep(&s->parse_context.buffer);
813     s->parse_context.buffer_size=0;
814
815     av_freep(&s->mb_type);
816     av_freep(&s->p_mv_table_base);
817     av_freep(&s->b_forw_mv_table_base);
818     av_freep(&s->b_back_mv_table_base);
819     av_freep(&s->b_bidir_forw_mv_table_base);
820     av_freep(&s->b_bidir_back_mv_table_base);
821     av_freep(&s->b_direct_mv_table_base);
822     s->p_mv_table= NULL;
823     s->b_forw_mv_table= NULL;
824     s->b_back_mv_table= NULL;
825     s->b_bidir_forw_mv_table= NULL;
826     s->b_bidir_back_mv_table= NULL;
827     s->b_direct_mv_table= NULL;
828     for(i=0; i<2; i++){
829         for(j=0; j<2; j++){
830             for(k=0; k<2; k++){
831                 av_freep(&s->b_field_mv_table_base[i][j][k]);
832                 s->b_field_mv_table[i][j][k]=NULL;
833             }
834             av_freep(&s->b_field_select_table[i][j]);
835             av_freep(&s->p_field_mv_table_base[i][j]);
836             s->p_field_mv_table[i][j]=NULL;
837         }
838         av_freep(&s->p_field_select_table[i]);
839     }
840     
841     av_freep(&s->dc_val_base);
842     av_freep(&s->ac_val_base);
843     av_freep(&s->coded_block_base);
844     av_freep(&s->mbintra_table);
845     av_freep(&s->cbp_table);
846     av_freep(&s->pred_dir_table);
847     
848     av_freep(&s->mbskip_table);
849     av_freep(&s->prev_pict_types);
850     av_freep(&s->bitstream_buffer);
851     s->allocated_bitstream_buffer_size=0;
852
853     av_freep(&s->avctx->stats_out);
854     av_freep(&s->ac_stats);
855     av_freep(&s->error_status_table);
856     av_freep(&s->mb_index2xy);
857     av_freep(&s->lambda_table);
858     av_freep(&s->q_intra_matrix);
859     av_freep(&s->q_inter_matrix);
860     av_freep(&s->q_intra_matrix16);
861     av_freep(&s->q_inter_matrix16);
862     av_freep(&s->input_picture);
863     av_freep(&s->reordered_input_picture);
864     av_freep(&s->dct_offset);
865
866     if(s->picture){
867         for(i=0; i<MAX_PICTURE_COUNT; i++){
868             free_picture(s, &s->picture[i]);
869         }
870     }
871     av_freep(&s->picture);
872     s->context_initialized = 0;
873     s->last_picture_ptr=
874     s->next_picture_ptr=
875     s->current_picture_ptr= NULL;
876     s->linesize= s->uvlinesize= 0;
877
878     for(i=0; i<3; i++)
879         av_freep(&s->visualization_buffer[i]);
880
881     avcodec_default_free_buffers(s->avctx);
882 }
883
884 #ifdef CONFIG_ENCODERS
885
886 /* init video encoder */
887 int MPV_encode_init(AVCodecContext *avctx)
888 {
889     MpegEncContext *s = avctx->priv_data;
890     int i, dummy;
891     int chroma_h_shift, chroma_v_shift;
892     
893     MPV_encode_defaults(s);
894
895     if(avctx->pix_fmt != PIX_FMT_YUVJ420P && avctx->pix_fmt != PIX_FMT_YUV420P){
896         av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
897         return -1;
898     }
899
900     if(avctx->codec_id == CODEC_ID_MJPEG || avctx->codec_id == CODEC_ID_LJPEG){
901         if(avctx->strict_std_compliance>=0 && avctx->pix_fmt != PIX_FMT_YUVJ420P){
902             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
903             return -1;
904         }
905     }else{
906         if(avctx->strict_std_compliance>=0 && avctx->pix_fmt != PIX_FMT_YUV420P){
907             av_log(avctx, AV_LOG_ERROR, "colorspace not supported\n");
908             return -1;
909         }
910     }
911
912     s->bit_rate = avctx->bit_rate;
913     s->width = avctx->width;
914     s->height = avctx->height;
915     if(avctx->gop_size > 600){
916         av_log(avctx, AV_LOG_ERROR, "Warning, keyframe interval too large! reducing it ...\n");
917         avctx->gop_size=600;
918     }
919     s->gop_size = avctx->gop_size;
920     s->avctx = avctx;
921     s->flags= avctx->flags;
922     s->flags2= avctx->flags2;
923     s->max_b_frames= avctx->max_b_frames;
924     s->codec_id= avctx->codec->id;
925     s->luma_elim_threshold  = avctx->luma_elim_threshold;
926     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
927     s->strict_std_compliance= avctx->strict_std_compliance;
928     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
929     s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
930     s->mpeg_quant= avctx->mpeg_quant;
931     s->rtp_mode= !!avctx->rtp_payload_size;
932     s->intra_dc_precision= avctx->intra_dc_precision;
933     s->user_specified_pts = AV_NOPTS_VALUE;
934
935     if (s->gop_size <= 1) {
936         s->intra_only = 1;
937         s->gop_size = 12;
938     } else {
939         s->intra_only = 0;
940     }
941
942     s->me_method = avctx->me_method;
943
944     /* Fixed QSCALE */
945     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
946     
947     s->adaptive_quant= (   s->avctx->lumi_masking
948                         || s->avctx->dark_masking
949                         || s->avctx->temporal_cplx_masking 
950                         || s->avctx->spatial_cplx_masking
951                         || s->avctx->p_masking
952                         || s->avctx->border_masking
953                         || (s->flags&CODEC_FLAG_QP_RD))
954                        && !s->fixed_qscale;
955     
956     s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
957     s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
958     s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
959
960     if(avctx->rc_max_rate && !avctx->rc_buffer_size){
961         av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed for encoding with a maximum bitrate\n");
962         return -1;
963     }    
964
965     if(avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate){
966         av_log(avctx, AV_LOG_INFO, "Warning, min_rate > 0 but min_rate != max_rate isn't recommended!\n");
967     }
968     
969     if(avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate){
970         av_log(avctx, AV_LOG_INFO, "bitrate below min bitrate\n");
971         return -1;
972     }
973     
974     if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){
975         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
976         return -1;
977     }
978         
979     if(   s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate 
980        && (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO)
981        && 90000LL * (avctx->rc_buffer_size-1) > s->avctx->rc_max_rate*0xFFFFLL){
982         
983         av_log(avctx, AV_LOG_INFO, "Warning vbv_delay will be set to 0xFFFF (=VBR) as the specified vbv buffer is too large for the given bitrate!\n");
984     }
985        
986     if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4 
987        && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P && s->codec_id != CODEC_ID_FLV1){
988         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
989         return -1;
990     }
991         
992     if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
993         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decision\n");
994         return -1;
995     }
996     
997     if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
998         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
999         return -1;
1000     }
1001     
1002     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
1003         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
1004         return -1;
1005     }
1006
1007     if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
1008         av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
1009         return -1;
1010     }
1011     
1012     if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
1013         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
1014         return -1;
1015     }
1016
1017     if((s->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN)) 
1018        && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO){
1019         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
1020         return -1;
1021     }
1022         
1023     if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
1024         av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supported by codec\n");
1025         return -1;
1026     }
1027         
1028     if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
1029         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
1030         return -1;
1031     }
1032
1033     if((s->flags & CODEC_FLAG_QP_RD) && s->avctx->mb_decision != FF_MB_DECISION_RD){
1034         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
1035         return -1;
1036     }
1037     
1038     if(s->avctx->scenechange_threshold < 1000000000 && (s->flags & CODEC_FLAG_CLOSED_GOP)){
1039         av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection isn't supported yet\n");
1040         return -1;
1041     }
1042     
1043     if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4 
1044        && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO 
1045        && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){
1046         av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n");
1047         return -1;
1048     }
1049     
1050     if(s->avctx->thread_count > 1)
1051         s->rtp_mode= 1;
1052
1053     if(!avctx->frame_rate || !avctx->frame_rate_base){
1054         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
1055         return -1;
1056     }
1057     
1058     i= (INT_MAX/2+128)>>8;
1059     if(avctx->me_threshold >= i){
1060         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n", i - 1);
1061         return -1;
1062     }
1063     if(avctx->mb_threshold >= i){
1064         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n", i - 1);
1065         return -1;
1066     }
1067         
1068     i= ff_gcd(avctx->frame_rate, avctx->frame_rate_base);
1069     if(i > 1){
1070         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
1071         avctx->frame_rate /= i;
1072         avctx->frame_rate_base /= i;
1073 //        return -1;
1074     }
1075     
1076     if(s->codec_id==CODEC_ID_MJPEG){
1077         s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
1078         s->inter_quant_bias= 0;
1079     }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
1080         s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
1081         s->inter_quant_bias= 0;
1082     }else{
1083         s->intra_quant_bias=0;
1084         s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
1085     }
1086     
1087     if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
1088         s->intra_quant_bias= avctx->intra_quant_bias;
1089     if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
1090         s->inter_quant_bias= avctx->inter_quant_bias;
1091         
1092     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
1093
1094     av_reduce(&s->time_increment_resolution, &dummy, s->avctx->frame_rate, s->avctx->frame_rate_base, (1<<16)-1);
1095     s->time_increment_bits = av_log2(s->time_increment_resolution - 1) + 1;
1096
1097     switch(avctx->codec->id) {
1098     case CODEC_ID_MPEG1VIDEO:
1099         s->out_format = FMT_MPEG1;
1100         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1101         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1102         break;
1103     case CODEC_ID_MPEG2VIDEO:
1104         s->out_format = FMT_MPEG1;
1105         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1106         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1107         s->rtp_mode= 1;
1108         break;
1109     case CODEC_ID_LJPEG:
1110     case CODEC_ID_MJPEG:
1111         s->out_format = FMT_MJPEG;
1112         s->intra_only = 1; /* force intra only for jpeg */
1113         s->mjpeg_write_tables = 1; /* write all tables */
1114         s->mjpeg_data_only_frames = 0; /* write all the needed headers */
1115         s->mjpeg_vsample[0] = 1<<chroma_v_shift;
1116         s->mjpeg_vsample[1] = 1;
1117         s->mjpeg_vsample[2] = 1; 
1118         s->mjpeg_hsample[0] = 1<<chroma_h_shift;
1119         s->mjpeg_hsample[1] = 1; 
1120         s->mjpeg_hsample[2] = 1; 
1121         if (mjpeg_init(s) < 0)
1122             return -1;
1123         avctx->delay=0;
1124         s->low_delay=1;
1125         break;
1126     case CODEC_ID_H261:
1127         s->out_format = FMT_H261;
1128         avctx->delay=0;
1129         s->low_delay=1;
1130         break;
1131     case CODEC_ID_H263:
1132         if (h263_get_picture_format(s->width, s->height) == 7) {
1133             av_log(avctx, AV_LOG_INFO, "Input picture size isn't suitable for h263 codec! try h263+\n");
1134             return -1;
1135         }
1136         s->out_format = FMT_H263;
1137         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1138         avctx->delay=0;
1139         s->low_delay=1;
1140         break;
1141     case CODEC_ID_H263P:
1142         s->out_format = FMT_H263;
1143         s->h263_plus = 1;
1144         /* Fx */
1145         s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
1146         s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
1147         s->modified_quant= s->h263_aic;
1148         s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
1149         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1150         s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
1151         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1152         s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
1153
1154         /* /Fx */
1155         /* These are just to be sure */
1156         avctx->delay=0;
1157         s->low_delay=1;
1158         break;
1159     case CODEC_ID_FLV1:
1160         s->out_format = FMT_H263;
1161         s->h263_flv = 2; /* format = 1; 11-bit codes */
1162         s->unrestricted_mv = 1;
1163         s->rtp_mode=0; /* don't allow GOB */
1164         avctx->delay=0;
1165         s->low_delay=1;
1166         break;
1167     case CODEC_ID_RV10:
1168         s->out_format = FMT_H263;
1169         avctx->delay=0;
1170         s->low_delay=1;
1171         break;
1172     case CODEC_ID_RV20:
1173         s->out_format = FMT_H263;
1174         avctx->delay=0;
1175         s->low_delay=1;
1176         s->modified_quant=1;
1177         s->h263_aic=1;
1178         s->h263_plus=1;
1179         s->loop_filter=1;
1180         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1181         break;
1182     case CODEC_ID_MPEG4:
1183         s->out_format = FMT_H263;
1184         s->h263_pred = 1;
1185         s->unrestricted_mv = 1;
1186         s->low_delay= s->max_b_frames ? 0 : 1;
1187         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1188         break;
1189     case CODEC_ID_MSMPEG4V1:
1190         s->out_format = FMT_H263;
1191         s->h263_msmpeg4 = 1;
1192         s->h263_pred = 1;
1193         s->unrestricted_mv = 1;
1194         s->msmpeg4_version= 1;
1195         avctx->delay=0;
1196         s->low_delay=1;
1197         break;
1198     case CODEC_ID_MSMPEG4V2:
1199         s->out_format = FMT_H263;
1200         s->h263_msmpeg4 = 1;
1201         s->h263_pred = 1;
1202         s->unrestricted_mv = 1;
1203         s->msmpeg4_version= 2;
1204         avctx->delay=0;
1205         s->low_delay=1;
1206         break;
1207     case CODEC_ID_MSMPEG4V3:
1208         s->out_format = FMT_H263;
1209         s->h263_msmpeg4 = 1;
1210         s->h263_pred = 1;
1211         s->unrestricted_mv = 1;
1212         s->msmpeg4_version= 3;
1213         s->flipflop_rounding=1;
1214         avctx->delay=0;
1215         s->low_delay=1;
1216         break;
1217     case CODEC_ID_WMV1:
1218         s->out_format = FMT_H263;
1219         s->h263_msmpeg4 = 1;
1220         s->h263_pred = 1;
1221         s->unrestricted_mv = 1;
1222         s->msmpeg4_version= 4;
1223         s->flipflop_rounding=1;
1224         avctx->delay=0;
1225         s->low_delay=1;
1226         break;
1227     case CODEC_ID_WMV2:
1228         s->out_format = FMT_H263;
1229         s->h263_msmpeg4 = 1;
1230         s->h263_pred = 1;
1231         s->unrestricted_mv = 1;
1232         s->msmpeg4_version= 5;
1233         s->flipflop_rounding=1;
1234         avctx->delay=0;
1235         s->low_delay=1;
1236         break;
1237     default:
1238         return -1;
1239     }
1240     
1241     avctx->has_b_frames= !s->low_delay;
1242
1243     s->encoding = 1;
1244
1245     /* init */
1246     if (MPV_common_init(s) < 0)
1247         return -1;
1248
1249     if(s->modified_quant)
1250         s->chroma_qscale_table= ff_h263_chroma_qscale_table;
1251     s->progressive_frame= 
1252     s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME));
1253     s->quant_precision=5;
1254     
1255     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
1256     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
1257     
1258 #ifdef CONFIG_ENCODERS
1259     if (s->out_format == FMT_H261)
1260         ff_h261_encode_init(s);
1261     if (s->out_format == FMT_H263)
1262         h263_encode_init(s);
1263     if(s->msmpeg4_version)
1264         ff_msmpeg4_encode_init(s);
1265     if (s->out_format == FMT_MPEG1)
1266         ff_mpeg1_encode_init(s);
1267 #endif
1268
1269     /* init q matrix */
1270     for(i=0;i<64;i++) {
1271         int j= s->dsp.idct_permutation[i];
1272         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
1273             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
1274             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
1275         }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1276             s->intra_matrix[j] =
1277             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1278         }else
1279         { /* mpeg1/2 */
1280             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
1281             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1282         }
1283         if(s->avctx->intra_matrix)
1284             s->intra_matrix[j] = s->avctx->intra_matrix[i];
1285         if(s->avctx->inter_matrix)
1286             s->inter_matrix[j] = s->avctx->inter_matrix[i];
1287     }
1288
1289     /* precompute matrix */
1290     /* for mjpeg, we do include qscale in the matrix */
1291     if (s->out_format != FMT_MJPEG) {
1292         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16, 
1293                        s->intra_matrix, s->intra_quant_bias, avctx->qmin, 31, 1);
1294         convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16, 
1295                        s->inter_matrix, s->inter_quant_bias, avctx->qmin, 31, 0);
1296     }
1297
1298     if(ff_rate_control_init(s) < 0)
1299         return -1;
1300     
1301     return 0;
1302 }
1303
1304 int MPV_encode_end(AVCodecContext *avctx)
1305 {
1306     MpegEncContext *s = avctx->priv_data;
1307
1308 #ifdef STATS
1309     print_stats();
1310 #endif
1311
1312     ff_rate_control_uninit(s);
1313
1314     MPV_common_end(s);
1315     if (s->out_format == FMT_MJPEG)
1316         mjpeg_close(s);
1317
1318     av_freep(&avctx->extradata);
1319       
1320     return 0;
1321 }
1322
1323 #endif //CONFIG_ENCODERS
1324
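/**
 * builds the derived lookup tables of an RL (run/level) VLC table, separately for
 * the "not last" and "last" coefficient groups: max_level[run] is the largest level
 * coded for a given run, max_run[level] the largest run for a given level, and
 * index_run[run] the index of the first code with that run (rl->n if none).
 */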
1325 void init_rl(RLTable *rl, int use_static)
1326 {
1327     int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
1328     uint8_t index_run[MAX_RUN+1];
1329     int last, run, level, start, end, i;
1330
1331     /* If table is static, we can quit if rl->max_level[0] is not NULL */
1332     if(use_static && rl->max_level[0])
1333         return;
1334
1335     /* compute max_level[], max_run[] and index_run[] */
1336     for(last=0;last<2;last++) {
1337         if (last == 0) {
1338             start = 0;
1339             end = rl->last;
1340         } else {
1341             start = rl->last;
1342             end = rl->n;
1343         }
1344
1345         memset(max_level, 0, MAX_RUN + 1);
1346         memset(max_run, 0, MAX_LEVEL + 1);
1347         memset(index_run, rl->n, MAX_RUN + 1);
1348         for(i=start;i<end;i++) {
1349             run = rl->table_run[i];
1350             level = rl->table_level[i];
1351             if (index_run[run] == rl->n)
1352                 index_run[run] = i;
1353             if (level > max_level[run])
1354                 max_level[run] = level;
1355             if (run > max_run[level])
1356                 max_run[level] = run;
1357         }
1358         if(use_static)
1359             rl->max_level[last] = av_mallocz_static(MAX_RUN + 1);
1360         else
1361             rl->max_level[last] = av_malloc(MAX_RUN + 1);
1362         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
1363         if(use_static)
1364             rl->max_run[last] = av_mallocz_static(MAX_LEVEL + 1);
1365         else
1366             rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
1367         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
1368         if(use_static)
1369             rl->index_run[last] = av_mallocz_static(MAX_RUN + 1);
1370         else
1371             rl->index_run[last] = av_malloc(MAX_RUN + 1);
1372         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
1373     }
1374 }
1375
1376 /* draw the edges of width 'w' of an image of size width, height */
1377 //FIXME check that this is ok for mpeg4 interlaced
1378 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
1379 {
1380     uint8_t *ptr, *last_line;
1381     int i;
1382
1383     last_line = buf + (height - 1) * wrap;
1384     for(i=0;i<w;i++) {
1385         /* top and bottom */
1386         memcpy(buf - (i + 1) * wrap, buf, width);
1387         memcpy(last_line + (i + 1) * wrap, last_line, width);
1388     }
1389     /* left and right */
1390     ptr = buf;
1391     for(i=0;i<height;i++) {
1392         memset(ptr - w, ptr[0], w);
1393         memset(ptr + width, ptr[width-1], w);
1394         ptr += wrap;
1395     }
1396     /* corners */
1397     for(i=0;i<w;i++) {
1398         memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
1399         memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
1400         memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* bottom left */
1401         memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* bottom right */
1402     }
1403 }
1404
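/**
 * returns the index of a free slot in s->picture[]. For shared buffers only
 * completely untouched slots (type==0) qualify; otherwise previously used internal
 * slots are preferred before falling back to any slot without allocated data.
 */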
1405 int ff_find_unused_picture(MpegEncContext *s, int shared){
1406     int i;
1407     
1408     if(shared){
1409         for(i=0; i<MAX_PICTURE_COUNT; i++){
1410             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
1411         }
1412     }else{
1413         for(i=0; i<MAX_PICTURE_COUNT; i++){
1414             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
1415         }
1416         for(i=0; i<MAX_PICTURE_COUNT; i++){
1417             if(s->picture[i].data[0]==NULL) return i;
1418         }
1419     }
1420
1421     assert(0);
1422     return -1;
1423 }
1424
1425 static void update_noise_reduction(MpegEncContext *s){
1426     int intra, i;
1427
1428     for(intra=0; intra<2; intra++){
1429         if(s->dct_count[intra] > (1<<16)){
1430             for(i=0; i<64; i++){
1431                 s->dct_error_sum[intra][i] >>=1;
1432             }
1433             s->dct_count[intra] >>= 1;
1434         }
1435         
1436         for(i=0; i<64; i++){
1437             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
1438         }
1439     }
1440 }
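/*
 * Illustrative note: the expression above is an integer approximation of
 *     noise_reduction * dct_count / dct_error_sum[i]
 * where "+ dct_error_sum[i]/2" gives (approximately) round-to-nearest and "+ 1"
 * guards against division by zero. Purely illustrative numbers: with
 * noise_reduction=256, dct_count=4096 and dct_error_sum[i]=65536 the offset
 * works out to (256*4096 + 32768) / 65537 = 16.
 */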
1441
1442 /**
1443  * generic function for encode/decode called after coding/decoding the header and before a frame is coded/decoded
1444  */
1445 int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
1446 {
1447     int i;
1448     AVFrame *pic;
1449     s->mb_skiped = 0;
1450
1451     assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
1452
1453     /* mark&release old frames */
1454     if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
1455         avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);
1456
1457         /* release forgotten pictures */
1458         /* if(mpeg124/h263) */
1459         if(!s->encoding){
1460             for(i=0; i<MAX_PICTURE_COUNT; i++){
1461                 if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
1462                     av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
1463                     avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);                
1464                 }
1465             }
1466         }
1467     }
1468 alloc:
1469     if(!s->encoding){
1470         /* release non reference frames */
1471         for(i=0; i<MAX_PICTURE_COUNT; i++){
1472             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1473                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1474             }
1475         }
1476
1477         if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
1478             pic= (AVFrame*)s->current_picture_ptr; //we already have an unused image (maybe it was set before reading the header)
1479         else{
1480             i= ff_find_unused_picture(s, 0);
1481             pic= (AVFrame*)&s->picture[i];
1482         }
1483
1484         pic->reference= (s->pict_type != B_TYPE || s->codec_id == CODEC_ID_H264)
1485                         && !s->dropable ? 3 : 0;
1486
1487         pic->coded_picture_number= s->coded_picture_number++;
1488         
1489         if( alloc_picture(s, (Picture*)pic, 0) < 0)
1490             return -1;
1491
1492         s->current_picture_ptr= (Picture*)pic;
1493         s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
1494         s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
1495     }
1496
1497     s->current_picture_ptr->pict_type= s->pict_type;
1498 //    if(s->flags && CODEC_FLAG_QSCALE) 
1499   //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
1500     s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;
1501
1502     copy_picture(&s->current_picture, s->current_picture_ptr);
1503   
1504   if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
1505     if (s->pict_type != B_TYPE) {
1506         s->last_picture_ptr= s->next_picture_ptr;
1507         if(!s->dropable)
1508             s->next_picture_ptr= s->current_picture_ptr;
1509     }
1510 /*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
1511         s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL, 
1512         s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL, 
1513         s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
1514         s->pict_type, s->dropable);*/
1515     
1516     if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
1517     if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);
1518     
1519     if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL)){
1520         av_log(avctx, AV_LOG_ERROR, "warning: first frame is not a keyframe\n");
1521         assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
1522         goto alloc;
1523     }
1524
1525     assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));
1526
1527     if(s->picture_structure!=PICT_FRAME){
1528         int i;
1529         for(i=0; i<4; i++){
1530             if(s->picture_structure == PICT_BOTTOM_FIELD){
1531                  s->current_picture.data[i] += s->current_picture.linesize[i];
1532             } 
1533             s->current_picture.linesize[i] *= 2;
1534             s->last_picture.linesize[i] *=2;
1535             s->next_picture.linesize[i] *=2;
1536         }
1537     }
1538   }
1539    
1540     s->hurry_up= s->avctx->hurry_up;
1541     s->error_resilience= avctx->error_resilience;
1542
1543     /* set dequantizer, we can't do it during init as it might change for mpeg4
1544        and we can't do it in the header decode as init isn't called for mpeg4 there yet */
1545     if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
1546         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1547         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1548     }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1549         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1550         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1551     }else{
1552         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1553         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1554     }
1555
1556     if(s->dct_error_sum){
1557         assert(s->avctx->noise_reduction && s->encoding);
1558
1559         update_noise_reduction(s);
1560     }
1561         
1562 #ifdef HAVE_XVMC
1563     if(s->avctx->xvmc_acceleration)
1564         return XVMC_field_start(s, avctx);
1565 #endif
1566     return 0;
1567 }
1568
1569 /* generic function for encode/decode called after a frame has been coded/decoded */
1570 void MPV_frame_end(MpegEncContext *s)
1571 {
1572     int i;
1573     /* draw edge for correct motion prediction if outside */
1574 #ifdef HAVE_XVMC
1575 //just to make sure that all data is rendered.
1576     if(s->avctx->xvmc_acceleration){
1577         XVMC_field_end(s);
1578     }else
1579 #endif
1580     if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
1581             draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
1582             draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1583             draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1584     }
1585     emms_c();
1586     
1587     s->last_pict_type    = s->pict_type;
1588     if(s->pict_type!=B_TYPE){
1589         s->last_non_b_pict_type= s->pict_type;
1590     }
1591 #if 0
1592         /* copy back current_picture variables */
1593     for(i=0; i<MAX_PICTURE_COUNT; i++){
1594         if(s->picture[i].data[0] == s->current_picture.data[0]){
1595             s->picture[i]= s->current_picture;
1596             break;
1597         }    
1598     }
1599     assert(i<MAX_PICTURE_COUNT);
1600 #endif    
1601
1602     if(s->encoding){
1603         /* release non reference frames */
1604         for(i=0; i<MAX_PICTURE_COUNT; i++){
1605             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1606                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1607             }
1608         }
1609     }
1610     // clear copies, to avoid confusion
1611 #if 0
1612     memset(&s->last_picture, 0, sizeof(Picture));
1613     memset(&s->next_picture, 0, sizeof(Picture));
1614     memset(&s->current_picture, 0, sizeof(Picture));
1615 #endif
1616     s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
1617 }
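/*
 * Illustrative call order (a sketch, not part of the original code): every
 * picture is bracketed by the two functions above, e.g. as MPV_encode_picture()
 * does further below:
 *
 *     if (MPV_frame_start(s, avctx) < 0)   // set up current/last/next pictures
 *         return -1;
 *     ... code or decode all macroblocks of the picture ...
 *     MPV_frame_end(s);                    // draw edges, release unused buffers
 */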
1618
1619 /**
1620  * draws a line from (ex, ey) -> (sx, sy).
1621  * @param w width of the image
1622  * @param h height of the image
1623  * @param stride stride/linesize of the image
1624  * @param color color of the line
1625  */
1626 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1627     int t, x, y, fr, f;
1628     
1629     sx= clip(sx, 0, w-1);
1630     sy= clip(sy, 0, h-1);
1631     ex= clip(ex, 0, w-1);
1632     ey= clip(ey, 0, h-1);
1633     
1634     buf[sy*stride + sx]+= color;
1635     
1636     if(ABS(ex - sx) > ABS(ey - sy)){
1637         if(sx > ex){
1638             t=sx; sx=ex; ex=t;
1639             t=sy; sy=ey; ey=t;
1640         }
1641         buf+= sx + sy*stride;
1642         ex-= sx;
1643         f= ((ey-sy)<<16)/ex;
1644         for(x= 0; x <= ex; x++){
1645             y = (x*f)>>16;
1646             fr= (x*f)&0xFFFF;
1647             buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
1648             buf[(y+1)*stride + x]+= (color*         fr )>>16;
1649         }
1650     }else{
1651         if(sy > ey){
1652             t=sx; sx=ex; ex=t;
1653             t=sy; sy=ey; ey=t;
1654         }
1655         buf+= sx + sy*stride;
1656         ey-= sy;
1657         if(ey) f= ((ex-sx)<<16)/ey;
1658         else   f= 0;
1659         for(y= 0; y <= ey; y++){
1660             x = (y*f)>>16;
1661             fr= (y*f)&0xFFFF;
1662             buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;
1663             buf[y*stride + x+1]+= (color*         fr )>>16;
1664         }
1665     }
1666 }
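/*
 * Illustrative note: f above is a 16.16 fixed-point slope. At every step along
 * the major axis the integer part of x*f (or y*f) selects the base row (or
 * column) and the fractional part fr splits the color between it and the next
 * one, giving a cheap anti-aliased line. Example: for a line from (0,0) to
 * (4,1), f = (1<<16)/4 = 16384, so at x=2 fr = 0x8000 and the two neighbouring
 * pixels each receive color/2.
 */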
1667
1668 /**
1669  * draws an arrow from (ex, ey) -> (sx, sy).
1670  * @param w width of the image
1671  * @param h height of the image
1672  * @param stride stride/linesize of the image
1673  * @param color color of the arrow
1674  */
1675 static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){ 
1676     int dx,dy;
1677
1678     sx= clip(sx, -100, w+100);
1679     sy= clip(sy, -100, h+100);
1680     ex= clip(ex, -100, w+100);
1681     ey= clip(ey, -100, h+100);
1682     
1683     dx= ex - sx;
1684     dy= ey - sy;
1685     
1686     if(dx*dx + dy*dy > 3*3){
1687         int rx=  dx + dy;
1688         int ry= -dx + dy;
1689         int length= ff_sqrt((rx*rx + ry*ry)<<8);
1690         
1691         //FIXME subpixel accuracy
1692         rx= ROUNDED_DIV(rx*3<<4, length);
1693         ry= ROUNDED_DIV(ry*3<<4, length);
1694         
1695         draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
1696         draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
1697     }
1698     draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
1699 }
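/*
 * Illustrative note: the arrow head is only drawn when the vector is longer
 * than 3 pixels. (rx,ry) = (dx+dy, -dx+dy) is the shaft direction rotated by
 * 45 degrees (scaled by sqrt(2)) and (-ry,rx) is that vector rotated a further
 * 90 degrees, giving the two head strokes on either side of the shaft. length
 * holds |(rx,ry)|*16 because of the <<8 under the square root, and rx*3<<4 /
 * length cancels that factor, so each stroke is roughly 3 pixels long and the
 * head sits at (sx,sy), matching the "(ex, ey) -> (sx, sy)" documentation.
 */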
1700
1701 /**
1702  * prints debugging info for the given picture.
1703  */
1704 void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
1705
1706     if(!pict || !pict->mb_type) return;
1707
1708     if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
1709         int x,y;
1710         
1711         av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
1712         switch (pict->pict_type) {
1713             case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
1714             case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
1715             case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
1716             case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
1717             case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
1718             case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;            
1719         }
1720         for(y=0; y<s->mb_height; y++){
1721             for(x=0; x<s->mb_width; x++){
1722                 if(s->avctx->debug&FF_DEBUG_SKIP){
1723                     int count= s->mbskip_table[x + y*s->mb_stride];
1724                     if(count>9) count=9;
1725                     av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
1726                 }
1727                 if(s->avctx->debug&FF_DEBUG_QP){
1728                     av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
1729                 }
1730                 if(s->avctx->debug&FF_DEBUG_MB_TYPE){
1731                     int mb_type= pict->mb_type[x + y*s->mb_stride];
1732                     //Type & MV direction
1733                     if(IS_PCM(mb_type))
1734                         av_log(s->avctx, AV_LOG_DEBUG, "P");
1735                     else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
1736                         av_log(s->avctx, AV_LOG_DEBUG, "A");
1737                     else if(IS_INTRA4x4(mb_type))
1738                         av_log(s->avctx, AV_LOG_DEBUG, "i");
1739                     else if(IS_INTRA16x16(mb_type))
1740                         av_log(s->avctx, AV_LOG_DEBUG, "I");
1741                     else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
1742                         av_log(s->avctx, AV_LOG_DEBUG, "d");
1743                     else if(IS_DIRECT(mb_type))
1744                         av_log(s->avctx, AV_LOG_DEBUG, "D");
1745                     else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
1746                         av_log(s->avctx, AV_LOG_DEBUG, "g");
1747                     else if(IS_GMC(mb_type))
1748                         av_log(s->avctx, AV_LOG_DEBUG, "G");
1749                     else if(IS_SKIP(mb_type))
1750                         av_log(s->avctx, AV_LOG_DEBUG, "S");
1751                     else if(!USES_LIST(mb_type, 1))
1752                         av_log(s->avctx, AV_LOG_DEBUG, ">");
1753                     else if(!USES_LIST(mb_type, 0))
1754                         av_log(s->avctx, AV_LOG_DEBUG, "<");
1755                     else{
1756                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1757                         av_log(s->avctx, AV_LOG_DEBUG, "X");
1758                     }
1759                     
1760                     //segmentation
1761                     if(IS_8X8(mb_type))
1762                         av_log(s->avctx, AV_LOG_DEBUG, "+");
1763                     else if(IS_16X8(mb_type))
1764                         av_log(s->avctx, AV_LOG_DEBUG, "-");
1765                     else if(IS_8X16(mb_type))
1766                         av_log(s->avctx, AV_LOG_DEBUG, "¦");
1767                     else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
1768                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1769                     else
1770                         av_log(s->avctx, AV_LOG_DEBUG, "?");
1771                     
1772                         
1773                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
1774                         av_log(s->avctx, AV_LOG_DEBUG, "=");
1775                     else
1776                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1777                 }
1778 //                av_log(s->avctx, AV_LOG_DEBUG, " ");
1779             }
1780             av_log(s->avctx, AV_LOG_DEBUG, "\n");
1781         }
1782     }
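    /*
     * Legend for the per-macroblock characters printed above:
     *   type/MV direction: P=PCM, A=intra+AC pred, i=intra4x4, I=intra16x16,
     *                      d=direct+skip, D=direct, g=GMC+skip, G=GMC, S=skip,
     *                      >=forward (list 0) only, <=backward (list 1) only,
     *                      X=bidirectional
     *   segmentation:      +=8x8, -=16x8, ¦=8x16, space=16x16 or intra, ?=other
     *   third column:      '=' marks interlaced macroblocks (H.264 only)
     * FF_DEBUG_SKIP prints the mbskip_table value (clipped to 9) and
     * FF_DEBUG_QP the per-macroblock quantizer.
     */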
1783
1784     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
1785         const int shift= 1 + s->quarter_sample;
1786         int mb_y;
1787         uint8_t *ptr;
1788         int i;
1789         int h_chroma_shift, v_chroma_shift;
1790         const int width = s->avctx->width;
1791         const int height= s->avctx->height;
1792         const int mv_sample_log2= 4 - pict->motion_subsample_log2;
1793         const int mv_stride= (s->mb_width << mv_sample_log2) + 1;
1794         s->low_delay=0; //needed to see the vectors without trashing the buffers
1795
1796         avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
1797         for(i=0; i<3; i++){
1798             memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
1799             pict->data[i]= s->visualization_buffer[i];
1800         }
1801         pict->type= FF_BUFFER_TYPE_COPY;
1802         ptr= pict->data[0];
1803
1804         for(mb_y=0; mb_y<s->mb_height; mb_y++){
1805             int mb_x;
1806             for(mb_x=0; mb_x<s->mb_width; mb_x++){
1807                 const int mb_index= mb_x + mb_y*s->mb_stride;
1808                 if((s->avctx->debug_mv) && pict->motion_val){
1809                   int type;
1810                   for(type=0; type<3; type++){
1811                     int direction = 0;
1812                     switch (type) {
1813                       case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
1814                                 continue;
1815                               direction = 0;
1816                               break;
1817                       case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
1818                                 continue;
1819                               direction = 0;
1820                               break;
1821                       case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
1822                                 continue;
1823                               direction = 1;
1824                               break;
1825                     }
1826                     if(!USES_LIST(pict->mb_type[mb_index], direction))
1827                         continue;
1828
1829                     if(IS_8X8(pict->mb_type[mb_index])){
1830                       int i;
1831                       for(i=0; i<4; i++){
1832                         int sx= mb_x*16 + 4 + 8*(i&1);
1833                         int sy= mb_y*16 + 4 + 8*(i>>1);
1834                         int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
1835                         int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1836                         int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1837                         draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1838                       }
1839                     }else if(IS_16X8(pict->mb_type[mb_index])){
1840                       int i;
1841                       for(i=0; i<2; i++){
1842                         int sx=mb_x*16 + 8;
1843                         int sy=mb_y*16 + 4 + 8*i;
1844                         int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
1845                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1846                         int my=(pict->motion_val[direction][xy][1]>>shift);
1847                         
1848                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1849                             my*=2;
1850                         
1851                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1852                       }
1853                     }else if(IS_8X16(pict->mb_type[mb_index])){
1854                       int i;
1855                       for(i=0; i<2; i++){
1856                         int sx=mb_x*16 + 4 + 8*i;
1857                         int sy=mb_y*16 + 8;
1858                         int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
1859                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1860                         int my=(pict->motion_val[direction][xy][1]>>shift);
1861                         
1862                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1863                             my*=2;
1864                         
1865                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1866                       }
1867                     }else{
1868                       int sx= mb_x*16 + 8;
1869                       int sy= mb_y*16 + 8;
1870                       int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
1871                       int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1872                       int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1873                       draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1874                     }
1875                   }                  
1876                 }
1877                 if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
1878                     uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
1879                     int y;
1880                     for(y=0; y<8; y++){
1881                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
1882                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
1883                     }
1884                 }
1885                 if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
1886                     int mb_type= pict->mb_type[mb_index];
1887                     uint64_t u,v;
1888                     int y;
1889 #define COLOR(theta, r)\
1890 u= (int)(128 + r*cos(theta*3.141592/180));\
1891 v= (int)(128 + r*sin(theta*3.141592/180));
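                    /*
                     * Illustrative note: COLOR() picks a chroma pair on a hue
                     * wheel around the neutral value 128: theta is the hue in
                     * degrees, r the saturation (e.g. COLOR(0,48) yields
                     * u=176, v=128). Only the two chroma planes are overwritten
                     * below, so the macroblock type shows up as a color cast
                     * over the original luma.
                     */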
1892
1893                     
1894                     u=v=128;
1895                     if(IS_PCM(mb_type)){
1896                         COLOR(120,48)
1897                     }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
1898                         COLOR(30,48)
1899                     }else if(IS_INTRA4x4(mb_type)){
1900                         COLOR(90,48)
1901                     }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
1902 //                        COLOR(120,48)
1903                     }else if(IS_DIRECT(mb_type)){
1904                         COLOR(150,48)
1905                     }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
1906                         COLOR(170,48)
1907                     }else if(IS_GMC(mb_type)){
1908                         COLOR(190,48)
1909                     }else if(IS_SKIP(mb_type)){
1910 //                        COLOR(180,48)
1911                     }else if(!USES_LIST(mb_type, 1)){
1912                         COLOR(240,48)
1913                     }else if(!USES_LIST(mb_type, 0)){
1914                         COLOR(0,48)
1915                     }else{
1916                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1917                         COLOR(300,48)
1918                     }
1919
1920                     u*= 0x0101010101010101ULL;
1921                     v*= 0x0101010101010101ULL;
1922                     for(y=0; y<8; y++){
1923                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
1924                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
1925                     }
1926
1927                     //segmentation
1928                     if(IS_8X8(mb_type) || IS_16X8(mb_type)){
1929                         *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
1930                         *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
1931                     }
1932                     if(IS_8X8(mb_type) || IS_8X16(mb_type)){
1933                         for(y=0; y<16; y++)
1934                             pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
1935                     }
1936                     if(IS_8X8(mb_type) && mv_sample_log2 >= 2){
1937                         int dm= 1 << (mv_sample_log2-2);
1938                         for(i=0; i<4; i++){
1939                             int sx= mb_x*16 + 8*(i&1);
1940                             int sy= mb_y*16 + 8*(i>>1);
1941                             int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
1942                             //FIXME bidir
1943                             int32_t *mv = (int32_t*)&pict->motion_val[0][xy];
1944                             if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)])
1945                                 for(y=0; y<8; y++)
1946                                     pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80;
1947                             if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)])
1948                                 *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
1949                         }
1950                     }
1951                         
1952                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
1953                         // hmm
1954                     }
1955                 }
1956                 s->mbskip_table[mb_index]=0;
1957             }
1958         }
1959     }
1960 }
1961
1962 #ifdef CONFIG_ENCODERS
1963
1964 static int get_sae(uint8_t *src, int ref, int stride){
1965     int x,y;
1966     int acc=0;
1967     
1968     for(y=0; y<16; y++){
1969         for(x=0; x<16; x++){
1970             acc+= ABS(src[x+y*stride] - ref);
1971         }
1972     }
1973     
1974     return acc;
1975 }
1976
1977 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
1978     int x, y, w, h;
1979     int acc=0;
1980     
1981     w= s->width &~15;
1982     h= s->height&~15;
1983     
1984     for(y=0; y<h; y+=16){
1985         for(x=0; x<w; x+=16){
1986             int offset= x + y*stride;
1987             int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
1988             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
1989             int sae = get_sae(src + offset, mean, stride);
1990             
1991             acc+= sae + 500 < sad;
1992         }
1993     }
1994     return acc;
1995 }
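/*
 * Illustrative note: for every 16x16 block, sad is a zero-motion-vector inter
 * cost against the co-located block of the previous input picture, while sae
 * (absolute differences against the block's own mean) is a crude intra cost.
 * A block is counted when sae + 500 < sad, i.e. when intra coding looks clearly
 * cheaper; b_frame_strategy==1 in select_input_picture() below uses this count
 * to decide how many consecutive pictures can safely become B-frames.
 */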
1996
1997
1998 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
1999     AVFrame *pic=NULL;
2000     int64_t pts;
2001     int i;
2002     const int encoding_delay= s->max_b_frames;
2003     int direct=1;
2004     
2005     if(pic_arg){
2006         pts= pic_arg->pts;
2007         pic_arg->display_picture_number= s->input_picture_number++;
2008
2009         if(pts != AV_NOPTS_VALUE){ 
2010             if(s->user_specified_pts != AV_NOPTS_VALUE){
2011                 int64_t time= av_rescale(pts, s->avctx->frame_rate, s->avctx->frame_rate_base*(int64_t)AV_TIME_BASE);
2012                 int64_t last= av_rescale(s->user_specified_pts, s->avctx->frame_rate, s->avctx->frame_rate_base*(int64_t)AV_TIME_BASE);
2013             
2014                 if(time <= last){            
2015                     av_log(s->avctx, AV_LOG_ERROR, "Error, Invalid timestamp=%Ld, last=%Ld\n", pts, s->user_specified_pts);
2016                     return -1;
2017                 }
2018             }
2019             s->user_specified_pts= pts;
2020         }else{
2021             if(s->user_specified_pts != AV_NOPTS_VALUE){
2022                 s->user_specified_pts= 
2023                 pts= s->user_specified_pts + AV_TIME_BASE*(int64_t)s->avctx->frame_rate_base / s->avctx->frame_rate;
2024                 av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%Ld)\n", pts);
2025             }else{
2026                 pts= av_rescale(pic_arg->display_picture_number*(int64_t)s->avctx->frame_rate_base, AV_TIME_BASE, s->avctx->frame_rate);
2027             }
2028         }
2029     }
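    /*
     * Illustrative note: incoming AVFrame.pts values are expected in
     * AV_TIME_BASE (microsecond) units here; av_rescale() converts them to
     * frame indices via the frame_rate/frame_rate_base rational, and those
     * indices must be strictly increasing. Example numbers: at 25 fps one
     * frame lasts AV_TIME_BASE*frame_rate_base/frame_rate = 40000 us, so a
     * missing pts is guessed as "previous pts + 40000" and pts=120000
     * corresponds to frame 3.
     */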
2030
2031   if(pic_arg){
2032     if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
2033     if(pic_arg->linesize[0] != s->linesize) direct=0;
2034     if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
2035     if(pic_arg->linesize[2] != s->uvlinesize) direct=0;
2036   
2037 //    av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);
2038     
2039     if(direct){
2040         i= ff_find_unused_picture(s, 1);
2041
2042         pic= (AVFrame*)&s->picture[i];
2043         pic->reference= 3;
2044     
2045         for(i=0; i<4; i++){
2046             pic->data[i]= pic_arg->data[i];
2047             pic->linesize[i]= pic_arg->linesize[i];
2048         }
2049         alloc_picture(s, (Picture*)pic, 1);
2050     }else{
2051         int offset= 16;
2052         i= ff_find_unused_picture(s, 0);
2053
2054         pic= (AVFrame*)&s->picture[i];
2055         pic->reference= 3;
2056
2057         alloc_picture(s, (Picture*)pic, 0);
2058
2059         if(   pic->data[0] + offset == pic_arg->data[0] 
2060            && pic->data[1] + offset == pic_arg->data[1]
2061            && pic->data[2] + offset == pic_arg->data[2]){
2062        // empty
2063         }else{
2064             int h_chroma_shift, v_chroma_shift;
2065             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
2066         
2067             for(i=0; i<3; i++){
2068                 int src_stride= pic_arg->linesize[i];
2069                 int dst_stride= i ? s->uvlinesize : s->linesize;
2070                 int h_shift= i ? h_chroma_shift : 0;
2071                 int v_shift= i ? v_chroma_shift : 0;
2072                 int w= s->width >>h_shift;
2073                 int h= s->height>>v_shift;
2074                 uint8_t *src= pic_arg->data[i];
2075                 uint8_t *dst= pic->data[i] + offset;
2076             
2077                 if(src_stride==dst_stride)
2078                     memcpy(dst, src, src_stride*h);
2079                 else{
2080                     while(h--){
2081                         memcpy(dst, src, w);
2082                         dst += dst_stride;
2083                         src += src_stride;
2084                     }
2085                 }
2086             }
2087         }
2088     }
2089     copy_picture_attributes(s, pic, pic_arg);
2090     pic->pts= pts; //we set this here to avoid modifying pic_arg
2091   }
2092   
2093     /* shift buffer entries */
2094     for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
2095         s->input_picture[i-1]= s->input_picture[i];
2096         
2097     s->input_picture[encoding_delay]= (Picture*)pic;
2098
2099     return 0;
2100 }
2101
2102 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref){
2103     int x, y, plane;
2104     int score=0;
2105     int64_t score64=0;
2106
2107     for(plane=0; plane<3; plane++){
2108         const int stride= p->linesize[plane];
2109         const int bw= plane ? 1 : 2;
2110         for(y=0; y<s->mb_height*bw; y++){
2111             for(x=0; x<s->mb_width*bw; x++){
2112                 int v= s->dsp.frame_skip_cmp[1](s, p->data[plane] + 8*(x + y*stride), ref->data[plane] + 8*(x + y*stride), stride, 8);
2113                 
2114                 switch(s->avctx->frame_skip_exp){
2115                     case 0: score= FFMAX(score, v); break;
2116                     case 1: score+= ABS(v);break;
2117                     case 2: score+= v*v;break;
2118                     case 3: score64+= ABS(v*v*(int64_t)v);break;
2119                     case 4: score64+= v*v*(int64_t)(v*v);break;
2120                 }
2121             }
2122         }
2123     }
2124     
2125     if(score) score64= score;
2126
2127     if(score64 < s->avctx->frame_skip_threshold)
2128         return 1;
2129     if(score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda)>>8))
2130         return 1;
2131     return 0;
2132 }
2133
2134 static void select_input_picture(MpegEncContext *s){
2135     int i;
2136
2137     for(i=1; i<MAX_PICTURE_COUNT; i++)
2138         s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
2139     s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;
2140
2141     /* set next picture types & ordering */
2142     if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
2143         if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
2144             s->reordered_input_picture[0]= s->input_picture[0];
2145             s->reordered_input_picture[0]->pict_type= I_TYPE;
2146             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2147         }else{
2148             int b_frames;
2149
2150             if(s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor){
2151                 if(skip_check(s, s->input_picture[0], s->next_picture_ptr)){
2152 //av_log(NULL, AV_LOG_DEBUG, "skip %p %Ld\n", s->input_picture[0]->data[0], s->input_picture[0]->pts);
2153                 
2154                     if(s->input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2155                         for(i=0; i<4; i++)
2156                             s->input_picture[0]->data[i]= NULL;
2157                         s->input_picture[0]->type= 0;            
2158                     }else{
2159                         assert(   s->input_picture[0]->type==FF_BUFFER_TYPE_USER 
2160                                || s->input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2161             
2162                         s->avctx->release_buffer(s->avctx, (AVFrame*)s->input_picture[0]);
2163                     }
2164
2165                     goto no_output_pic;
2166                 }
2167             }
2168
2169             if(s->flags&CODEC_FLAG_PASS2){
2170                 for(i=0; i<s->max_b_frames+1; i++){
2171                     int pict_num= s->input_picture[0]->display_picture_number + i;
2172
2173                     if(pict_num >= s->rc_context.num_entries) 
2174                         break;
2175                     if(!s->input_picture[i]){
2176                         s->rc_context.entry[pict_num-1].new_pict_type = P_TYPE;
2177                         break;
2178                     }
2179
2180                     s->input_picture[i]->pict_type= 
2181                         s->rc_context.entry[pict_num].new_pict_type;
2182                 }
2183             }
2184
2185             if(s->avctx->b_frame_strategy==0){
2186                 b_frames= s->max_b_frames;
2187                 while(b_frames && !s->input_picture[b_frames]) b_frames--;
2188             }else if(s->avctx->b_frame_strategy==1){
2189                 for(i=1; i<s->max_b_frames+1; i++){
2190                     if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
2191                         s->input_picture[i]->b_frame_score= 
2192                             get_intra_count(s, s->input_picture[i  ]->data[0], 
2193                                                s->input_picture[i-1]->data[0], s->linesize) + 1;
2194                     }
2195                 }
2196                 for(i=0; i<s->max_b_frames; i++){
2197                     if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/40) break;
2198                 }
2199                                 
2200                 b_frames= FFMAX(0, i-1);
2201                 
2202                 /* reset scores */
2203                 for(i=0; i<b_frames+1; i++){
2204                     s->input_picture[i]->b_frame_score=0;
2205                 }
2206             }else{
2207                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
2208                 b_frames=0;
2209             }
2210
2211             emms_c();
2212 //static int b_count=0;
2213 //b_count+= b_frames;
2214 //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
2215
2216             for(i= b_frames - 1; i>=0; i--){
2217                 int type= s->input_picture[i]->pict_type;
2218                 if(type && type != B_TYPE)
2219                     b_frames= i;
2220             }
2221             if(s->input_picture[b_frames]->pict_type == B_TYPE && b_frames == s->max_b_frames){
2222                 av_log(s->avctx, AV_LOG_ERROR, "warning, too many bframes in a row\n");
2223             }
2224
2225             if(s->picture_in_gop_number + b_frames >= s->gop_size){
2226               if((s->flags2 & CODEC_FLAG2_STRICT_GOP) && s->gop_size > s->picture_in_gop_number){
2227                     b_frames= s->gop_size - s->picture_in_gop_number - 1;
2228               }else{
2229                 if(s->flags & CODEC_FLAG_CLOSED_GOP)
2230                     b_frames=0;
2231                 s->input_picture[b_frames]->pict_type= I_TYPE;
2232               }
2233             }
2234             
2235             if(   (s->flags & CODEC_FLAG_CLOSED_GOP)
2236                && b_frames
2237                && s->input_picture[b_frames]->pict_type== I_TYPE)
2238                 b_frames--;
2239
2240             s->reordered_input_picture[0]= s->input_picture[b_frames];
2241             if(s->reordered_input_picture[0]->pict_type != I_TYPE)
2242                 s->reordered_input_picture[0]->pict_type= P_TYPE;
2243             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2244             for(i=0; i<b_frames; i++){
2245                 s->reordered_input_picture[i+1]= s->input_picture[i];
2246                 s->reordered_input_picture[i+1]->pict_type= B_TYPE;
2247                 s->reordered_input_picture[i+1]->coded_picture_number= s->coded_picture_number++;
2248             }
2249         }
2250     }
2251 no_output_pic:
2252     if(s->reordered_input_picture[0]){
2253         s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;
2254
2255         copy_picture(&s->new_picture, s->reordered_input_picture[0]);
2256
2257         if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2258             // input is a shared pix, so we can't modify it -> alloc a new one & ensure that the shared one is reusable
2259         
2260             int i= ff_find_unused_picture(s, 0);
2261             Picture *pic= &s->picture[i];
2262
2263             /* mark us unused / free shared pic */
2264             for(i=0; i<4; i++)
2265                 s->reordered_input_picture[0]->data[i]= NULL;
2266             s->reordered_input_picture[0]->type= 0;
2267             
2268             pic->reference              = s->reordered_input_picture[0]->reference;
2269             
2270             alloc_picture(s, pic, 0);
2271
2272             copy_picture_attributes(s, (AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);
2273
2274             s->current_picture_ptr= pic;
2275         }else{
2276             // input is not a shared pix -> reuse buffer for current_pix
2277
2278             assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER 
2279                    || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2280             
2281             s->current_picture_ptr= s->reordered_input_picture[0];
2282             for(i=0; i<4; i++){
2283                 s->new_picture.data[i]+=16;
2284             }
2285         }
2286         copy_picture(&s->current_picture, s->current_picture_ptr);
2287     
2288         s->picture_number= s->new_picture.display_picture_number;
2289 //printf("dpn:%d\n", s->picture_number);
2290     }else{
2291        memset(&s->new_picture, 0, sizeof(Picture));
2292     }
2293 }
2294
2295 int MPV_encode_picture(AVCodecContext *avctx,
2296                        unsigned char *buf, int buf_size, void *data)
2297 {
2298     MpegEncContext *s = avctx->priv_data;
2299     AVFrame *pic_arg = data;
2300     int i, stuffing_count;
2301
2302     if(avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUVJ420P){
2303         av_log(avctx, AV_LOG_ERROR, "this codec supports only YUV420P\n");
2304         return -1;
2305     }
2306     
2307     for(i=0; i<avctx->thread_count; i++){
2308         int start_y= s->thread_context[i]->start_mb_y;
2309         int   end_y= s->thread_context[i]->  end_mb_y;
2310         int h= s->mb_height;
2311         uint8_t *start= buf + buf_size*start_y/h;
2312         uint8_t *end  = buf + buf_size*  end_y/h;
2313
2314         init_put_bits(&s->thread_context[i]->pb, start, end - start);
2315     }
2316
2317     s->picture_in_gop_number++;
2318
2319     if(load_input_picture(s, pic_arg) < 0)
2320         return -1;
2321     
2322     select_input_picture(s);
2323     
2324     /* output? */
2325     if(s->new_picture.data[0]){
2326         s->pict_type= s->new_picture.pict_type;
2327 //emms_c();
2328 //printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
2329         MPV_frame_start(s, avctx);
2330
2331         encode_picture(s, s->picture_number);
2332         
2333         avctx->real_pict_num  = s->picture_number;
2334         avctx->header_bits = s->header_bits;
2335         avctx->mv_bits     = s->mv_bits;
2336         avctx->misc_bits   = s->misc_bits;
2337         avctx->i_tex_bits  = s->i_tex_bits;
2338         avctx->p_tex_bits  = s->p_tex_bits;
2339         avctx->i_count     = s->i_count;
2340         avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
2341         avctx->skip_count  = s->skip_count;
2342
2343         MPV_frame_end(s);
2344
2345         if (s->out_format == FMT_MJPEG)
2346             mjpeg_picture_trailer(s);
2347         
2348         if(s->flags&CODEC_FLAG_PASS1)
2349             ff_write_pass1_stats(s);
2350
2351         for(i=0; i<4; i++){
2352             avctx->error[i] += s->current_picture_ptr->error[i];
2353         }
2354
2355         if(s->flags&CODEC_FLAG_PASS1)
2356             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits + avctx->i_tex_bits + avctx->p_tex_bits == put_bits_count(&s->pb));
2357         flush_put_bits(&s->pb);
2358         s->frame_bits  = put_bits_count(&s->pb);
2359
2360         stuffing_count= ff_vbv_update(s, s->frame_bits);
2361         if(stuffing_count){
2362             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < stuffing_count + 50){
2363                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
2364                 return -1;
2365             }
2366
2367             switch(s->codec_id){
2368             case CODEC_ID_MPEG1VIDEO:
2369             case CODEC_ID_MPEG2VIDEO:
2370                 while(stuffing_count--){
2371                     put_bits(&s->pb, 8, 0);
2372                 }
2373             break;
2374             case CODEC_ID_MPEG4:
2375                 put_bits(&s->pb, 16, 0);
2376                 put_bits(&s->pb, 16, 0x1C3);
2377                 stuffing_count -= 4;
2378                 while(stuffing_count--){
2379                     put_bits(&s->pb, 8, 0xFF);
2380                 }
2381             break;
2382             default:
2383                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
2384             }
2385             flush_put_bits(&s->pb);
2386             s->frame_bits  = put_bits_count(&s->pb);
2387         }
2388
2389         /* update mpeg1/2 vbv_delay for CBR */    
2390         if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && s->out_format == FMT_MPEG1
2391            && 90000LL * (avctx->rc_buffer_size-1) <= s->avctx->rc_max_rate*0xFFFFLL){
2392             int vbv_delay;
2393
2394             assert(s->repeat_first_field==0);
2395             
2396             vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate);
2397             assert(vbv_delay < 0xFFFF);
2398
2399             s->vbv_delay_ptr[0] &= 0xF8;
2400             s->vbv_delay_ptr[0] |= vbv_delay>>13;
2401             s->vbv_delay_ptr[1]  = vbv_delay>>5;
2402             s->vbv_delay_ptr[2] &= 0x07;
2403             s->vbv_delay_ptr[2] |= vbv_delay<<3;
2404         }
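        /*
         * Illustrative note: vbv_delay is the 16-bit MPEG-1/2 picture header
         * field, expressed in 90 kHz clock ticks. It is patched into the
         * already written header via s->vbv_delay_ptr: the top 3 bits go into
         * the low bits of the first byte, the next 8 bits fill the second
         * byte, and the low 5 bits land in the high bits of the third byte
         * (3+8+5 = 16).
         */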
2405         s->total_bits += s->frame_bits;
2406         avctx->frame_bits  = s->frame_bits;
2407     }else{
2408         assert((pbBufPtr(&s->pb) == s->pb.buf));
2409         s->frame_bits=0;
2410     }
2411     assert((s->frame_bits&7)==0);
2412     
2413     return s->frame_bits/8;
2414 }
2415
2416 #endif //CONFIG_ENCODERS
2417
2418 static inline void gmc1_motion(MpegEncContext *s,
2419                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2420                                uint8_t **ref_picture)
2421 {
2422     uint8_t *ptr;
2423     int offset, src_x, src_y, linesize, uvlinesize;
2424     int motion_x, motion_y;
2425     int emu=0;
2426
2427     motion_x= s->sprite_offset[0][0];
2428     motion_y= s->sprite_offset[0][1];
2429     src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
2430     src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
2431     motion_x<<=(3-s->sprite_warping_accuracy);
2432     motion_y<<=(3-s->sprite_warping_accuracy);
2433     src_x = clip(src_x, -16, s->width);
2434     if (src_x == s->width)
2435         motion_x =0;
2436     src_y = clip(src_y, -16, s->height);
2437     if (src_y == s->height)
2438         motion_y =0;
2439
2440     linesize = s->linesize;
2441     uvlinesize = s->uvlinesize;
2442     
2443     ptr = ref_picture[0] + (src_y * linesize) + src_x;
2444
2445     if(s->flags&CODEC_FLAG_EMU_EDGE){
2446         if(   (unsigned)src_x >= s->h_edge_pos - 17
2447            || (unsigned)src_y >= s->v_edge_pos - 17){
2448             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
2449             ptr= s->edge_emu_buffer;
2450         }
2451     }
2452     
2453     if((motion_x|motion_y)&7){
2454         s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
2455         s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
2456     }else{
2457         int dxy;
2458         
2459         dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
2460         if (s->no_rounding){
2461             s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
2462         }else{
2463             s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
2464         }
2465     }
2466     
2467     if(s->flags&CODEC_FLAG_GRAY) return;
2468
2469     motion_x= s->sprite_offset[1][0];
2470     motion_y= s->sprite_offset[1][1];
2471     src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
2472     src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
2473     motion_x<<=(3-s->sprite_warping_accuracy);
2474     motion_y<<=(3-s->sprite_warping_accuracy);
2475     src_x = clip(src_x, -8, s->width>>1);
2476     if (src_x == s->width>>1)
2477         motion_x =0;
2478     src_y = clip(src_y, -8, s->height>>1);
2479     if (src_y == s->height>>1)
2480         motion_y =0;
2481
2482     offset = (src_y * uvlinesize) + src_x;
2483     ptr = ref_picture[1] + offset;
2484     if(s->flags&CODEC_FLAG_EMU_EDGE){
2485         if(   (unsigned)src_x >= (s->h_edge_pos>>1) - 9
2486            || (unsigned)src_y >= (s->v_edge_pos>>1) - 9){
2487             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2488             ptr= s->edge_emu_buffer;
2489             emu=1;
2490         }
2491     }
2492     s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
2493     
2494     ptr = ref_picture[2] + offset;
2495     if(emu){
2496         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2497         ptr= s->edge_emu_buffer;
2498     }
2499     s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
2500     
2501     return;
2502 }
2503
2504 static inline void gmc_motion(MpegEncContext *s,
2505                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2506                                uint8_t **ref_picture)
2507 {
2508     uint8_t *ptr;
2509     int linesize, uvlinesize;
2510     const int a= s->sprite_warping_accuracy;
2511     int ox, oy;
2512
2513     linesize = s->linesize;
2514     uvlinesize = s->uvlinesize;
2515
2516     ptr = ref_picture[0];
2517
2518     ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
2519     oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
2520
2521     s->dsp.gmc(dest_y, ptr, linesize, 16,
2522            ox, 
2523            oy, 
2524            s->sprite_delta[0][0], s->sprite_delta[0][1],
2525            s->sprite_delta[1][0], s->sprite_delta[1][1], 
2526            a+1, (1<<(2*a+1)) - s->no_rounding,
2527            s->h_edge_pos, s->v_edge_pos);
2528     s->dsp.gmc(dest_y+8, ptr, linesize, 16,
2529            ox + s->sprite_delta[0][0]*8, 
2530            oy + s->sprite_delta[1][0]*8, 
2531            s->sprite_delta[0][0], s->sprite_delta[0][1],
2532            s->sprite_delta[1][0], s->sprite_delta[1][1], 
2533            a+1, (1<<(2*a+1)) - s->no_rounding,
2534            s->h_edge_pos, s->v_edge_pos);
2535
2536     if(s->flags&CODEC_FLAG_GRAY) return;
2537
2538     ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
2539     oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
2540
2541     ptr = ref_picture[1];
2542     s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
2543            ox, 
2544            oy, 
2545            s->sprite_delta[0][0], s->sprite_delta[0][1],
2546            s->sprite_delta[1][0], s->sprite_delta[1][1], 
2547            a+1, (1<<(2*a+1)) - s->no_rounding,
2548            s->h_edge_pos>>1, s->v_edge_pos>>1);
2549     
2550     ptr = ref_picture[2];
2551     s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
2552            ox, 
2553            oy, 
2554            s->sprite_delta[0][0], s->sprite_delta[0][1],
2555            s->sprite_delta[1][0], s->sprite_delta[1][1], 
2556            a+1, (1<<(2*a+1)) - s->no_rounding,
2557            s->h_edge_pos>>1, s->v_edge_pos>>1);
2558 }
2559
2560 /**
2561  * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
2562  * @param buf destination buffer
2563  * @param src source buffer
2564  * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
2565  * @param block_w width of block
2566  * @param block_h height of block
2567  * @param src_x x coordinate of the top left sample of the block in the source buffer
2568  * @param src_y y coordinate of the top left sample of the block in the source buffer
2569  * @param w width of the source buffer
2570  * @param h height of the source buffer
2571  */
2572 void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h, 
2573                                     int src_x, int src_y, int w, int h){
2574     int x, y;
2575     int start_y, start_x, end_y, end_x;
2576
2577     if(src_y>= h){
2578         src+= (h-1-src_y)*linesize;
2579         src_y=h-1;
2580     }else if(src_y<=-block_h){
2581         src+= (1-block_h-src_y)*linesize;
2582         src_y=1-block_h;
2583     }
2584     if(src_x>= w){
2585         src+= (w-1-src_x);
2586         src_x=w-1;
2587     }else if(src_x<=-block_w){
2588         src+= (1-block_w-src_x);
2589         src_x=1-block_w;
2590     }
2591
2592     start_y= FFMAX(0, -src_y);
2593     start_x= FFMAX(0, -src_x);
2594     end_y= FFMIN(block_h, h-src_y);
2595     end_x= FFMIN(block_w, w-src_x);
2596
2597     // copy existing part
2598     for(y=start_y; y<end_y; y++){
2599         for(x=start_x; x<end_x; x++){
2600             buf[x + y*linesize]= src[x + y*linesize];
2601         }
2602     }
2603
2604     //top
2605     for(y=0; y<start_y; y++){
2606         for(x=start_x; x<end_x; x++){
2607             buf[x + y*linesize]= buf[x + start_y*linesize];
2608         }
2609     }
2610
2611     //bottom
2612     for(y=end_y; y<block_h; y++){
2613         for(x=start_x; x<end_x; x++){
2614             buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
2615         }
2616     }
2617                                     
2618     for(y=0; y<block_h; y++){
2619        //left
2620         for(x=0; x<start_x; x++){
2621             buf[x + y*linesize]= buf[start_x + y*linesize];
2622         }
2623        
2624        //right
2625         for(x=end_x; x<block_w; x++){
2626             buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
2627         }
2628     }
2629 }
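/*
 * Typical usage pattern (a simplified sketch, roughly mirroring the calls in
 * gmc1_motion() and hpel_motion() in this file): when a motion-compensated read
 * would fall outside the edge-padded picture, the block is first copied into
 * s->edge_emu_buffer with its borders replicated and prediction then reads
 * from that buffer instead:
 *
 *     if(   (unsigned)src_x > h_edge_pos - block_w
 *        || (unsigned)src_y > v_edge_pos - block_h){
 *         ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize,
 *                             block_w, block_h, src_x, src_y, h_edge_pos, v_edge_pos);
 *         src= s->edge_emu_buffer;
 *     }
 */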
2630
2631 static inline int hpel_motion(MpegEncContext *s, 
2632                                   uint8_t *dest, uint8_t *src,
2633                                   int field_based, int field_select,
2634                                   int src_x, int src_y,
2635                                   int width, int height, int stride,
2636                                   int h_edge_pos, int v_edge_pos,
2637                                   int w, int h, op_pixels_func *pix_op,
2638                                   int motion_x, int motion_y)
2639 {
2640     int dxy;
2641     int emu=0;
2642
2643     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2644     src_x += motion_x >> 1;
2645     src_y += motion_y >> 1;
2646                 
2647     /* WARNING: do not forget half pels */
2648     src_x = clip(src_x, -16, width); //FIXME unneeded for emu?
2649     if (src_x == width)
2650         dxy &= ~1;
2651     src_y = clip(src_y, -16, height);
2652     if (src_y == height)
2653         dxy &= ~2;
2654     src += src_y * stride + src_x;
2655
2656     if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
2657         if(   (unsigned)src_x > h_edge_pos - (motion_x&1) - w
2658            || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){
2659             ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2660                              src_x, src_y<<field_based, h_edge_pos, s->v_edge_pos);
2661             src= s->edge_emu_buffer;
2662             emu=1;
2663         }
2664     }
2665     if(field_select)
2666         src += s->linesize;
2667     pix_op[dxy](dest, src, stride, h);
2668     return emu;
2669 }
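/*
 * Illustrative note: dxy packs the half-pel phase of the motion vector
 * (bit 0 = horizontal half-pel, bit 1 = vertical half-pel) and selects one of
 * the four interpolation functions in pix_op[] (copy, hpel-x, hpel-y, hpel-xy).
 * The "dxy &= ~1/~2" above drops the half-pel component when the clipped
 * source position sits exactly on the right or bottom boundary, so no
 * averaging is done with a sample beyond that edge.
 */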
2670
2671 static inline int hpel_motion_lowres(MpegEncContext *s, 
2672                                   uint8_t *dest, uint8_t *src,
2673                                   int field_based, int field_select,
2674                                   int src_x, int src_y,
2675                                   int width, int height, int stride,
2676                                   int h_edge_pos, int v_edge_pos,
2677                                   int w, int h, h264_chroma_mc_func *pix_op,
2678                                   int motion_x, int motion_y)
2679 {
2680     const int lowres= s->avctx->lowres;
2681     const int s_mask= (2<<lowres)-1;
2682     int emu=0;
2683     int sx, sy;
2684
2685     if(s->quarter_sample){
2686         motion_x/=2;
2687         motion_y/=2;
2688     }
2689
2690     sx= motion_x & s_mask;
2691     sy= motion_y & s_mask;
2692     src_x += motion_x >> (lowres+1);
2693     src_y += motion_y >> (lowres+1);
2694                 
2695     src += src_y * stride + src_x;
2696
2697     if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - w
2698        || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
2699         ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2700                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
2701         src= s->edge_emu_buffer;
2702         emu=1;
2703     }
2704
2705     sx <<= 2 - lowres;
2706     sy <<= 2 - lowres;
2707     if(field_select)
2708         src += s->linesize;
2709     pix_op[lowres](dest, src, stride, h, sx, sy);
2710     return emu;
2711 }
2712
2713 /* apply one mpeg motion vector to the three components */
2714 static always_inline void mpeg_motion(MpegEncContext *s,
2715                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2716                                int field_based, int bottom_field, int field_select,
2717                                uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
2718                                int motion_x, int motion_y, int h)
2719 {
2720     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2721     int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, uvlinesize, linesize;
2722     
2723 #if 0    
2724 if(s->quarter_sample)
2725 {
2726     motion_x>>=1;
2727     motion_y>>=1;
2728 }
2729 #endif
2730
2731     v_edge_pos = s->v_edge_pos >> field_based;
2732     linesize   = s->current_picture.linesize[0] << field_based;
2733     uvlinesize = s->current_picture.linesize[1] << field_based;
2734
2735     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2736     src_x = s->mb_x* 16               + (motion_x >> 1);
2737     src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1);
2738
2739     if (s->out_format == FMT_H263) {
2740         if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){
2741             mx = (motion_x>>1)|(motion_x&1);
2742             my = motion_y >>1;
2743             uvdxy = ((my & 1) << 1) | (mx & 1);
2744             uvsrc_x = s->mb_x* 8               + (mx >> 1);
2745             uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
2746         }else{
2747             uvdxy = dxy | (motion_y & 2) | ((motion_x & 2) >> 1);
2748             uvsrc_x = src_x>>1;
2749             uvsrc_y = src_y>>1;
2750         }
2751     }else if(s->out_format == FMT_H261){//even chroma MVs are full-pel in H.261
2752         mx = motion_x / 4;
2753         my = motion_y / 4;
2754         uvdxy = 0;
2755         uvsrc_x = s->mb_x*8 + mx;
2756         uvsrc_y = s->mb_y*8 + my;
2757     } else {
2758         if(s->chroma_y_shift){
2759             mx = motion_x / 2;
2760             my = motion_y / 2;
2761             uvdxy = ((my & 1) << 1) | (mx & 1);
2762             uvsrc_x = s->mb_x* 8               + (mx >> 1);
2763             uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
2764         } else {
2765             if(s->chroma_x_shift){
2766             //Chroma422
2767                 mx = motion_x / 2;
2768                 uvdxy = ((motion_y & 1) << 1) | (mx & 1);
2769                 uvsrc_x = s->mb_x* 8           + (mx >> 1);
2770                 uvsrc_y = src_y;
2771             } else {
2772             //Chroma444
2773                 uvdxy = dxy;
2774                 uvsrc_x = src_x;
2775                 uvsrc_y = src_y;
2776             }
2777         }
2778     }
2779
2780     ptr_y  = ref_picture[0] + src_y * linesize + src_x;
2781     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
2782     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
2783
2784     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
2785        || (unsigned)src_y >    v_edge_pos - (motion_y&1) - h){
2786             if(s->codec_id == CODEC_ID_MPEG2VIDEO ||
2787                s->codec_id == CODEC_ID_MPEG1VIDEO){
2788                 av_log(s->avctx,AV_LOG_DEBUG,"MPEG motion vector out of boundary\n");
2789                 return ;
2790             }
2791             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
2792                              src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
2793             ptr_y = s->edge_emu_buffer;
2794             if(!(s->flags&CODEC_FLAG_GRAY)){
2795                 uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
2796                 ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based, 
2797                                  uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2798                 ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based, 
2799                                  uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2800                 ptr_cb= uvbuf;
2801                 ptr_cr= uvbuf+16;
2802             }
2803     }
2804
2805     if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
2806         dest_y += s->linesize;
2807         dest_cb+= s->uvlinesize;
2808         dest_cr+= s->uvlinesize;
2809     }
2810
2811     if(field_select){
2812         ptr_y += s->linesize;
2813         ptr_cb+= s->uvlinesize;
2814         ptr_cr+= s->uvlinesize;
2815     }
2816
2817     pix_op[0][dxy](dest_y, ptr_y, linesize, h);
2818     
2819     if(!(s->flags&CODEC_FLAG_GRAY)){
2820         pix_op[s->chroma_x_shift][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
2821         pix_op[s->chroma_x_shift][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
2822     }
2823     if(s->out_format == FMT_H261){
2824         ff_h261_loop_filter(s);
2825     }
2826 }
2827
2828 /* apply one mpeg motion vector to the three components */
2829 static always_inline void mpeg_motion_lowres(MpegEncContext *s,
2830                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2831                                int field_based, int bottom_field, int field_select,
2832                                uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
2833                                int motion_x, int motion_y, int h)
2834 {
2835     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2836     int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
2837     const int lowres= s->avctx->lowres;
2838     const int block_s= 8>>lowres;
2839     const int s_mask= (2<<lowres)-1;
2840     const int h_edge_pos = s->h_edge_pos >> lowres;
2841     const int v_edge_pos = s->v_edge_pos >> lowres;
2842     linesize   = s->current_picture.linesize[0] << field_based;
2843     uvlinesize = s->current_picture.linesize[1] << field_based;
2844
2845     if(s->quarter_sample){ //FIXME obviously not perfect but qpel won't work in lowres anyway
2846         motion_x/=2;
2847         motion_y/=2;
2848     }
2849     
2850     if(field_based){
2851         motion_y += (bottom_field - field_select)*((1<<lowres)-1);
2852     }
2853
2854     sx= motion_x & s_mask;
2855     sy= motion_y & s_mask;
2856     src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
2857     src_y =(s->mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));
2858     
2859     if (s->out_format == FMT_H263) {
2860         uvsx = ((motion_x>>1) & s_mask) | (sx&1);
2861         uvsy = ((motion_y>>1) & s_mask) | (sy&1);
2862         uvsrc_x = src_x>>1;
2863         uvsrc_y = src_y>>1;
2864     }else if(s->out_format == FMT_H261){//even chroma MVs are full-pel in H.261
2865         mx = motion_x / 4;
2866         my = motion_y / 4;
2867         uvsx = (2*mx) & s_mask;
2868         uvsy = (2*my) & s_mask;
2869         uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
2870         uvsrc_y = s->mb_y*block_s               + (my >> lowres);
2871     } else {
2872         mx = motion_x / 2;
2873         my = motion_y / 2;
2874         uvsx = mx & s_mask;
2875         uvsy = my & s_mask;
2876         uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
2877         uvsrc_y =(s->mb_y*block_s>>field_based) + (my >> (lowres+1));
2878     }
2879
2880     ptr_y  = ref_picture[0] + src_y * linesize + src_x;
2881     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
2882     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
2883
2884     if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
2885        || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
2886             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
2887                              src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
2888             ptr_y = s->edge_emu_buffer;
2889             if(!(s->flags&CODEC_FLAG_GRAY)){
2890                 uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
2891                 ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based, 
2892                                  uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
2893                 ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based, 
2894                                  uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
2895                 ptr_cb= uvbuf;
2896                 ptr_cr= uvbuf+16;
2897             }
2898     }
2899
2900     if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
2901         dest_y += s->linesize;
2902         dest_cb+= s->uvlinesize;
2903         dest_cr+= s->uvlinesize;
2904     }
2905
2906     if(field_select){
2907         ptr_y += s->linesize;
2908         ptr_cb+= s->uvlinesize;
2909         ptr_cr+= s->uvlinesize;
2910     }
2911
2912     sx <<= 2 - lowres;
2913     sy <<= 2 - lowres;
2914     pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);
2915     
2916     if(!(s->flags&CODEC_FLAG_GRAY)){
2917         uvsx <<= 2 - lowres;
2918         uvsy <<= 2 - lowres;
2919         pix_op[lowres](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
2920         pix_op[lowres](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
2921     }
2922     //FIXME h261 lowres loop filter
2923 }
2924
2925 //FIXME move to dsputil, avg variant, 16x16 version
2926 static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
2927     int x;
2928     uint8_t * const top   = src[1];
2929     uint8_t * const left  = src[2];
2930     uint8_t * const mid   = src[0];
2931     uint8_t * const right = src[3];
2932     uint8_t * const bottom= src[4];
2933 #define OBMC_FILTER(x, t, l, m, r, b)\
2934     dst[x]= (t*top[x] + l*left[x] + m*mid[x] + r*right[x] + b*bottom[x] + 4)>>3
2935 #define OBMC_FILTER4(x, t, l, m, r, b)\
2936     OBMC_FILTER(x         , t, l, m, r, b);\
2937     OBMC_FILTER(x+1       , t, l, m, r, b);\
2938     OBMC_FILTER(x  +stride, t, l, m, r, b);\
2939     OBMC_FILTER(x+1+stride, t, l, m, r, b);
2940     
2941     x=0;
2942     OBMC_FILTER (x  , 2, 2, 4, 0, 0);
2943     OBMC_FILTER (x+1, 2, 1, 5, 0, 0);
2944     OBMC_FILTER4(x+2, 2, 1, 5, 0, 0);
2945     OBMC_FILTER4(x+4, 2, 0, 5, 1, 0);
2946     OBMC_FILTER (x+6, 2, 0, 5, 1, 0);
2947     OBMC_FILTER (x+7, 2, 0, 4, 2, 0);
2948     x+= stride;
2949     OBMC_FILTER (x  , 1, 2, 5, 0, 0);
2950     OBMC_FILTER (x+1, 1, 2, 5, 0, 0);
2951     OBMC_FILTER (x+6, 1, 0, 5, 2, 0);
2952     OBMC_FILTER (x+7, 1, 0, 5, 2, 0);
2953     x+= stride;
2954     OBMC_FILTER4(x  , 1, 2, 5, 0, 0);
2955     OBMC_FILTER4(x+2, 1, 1, 6, 0, 0);
2956     OBMC_FILTER4(x+4, 1, 0, 6, 1, 0);
2957     OBMC_FILTER4(x+6, 1, 0, 5, 2, 0);
2958     x+= 2*stride;
2959     OBMC_FILTER4(x  , 0, 2, 5, 0, 1);
2960     OBMC_FILTER4(x+2, 0, 1, 6, 0, 1);
2961     OBMC_FILTER4(x+4, 0, 0, 6, 1, 1);
2962     OBMC_FILTER4(x+6, 0, 0, 5, 2, 1);
2963     x+= 2*stride;
2964     OBMC_FILTER (x  , 0, 2, 5, 0, 1);
2965     OBMC_FILTER (x+1, 0, 2, 5, 0, 1);
2966     OBMC_FILTER4(x+2, 0, 1, 5, 0, 2);
2967     OBMC_FILTER4(x+4, 0, 0, 5, 1, 2);
2968     OBMC_FILTER (x+6, 0, 0, 5, 2, 1);
2969     OBMC_FILTER (x+7, 0, 0, 5, 2, 1);
2970     x+= stride;
2971     OBMC_FILTER (x  , 0, 2, 4, 0, 2);
2972     OBMC_FILTER (x+1, 0, 1, 5, 0, 2);
2973     OBMC_FILTER (x+6, 0, 0, 5, 1, 2);
2974     OBMC_FILTER (x+7, 0, 0, 4, 2, 2);
2975 }
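/* The OBMC_FILTER weights above are 3-bit fixed point: every (t,l,m,r,b)
   tuple sums to 8, so the +4 and >>3 in the macro produce a rounded
   weighted average of the five overlapping predictions, e.g.
   (2,1,5,0,0) means dst[x] = (2*top[x] + 1*left[x] + 5*mid[x] + 4) >> 3. */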
2976
2977 /* obmc for 1 8x8 luma block */
2978 static inline void obmc_motion(MpegEncContext *s,
2979                                uint8_t *dest, uint8_t *src,
2980                                int src_x, int src_y,
2981                                op_pixels_func *pix_op,
2982                                int16_t mv[5][2]/* mid top left right bottom*/)
2983 #define MID    0
2984 {
2985     int i;
2986     uint8_t *ptr[5];
2987     
2988     assert(s->quarter_sample==0);
2989     
2990     for(i=0; i<5; i++){
2991         if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){
2992             ptr[i]= ptr[MID];
2993         }else{
2994             ptr[i]= s->obmc_scratchpad + 8*(i&1) + s->linesize*8*(i>>1);
2995             hpel_motion(s, ptr[i], src, 0, 0,
2996                         src_x, src_y,
2997                         s->width, s->height, s->linesize,
2998                         s->h_edge_pos, s->v_edge_pos,
2999                         8, 8, pix_op,
3000                         mv[i][0], mv[i][1]);
3001         }
3002     }
3003
3004     put_obmc(dest, ptr, s->linesize);                
3005 }
3006
3007 static inline void qpel_motion(MpegEncContext *s,
3008                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3009                                int field_based, int bottom_field, int field_select,
3010                                uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
3011                                qpel_mc_func (*qpix_op)[16],
3012                                int motion_x, int motion_y, int h)
3013 {
3014     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
3015     int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, linesize, uvlinesize;
3016
3017     dxy = ((motion_y & 3) << 2) | (motion_x & 3);
3018     src_x = s->mb_x *  16                 + (motion_x >> 2);
3019     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);
3020
3021     v_edge_pos = s->v_edge_pos >> field_based;
3022     linesize = s->linesize << field_based;
3023     uvlinesize = s->uvlinesize << field_based;
3024     
3025     if(field_based){
3026         mx= motion_x/2;
3027         my= motion_y>>1;
3028     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
3029         static const int rtab[8]= {0,0,1,1,0,0,0,1};
3030         mx= (motion_x>>1) + rtab[motion_x&7];
3031         my= (motion_y>>1) + rtab[motion_y&7];
3032     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
3033         mx= (motion_x>>1)|(motion_x&1);
3034         my= (motion_y>>1)|(motion_y&1);
3035     }else{
3036         mx= motion_x/2;
3037         my= motion_y/2;
3038     }
3039     mx= (mx>>1)|(mx&1);
3040     my= (my>>1)|(my&1);
3041
3042     uvdxy= (mx&1) | ((my&1)<<1);
3043     mx>>=1;
3044     my>>=1;
3045
3046     uvsrc_x = s->mb_x *  8                 + mx;
3047     uvsrc_y = s->mb_y * (8 >> field_based) + my;
3048
3049     ptr_y  = ref_picture[0] +   src_y *   linesize +   src_x;
3050     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
3051     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
3052
3053     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16 
3054        || (unsigned)src_y >    v_edge_pos - (motion_y&3) - h  ){
3055         ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based, 
3056                          src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
3057         ptr_y= s->edge_emu_buffer;
3058         if(!(s->flags&CODEC_FLAG_GRAY)){
3059             uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize;
3060             ff_emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize, 9, 9 + field_based, 
3061                              uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3062             ff_emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize, 9, 9 + field_based, 
3063                              uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3064             ptr_cb= uvbuf;
3065             ptr_cr= uvbuf + 16;
3066         }
3067     }
3068
3069     if(!field_based)
3070         qpix_op[0][dxy](dest_y, ptr_y, linesize);
3071     else{
3072         if(bottom_field){
3073             dest_y += s->linesize;
3074             dest_cb+= s->uvlinesize;
3075             dest_cr+= s->uvlinesize;
3076         }
3077
3078         if(field_select){
3079             ptr_y  += s->linesize;
3080             ptr_cb += s->uvlinesize;
3081             ptr_cr += s->uvlinesize;
3082         }
3083         //damn interlaced mode
3084         //FIXME boundary mirroring is not exactly correct here
3085         qpix_op[1][dxy](dest_y  , ptr_y  , linesize);
3086         qpix_op[1][dxy](dest_y+8, ptr_y+8, linesize);
3087     }
3088     if(!(s->flags&CODEC_FLAG_GRAY)){
3089         pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1);
3090         pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1);
3091     }
3092 }
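/* Chroma handling above, in rough terms: the quarter-pel luma vector is
   halved to chroma resolution (mx = motion_x/2, modulo the FF_BUG_QPEL_CHROMA
   workarounds), then mx = (mx>>1)|(mx&1) snaps any fractional position onto
   the half-pel grid (biased towards the half-pel sample); uvdxy packs those
   half-pel bits for pix_op[1][uvdxy] and the remaining bits give the integer
   chroma offset. */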
3093
3094 inline int ff_h263_round_chroma(int x){
3095     if (x >= 0)
3096         return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3097     else {
3098         x = -x;
3099         return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3100     }
3101 }
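/* Roughly: in the 4MV case the argument is the sum of the four luma half-pel
   vectors, so dividing by 8 (average of 4 blocks, chroma at half resolution)
   gives the chroma vector in half-pel units; (x>>3)&~1 is its full-pel part
   (still expressed in half-pel units) and h263_chroma_roundtab supplies the
   rounding of the remaining low four bits that the spec prescribes. */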
3102
3103 /**
3104  * h263 chroma 4mv motion compensation.
3105  */
3106 static inline void chroma_4mv_motion(MpegEncContext *s,
3107                                      uint8_t *dest_cb, uint8_t *dest_cr,
3108                                      uint8_t **ref_picture,
3109                                      op_pixels_func *pix_op,
3110                                      int mx, int my){
3111     int dxy, emu=0, src_x, src_y, offset;
3112     uint8_t *ptr;
3113     
3114     /* In case of 8X8, we construct a single chroma motion vector
3115        with a special rounding */
3116     mx= ff_h263_round_chroma(mx);
3117     my= ff_h263_round_chroma(my);
3118     
3119     dxy = ((my & 1) << 1) | (mx & 1);
3120     mx >>= 1;
3121     my >>= 1;
3122
3123     src_x = s->mb_x * 8 + mx;
3124     src_y = s->mb_y * 8 + my;
3125     src_x = clip(src_x, -8, s->width/2);
3126     if (src_x == s->width/2)
3127         dxy &= ~1;
3128     src_y = clip(src_y, -8, s->height/2);
3129     if (src_y == s->height/2)
3130         dxy &= ~2;
3131     
3132     offset = (src_y * (s->uvlinesize)) + src_x;
3133     ptr = ref_picture[1] + offset;
3134     if(s->flags&CODEC_FLAG_EMU_EDGE){
3135         if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
3136            || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
3137             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
3138             ptr= s->edge_emu_buffer;
3139             emu=1;
3140         }
3141     }
3142     pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);
3143
3144     ptr = ref_picture[2] + offset;
3145     if(emu){
3146         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
3147         ptr= s->edge_emu_buffer;
3148     }
3149     pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
3150 }
3151
3152 static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
3153                                      uint8_t *dest_cb, uint8_t *dest_cr,
3154                                      uint8_t **ref_picture,
3155                                      h264_chroma_mc_func *pix_op,
3156                                      int mx, int my){
3157     const int lowres= s->avctx->lowres;
3158     const int block_s= 8>>lowres;
3159     const int s_mask= (2<<lowres)-1;
3160     const int h_edge_pos = s->h_edge_pos >> (lowres+1);
3161     const int v_edge_pos = s->v_edge_pos >> (lowres+1);
3162     int emu=0, src_x, src_y, offset, sx, sy;
3163     uint8_t *ptr;
3164     
3165     if(s->quarter_sample){
3166         mx/=2;
3167         my/=2;
3168     }
3169
3170     /* In case of 8X8, we construct a single chroma motion vector
3171        with a special rounding */
3172     mx= ff_h263_round_chroma(mx);
3173     my= ff_h263_round_chroma(my);
3174     
3175     sx= mx & s_mask;
3176     sy= my & s_mask;
3177     src_x = s->mb_x*block_s + (mx >> (lowres+1));
3178     src_y = s->mb_y*block_s + (my >> (lowres+1));
3179     
3180     offset = src_y * s->uvlinesize + src_x;
3181     ptr = ref_picture[1] + offset;
3182     if(s->flags&CODEC_FLAG_EMU_EDGE){
3183         if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
3184            || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
3185             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
3186             ptr= s->edge_emu_buffer;
3187             emu=1;
3188         }
3189     }     
3190     sx <<= 2 - lowres;
3191     sy <<= 2 - lowres;
3192     pix_op[lowres](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);
3193           
3194     ptr = ref_picture[2] + offset;
3195     if(emu){
3196         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
3197         ptr= s->edge_emu_buffer;
3198     }
3199     pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
3200 }
3201
3202 /**
3203  * motion compensation of a single macroblock
3204  * @param s context
3205  * @param dest_y luma destination pointer
3206  * @param dest_cb chroma cb/u destination pointer
3207  * @param dest_cr chroma cr/v destination pointer
3208  * @param dir direction (0->forward, 1->backward)
3209  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
3210  * @param pix_op halfpel motion compensation function (average or put normally)
3211  * @param qpix_op qpel motion compensation function (average or put normally)
3212  * the motion vectors are taken from s->mv and the MV type from s->mv_type
3213  */
3214 static inline void MPV_motion(MpegEncContext *s, 
3215                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3216                               int dir, uint8_t **ref_picture, 
3217                               op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
3218 {
3219     int dxy, mx, my, src_x, src_y, motion_x, motion_y;
3220     int mb_x, mb_y, i;
3221     uint8_t *ptr, *dest;
3222
3223     mb_x = s->mb_x;
3224     mb_y = s->mb_y;
3225
3226     if(s->obmc && s->pict_type != B_TYPE){
3227         int16_t mv_cache[4][4][2];
3228         const int xy= s->mb_x + s->mb_y*s->mb_stride;
3229         const int mot_stride= s->b8_stride;
3230         const int mot_xy= mb_x*2 + mb_y*2*mot_stride;
3231
3232         assert(!s->mb_skiped);
3233                 
3234         memcpy(mv_cache[1][1], s->current_picture.motion_val[0][mot_xy           ], sizeof(int16_t)*4);
3235         memcpy(mv_cache[2][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
3236         memcpy(mv_cache[3][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
3237
3238         if(mb_y==0 || IS_INTRA(s->current_picture.mb_type[xy-s->mb_stride])){
3239             memcpy(mv_cache[0][1], mv_cache[1][1], sizeof(int16_t)*4);
3240         }else{
3241             memcpy(mv_cache[0][1], s->current_picture.motion_val[0][mot_xy-mot_stride], sizeof(int16_t)*4);
3242         }
3243
3244         if(mb_x==0 || IS_INTRA(s->current_picture.mb_type[xy-1])){
3245             *(int32_t*)mv_cache[1][0]= *(int32_t*)mv_cache[1][1];
3246             *(int32_t*)mv_cache[2][0]= *(int32_t*)mv_cache[2][1];
3247         }else{
3248             *(int32_t*)mv_cache[1][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1];
3249             *(int32_t*)mv_cache[2][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1+mot_stride];
3250         }
3251
3252         if(mb_x+1>=s->mb_width || IS_INTRA(s->current_picture.mb_type[xy+1])){
3253             *(int32_t*)mv_cache[1][3]= *(int32_t*)mv_cache[1][2];
3254             *(int32_t*)mv_cache[2][3]= *(int32_t*)mv_cache[2][2];
3255         }else{
3256             *(int32_t*)mv_cache[1][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2];
3257             *(int32_t*)mv_cache[2][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2+mot_stride];
3258         }
3259         
3260         mx = 0;
3261         my = 0;
3262         for(i=0;i<4;i++) {
3263             const int x= (i&1)+1;
3264             const int y= (i>>1)+1;
3265             int16_t mv[5][2]= {
3266                 {mv_cache[y][x  ][0], mv_cache[y][x  ][1]},
3267                 {mv_cache[y-1][x][0], mv_cache[y-1][x][1]},
3268                 {mv_cache[y][x-1][0], mv_cache[y][x-1][1]},
3269                 {mv_cache[y][x+1][0], mv_cache[y][x+1][1]},
3270                 {mv_cache[y+1][x][0], mv_cache[y+1][x][1]}};
3271             //FIXME cleanup
3272             obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
3273                         ref_picture[0],
3274                         mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
3275                         pix_op[1],
3276                         mv);
3277
3278             mx += mv[0][0];
3279             my += mv[0][1];
3280         }
3281         if(!(s->flags&CODEC_FLAG_GRAY))
3282             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
3283
3284         return;
3285     }
3286    
3287     switch(s->mv_type) {
3288     case MV_TYPE_16X16:
3289         if(s->mcsel){
3290             if(s->real_sprite_warping_points==1){
3291                 gmc1_motion(s, dest_y, dest_cb, dest_cr,
3292                             ref_picture);
3293             }else{
3294                 gmc_motion(s, dest_y, dest_cb, dest_cr,
3295                             ref_picture);
3296             }
3297         }else if(s->quarter_sample){
3298             qpel_motion(s, dest_y, dest_cb, dest_cr, 
3299                         0, 0, 0,
3300                         ref_picture, pix_op, qpix_op,
3301                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3302         }else if(s->mspel){
3303             ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
3304                         ref_picture, pix_op,
3305                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3306         }else
3307         {
3308             mpeg_motion(s, dest_y, dest_cb, dest_cr, 
3309                         0, 0, 0,
3310                         ref_picture, pix_op,
3311                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3312         }           
3313         break;
3314     case MV_TYPE_8X8:
3315         mx = 0;
3316         my = 0;
3317         if(s->quarter_sample){
3318             for(i=0;i<4;i++) {
3319                 motion_x = s->mv[dir][i][0];
3320                 motion_y = s->mv[dir][i][1];
3321
3322                 dxy = ((motion_y & 3) << 2) | (motion_x & 3);
3323                 src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
3324                 src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;
3325                     
3326                 /* WARNING: do not forget half pels */
3327                 src_x = clip(src_x, -16, s->width);
3328                 if (src_x == s->width)
3329                     dxy &= ~3;
3330                 src_y = clip(src_y, -16, s->height);
3331                 if (src_y == s->height)
3332                     dxy &= ~12;
3333                     
3334                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
3335                 if(s->flags&CODEC_FLAG_EMU_EDGE){
3336                     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8 
3337                        || (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 8 ){
3338                         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
3339                         ptr= s->edge_emu_buffer;
3340                     }
3341                 }
3342                 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
3343                 qpix_op[1][dxy](dest, ptr, s->linesize);
3344
3345                 mx += s->mv[dir][i][0]/2;
3346                 my += s->mv[dir][i][1]/2;
3347             }
3348         }else{
3349             for(i=0;i<4;i++) {
3350                 hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
3351                             ref_picture[0], 0, 0,
3352                             mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
3353                             s->width, s->height, s->linesize,
3354                             s->h_edge_pos, s->v_edge_pos,
3355                             8, 8, pix_op[1],
3356                             s->mv[dir][i][0], s->mv[dir][i][1]);
3357
3358                 mx += s->mv[dir][i][0];
3359                 my += s->mv[dir][i][1];
3360             }
3361         }
3362
3363         if(!(s->flags&CODEC_FLAG_GRAY))
3364             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
3365         break;
3366     case MV_TYPE_FIELD:
3367         if (s->picture_structure == PICT_FRAME) {
3368             if(s->quarter_sample){
3369                 for(i=0; i<2; i++){
3370                     qpel_motion(s, dest_y, dest_cb, dest_cr,
3371                                 1, i, s->field_select[dir][i],
3372                                 ref_picture, pix_op, qpix_op,
3373                                 s->mv[dir][i][0], s->mv[dir][i][1], 8);
3374                 }
3375             }else{
3376                 /* top field */       
3377                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3378                             1, 0, s->field_select[dir][0],
3379                             ref_picture, pix_op,
3380                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
3381                 /* bottom field */
3382                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3383                             1, 1, s->field_select[dir][1],
3384                             ref_picture, pix_op,
3385                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
3386             }
3387         } else {
3388             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
3389                 ref_picture= s->current_picture_ptr->data;
3390             } 
3391
3392             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3393                         0, 0, s->field_select[dir][0],
3394                         ref_picture, pix_op,
3395                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3396         }
3397         break;
3398     case MV_TYPE_16X8:
3399         for(i=0; i<2; i++){
3400             uint8_t ** ref2picture;
3401
3402             if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
3403                 ref2picture= ref_picture;
3404             }else{
3405                 ref2picture= s->current_picture_ptr->data;
3406             } 
3407
3408             mpeg_motion(s, dest_y, dest_cb, dest_cr, 
3409                         0, 0, s->field_select[dir][i],
3410                         ref2picture, pix_op,
3411                         s->mv[dir][i][0], s->mv[dir][i][1] + 16*i, 8);
3412                 
3413             dest_y += 16*s->linesize;
3414             dest_cb+= (16>>s->chroma_y_shift)*s->uvlinesize;
3415             dest_cr+= (16>>s->chroma_y_shift)*s->uvlinesize;
3416         }        
3417         break;
3418     case MV_TYPE_DMV:
3419         if(s->picture_structure == PICT_FRAME){
3420             for(i=0; i<2; i++){
3421                 int j;
3422                 for(j=0; j<2; j++){
3423                     mpeg_motion(s, dest_y, dest_cb, dest_cr,
3424                                 1, j, j^i,
3425                                 ref_picture, pix_op,
3426                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], 8);
3427                 }
3428                 pix_op = s->dsp.avg_pixels_tab; 
3429             }
3430         }else{
3431             for(i=0; i<2; i++){
3432                 mpeg_motion(s, dest_y, dest_cb, dest_cr, 
3433                             0, 0, s->picture_structure != i+1,
3434                             ref_picture, pix_op,
3435                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],16);
3436
3437                 // after the put, the next prediction is averaged into the same block
3438                 pix_op=s->dsp.avg_pixels_tab; 
3439
3440                 //opposite parity is always in the same frame if this is the second field
3441                 if(!s->first_field){
3442                     ref_picture = s->current_picture_ptr->data;    
3443                 }
3444             }
3445         }
3446     break;
3447     default: assert(0);
3448     }
3449 }
3450
3451 /**
3452  * motion compensation of a single macroblock
3453  * @param s context
3454  * @param dest_y luma destination pointer
3455  * @param dest_cb chroma cb/u destination pointer
3456  * @param dest_cr chroma cr/v destination pointer
3457  * @param dir direction (0->forward, 1->backward)
3458  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
3459  * @param pix_op halfpel motion compensation function (average or put normally)
3460  * the motion vectors are taken from s->mv and the MV type from s->mv_type
3461  */
3462 static inline void MPV_motion_lowres(MpegEncContext *s, 
3463                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3464                               int dir, uint8_t **ref_picture, 
3465                               h264_chroma_mc_func *pix_op)
3466 {
3467     int mx, my;
3468     int mb_x, mb_y, i;
3469     const int lowres= s->avctx->lowres;
3470     const int block_s= 8>>lowres;    
3471
3472     mb_x = s->mb_x;
3473     mb_y = s->mb_y;
3474
3475     switch(s->mv_type) {
3476     case MV_TYPE_16X16:
3477         mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr, 
3478                     0, 0, 0,
3479                     ref_picture, pix_op,
3480                     s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
3481         break;
3482     case MV_TYPE_8X8:
3483         mx = 0;
3484         my = 0;
3485             for(i=0;i<4;i++) {
3486                 hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
3487                             ref_picture[0], 0, 0,
3488                             (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
3489                             s->width, s->height, s->linesize,
3490                             s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
3491                             block_s, block_s, pix_op,
3492                             s->mv[dir][i][0], s->mv[dir][i][1]);
3493
3494                 mx += s->mv[dir][i][0];
3495                 my += s->mv[dir][i][1];
3496             }
3497
3498         if(!(s->flags&CODEC_FLAG_GRAY))
3499             chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
3500         break;
3501     case MV_TYPE_FIELD:
3502         if (s->picture_structure == PICT_FRAME) {
3503             /* top field */       
3504             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3505                         1, 0, s->field_select[dir][0],
3506                         ref_picture, pix_op,
3507                         s->mv[dir][0][0], s->mv[dir][0][1], block_s);
3508             /* bottom field */
3509             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3510                         1, 1, s->field_select[dir][1],
3511                         ref_picture, pix_op,
3512                         s->mv[dir][1][0], s->mv[dir][1][1], block_s);
3513         } else {
3514             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
3515                 ref_picture= s->current_picture_ptr->data;
3516             } 
3517
3518             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3519                         0, 0, s->field_select[dir][0],
3520                         ref_picture, pix_op,
3521                         s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
3522         }
3523         break;
3524     case MV_TYPE_16X8:
3525         for(i=0; i<2; i++){
3526             uint8_t ** ref2picture;
3527
3528             if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
3529                 ref2picture= ref_picture;
3530             }else{
3531                 ref2picture= s->current_picture_ptr->data;
3532             } 
3533
3534             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr, 
3535                         0, 0, s->field_select[dir][i],
3536                         ref2picture, pix_op,
3537                         s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s);
3538                 
3539             dest_y += 2*block_s*s->linesize;
3540             dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
3541             dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
3542         }        
3543         break;
3544     case MV_TYPE_DMV:
3545         if(s->picture_structure == PICT_FRAME){
3546             for(i=0; i<2; i++){
3547                 int j;
3548                 for(j=0; j<2; j++){
3549                     mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3550                                 1, j, j^i,
3551                                 ref_picture, pix_op,
3552                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s);
3553                 }
3554                 pix_op = s->dsp.avg_h264_chroma_pixels_tab;
3555             }
3556         }else{
3557             for(i=0; i<2; i++){
3558                 mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr, 
3559                             0, 0, s->picture_structure != i+1,
3560                             ref_picture, pix_op,
3561                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s);
3562
3563                 // after the put, the next prediction is averaged into the same block
3564                 pix_op = s->dsp.avg_h264_chroma_pixels_tab;
3565
3566                 //opposite parity is always in the same frame if this is the second field
3567                 if(!s->first_field){
3568                     ref_picture = s->current_picture_ptr->data;    
3569                 }
3570             }
3571         }
3572     break;
3573     default: assert(0);
3574     }
3575 }
3576
3577 /* put block[] to dest[] */
3578 static inline void put_dct(MpegEncContext *s, 
3579                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3580 {
3581     s->dct_unquantize_intra(s, block, i, qscale);
3582     s->dsp.idct_put (dest, line_size, block);
3583 }
3584
3585 /* add block[] to dest[] */
3586 static inline void add_dct(MpegEncContext *s, 
3587                            DCTELEM *block, int i, uint8_t *dest, int line_size)
3588 {
3589     if (s->block_last_index[i] >= 0) {
3590         s->dsp.idct_add (dest, line_size, block);
3591     }
3592 }
3593
3594 static inline void add_dequant_dct(MpegEncContext *s, 
3595                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3596 {
3597     if (s->block_last_index[i] >= 0) {
3598         s->dct_unquantize_inter(s, block, i, qscale);
3599
3600         s->dsp.idct_add (dest, line_size, block);
3601     }
3602 }
3603
3604 /**
3605  * cleans dc, ac, coded_block for the current non-intra MB
3606  */
3607 void ff_clean_intra_table_entries(MpegEncContext *s)
3608 {
3609     int wrap = s->b8_stride;
3610     int xy = s->block_index[0];
3611     
3612     s->dc_val[0][xy           ] = 
3613     s->dc_val[0][xy + 1       ] = 
3614     s->dc_val[0][xy     + wrap] =
3615     s->dc_val[0][xy + 1 + wrap] = 1024;
3616     /* ac pred */
3617     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
3618     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
3619     if (s->msmpeg4_version>=3) {
3620         s->coded_block[xy           ] =
3621         s->coded_block[xy + 1       ] =
3622         s->coded_block[xy     + wrap] =
3623         s->coded_block[xy + 1 + wrap] = 0;
3624     }
3625     /* chroma */
3626     wrap = s->mb_stride;
3627     xy = s->mb_x + s->mb_y * wrap;
3628     s->dc_val[1][xy] =
3629     s->dc_val[2][xy] = 1024;
3630     /* ac pred */
3631     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
3632     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
3633     
3634     s->mbintra_table[xy]= 0;
3635 }
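/* The 1024 written above is the DC predictor reset value: it is roughly the
   DC coefficient of a flat mid-grey (128) block with the usual *8 DCT
   scaling, which is what the H.263/MPEG-4 prediction rules fall back to when
   a neighbouring block was not intra coded. */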
3636
3637 /* generic function called after a macroblock has been parsed by the
3638    decoder or after it has been encoded by the encoder.
3639
3640    Important variables used:
3641    s->mb_intra : true if intra macroblock
3642    s->mv_dir   : motion vector direction
3643    s->mv_type  : motion vector type
3644    s->mv       : motion vector
3645    s->interlaced_dct : true if interlaced dct used (mpeg2)
3646  */
3647 static always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
3648 {
3649     int mb_x, mb_y;
3650     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
3651 #ifdef HAVE_XVMC
3652     if(s->avctx->xvmc_acceleration){
3653         XVMC_decode_mb(s);//xvmc uses pblocks
3654         return;
3655     }
3656 #endif
3657
3658     mb_x = s->mb_x;
3659     mb_y = s->mb_y;
3660
3661     if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
3662        /* save DCT coefficients */
3663        int i,j;
3664        DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
3665        for(i=0; i<6; i++)
3666            for(j=0; j<64; j++)
3667                *dct++ = block[i][s->dsp.idct_permutation[j]];
3668     }
3669
3670     s->current_picture.qscale_table[mb_xy]= s->qscale;
3671
3672     /* update DC predictors for P macroblocks */
3673     if (!s->mb_intra) {
3674         if (s->h263_pred || s->h263_aic) {
3675             if(s->mbintra_table[mb_xy])
3676                 ff_clean_intra_table_entries(s);
3677         } else {
3678             s->last_dc[0] =
3679             s->last_dc[1] =
3680             s->last_dc[2] = 128 << s->intra_dc_precision;
3681         }
3682     }
3683     else if (s->h263_pred || s->h263_aic)
3684         s->mbintra_table[mb_xy]=1;
3685
3686     if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { //FIXME precalc
3687         uint8_t *dest_y, *dest_cb, *dest_cr;
3688         int dct_linesize, dct_offset;
3689         op_pixels_func (*op_pix)[4];
3690         qpel_mc_func (*op_qpix)[16];
3691         const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
3692         const int uvlinesize= s->current_picture.linesize[1];
3693         const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
3694         const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;
3695
3696         /* avoid copy if macroblock skipped in last frame too */
3697         /* skip only during decoding, as during encoding we might trash the buffers a bit */
3698         if(!s->encoding){
3699             uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
3700             const int age= s->current_picture.age;
3701
3702             assert(age);
3703
3704             if (s->mb_skiped) {
3705                 s->mb_skiped= 0;
3706                 assert(s->pict_type!=I_TYPE);
3707  
3708                 (*mbskip_ptr) ++; /* indicate that this time we skipped it */
3709                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3710
3711                 /* if previous was skipped too, then nothing to do !  */
3712                 if (*mbskip_ptr >= age && s->current_picture.reference){
3713                     return;
3714                 }
3715             } else if(!s->current_picture.reference){
3716                 (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
3717                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3718             } else{
3719                 *mbskip_ptr = 0; /* not skipped */
3720             }
3721         }
3722         
3723         dct_linesize = linesize << s->interlaced_dct;
3724         dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;
3725         
3726         if(readable){
3727             dest_y=  s->dest[0];
3728             dest_cb= s->dest[1];
3729             dest_cr= s->dest[2];
3730         }else{
3731             dest_y = s->b_scratchpad;
3732             dest_cb= s->b_scratchpad+16*linesize;
3733             dest_cr= s->b_scratchpad+32*linesize;
3734         }
3735
3736         if (!s->mb_intra) {
3737             /* motion handling */
3738             /* decoding or more than one mb_type (MC was already done otherwise) */
3739             if(!s->encoding){
3740                 if(lowres_flag){
3741                     h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;
3742
3743                     if (s->mv_dir & MV_DIR_FORWARD) {
3744                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
3745                         op_pix = s->dsp.avg_h264_chroma_pixels_tab;
3746                     }
3747                     if (s->mv_dir & MV_DIR_BACKWARD) {
3748                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
3749                     }
3750                 }else{
3751                     if ((!s->no_rounding) || s->pict_type==B_TYPE){                
3752                         op_pix = s->dsp.put_pixels_tab;
3753                         op_qpix= s->dsp.put_qpel_pixels_tab;
3754                     }else{
3755                         op_pix = s->dsp.put_no_rnd_pixels_tab;
3756                         op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
3757                     }
3758                     if (s->mv_dir & MV_DIR_FORWARD) {
3759                         MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
3760                         op_pix = s->dsp.avg_pixels_tab;
3761                         op_qpix= s->dsp.avg_qpel_pixels_tab;
3762                     }
3763                     if (s->mv_dir & MV_DIR_BACKWARD) {
3764                         MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
3765                     }
3766                 }
3767             }
3768
3769             /* skip dequant / idct if we are really late ;) */
3770             if(s->hurry_up>1) return;
3771
3772             /* add dct residue */
3773             if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
3774                                 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
3775                 add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
3776                 add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
3777                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
3778                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
3779
3780                 if(!(s->flags&CODEC_FLAG_GRAY)){
3781                     add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
3782                     add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
3783                 }
3784             } else if(s->codec_id != CODEC_ID_WMV2){
3785                 add_dct(s, block[0], 0, dest_y                          , dct_linesize);
3786                 add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
3787                 add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
3788                 add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);
3789
3790                 if(!(s->flags&CODEC_FLAG_GRAY)){
3791                     if(s->chroma_y_shift){//Chroma420
3792                         add_dct(s, block[4], 4, dest_cb, uvlinesize);
3793                         add_dct(s, block[5], 5, dest_cr, uvlinesize);
3794                     }else{
3795                         //chroma422
3796                         dct_linesize = uvlinesize << s->interlaced_dct;
3797                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
3798
3799                         add_dct(s, block[4], 4, dest_cb, dct_linesize);
3800                         add_dct(s, block[5], 5, dest_cr, dct_linesize);
3801                         add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
3802                         add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
3803                         if(!s->chroma_x_shift){//Chroma444
3804                             add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
3805                             add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
3806                             add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
3807                             add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
3808                         }
3809                     }
3810                 }//fi gray
3811             }
3812             else{
3813                 ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
3814             }
3815         } else {
3816             /* dct only in intra block */
3817             if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
3818                 put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
3819                 put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
3820                 put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
3821                 put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
3822
3823                 if(!(s->flags&CODEC_FLAG_GRAY)){
3824                     put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
3825                     put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
3826                 }
3827             }else{
3828                 s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
3829                 s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
3830                 s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
3831                 s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);
3832
3833                 if(!(s->flags&CODEC_FLAG_GRAY)){
3834                     if(s->chroma_y_shift){
3835                         s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
3836                         s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
3837                     }else{
3838
3839                         dct_linesize = uvlinesize << s->interlaced_dct;
3840                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
3841
3842                         s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
3843                         s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
3844                         s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
3845                         s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
3846                         if(!s->chroma_x_shift){//Chroma444
3847                             s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
3848                             s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
3849                             s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
3850                             s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
3851                         }
3852                     }
3853                 }//gray
3854             }
3855         }
3856         if(!readable){
3857             s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
3858             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
3859             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
3860         }
3861     }
3862 }
3863
3864 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
3865     if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1);
3866     else                  MPV_decode_mb_internal(s, block, 0);
3867 }
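/* Passing lowres as a compile-time constant here lets the always_inline body
   of MPV_decode_mb_internal() be specialized into separate full-resolution
   and lowres variants, so the per-macroblock lowres checks cost nothing on
   the common full-resolution path. */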
3868
3869 #ifdef CONFIG_ENCODERS
3870
3871 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
3872 {
3873     static const char tab[64]=
3874         {3,2,2,1,1,1,1,1,
3875          1,1,1,1,1,1,1,1,
3876          1,1,1,1,1,1,1,1,
3877          0,0,0,0,0,0,0,0,
3878          0,0,0,0,0,0,0,0,
3879          0,0,0,0,0,0,0,0,
3880          0,0,0,0,0,0,0,0,
3881          0,0,0,0,0,0,0,0};
3882     int score=0;
3883     int run=0;
3884     int i;
3885     DCTELEM *block= s->block[n];
3886     const int last_index= s->block_last_index[n];
3887     int skip_dc;
3888
3889     if(threshold<0){
3890         skip_dc=0;
3891         threshold= -threshold;
3892     }else
3893         skip_dc=1;
3894
3895     /* are all the coefficients which we could set to zero already zero? */
3896     if(last_index<=skip_dc - 1) return;
3897
3898     for(i=0; i<=last_index; i++){
3899         const int j = s->intra_scantable.permutated[i];
3900         const int level = ABS(block[j]);
3901         if(level==1){
3902             if(skip_dc && i==0) continue;
3903             score+= tab[run];
3904             run=0;
3905         }else if(level>1){
3906             return;
3907         }else{
3908             run++;
3909         }
3910     }
3911     if(score >= threshold) return;
3912     for(i=skip_dc; i<=last_index; i++){
3913         const int j = s->intra_scantable.permutated[i];
3914         block[j]=0;
3915     }
3916     if(block[0]) s->block_last_index[n]= 0;
3917     else         s->block_last_index[n]= -1;
3918 }
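/* Sketch of the heuristic above: only blocks whose non-zero coefficients are
   all +-1 are candidates; each such coefficient adds tab[run] to the score
   (3 when it follows no zeros, down to 0 after a very long zero run), and if
   the total stays below the threshold the block is zeroed (the DC too if the
   threshold was passed in negative), on the assumption that coding it would
   cost more bits than the isolated small coefficients are worth. */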
3919
3920 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
3921 {
3922     int i;
3923     const int maxlevel= s->max_qcoeff;
3924     const int minlevel= s->min_qcoeff;
3925     int overflow=0;
3926     
3927     if(s->mb_intra){
3928         i=1; //skip clipping of intra dc
3929     }else
3930         i=0;
3931     
3932     for(;i<=last_index; i++){
3933         const int j= s->intra_scantable.permutated[i];
3934         int level = block[j];
3935        
3936         if     (level>maxlevel){
3937             level=maxlevel;
3938             overflow++;
3939         }else if(level<minlevel){
3940             level=minlevel;
3941             overflow++;
3942         }
3943         
3944         block[j]= level;
3945     }
3946     
3947     if(overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
3948         av_log(s->avctx, AV_LOG_INFO, "warning, clipping %d dct coefficients to %d..%d\n", overflow, minlevel, maxlevel);
3949 }
3950
3951 #endif //CONFIG_ENCODERS
3952
3953 /**
3954  *
3955  * @param h is the normal height; it will be reduced automatically if needed for the last row
3956  */
3957 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
3958     if (s->avctx->draw_horiz_band) {
3959         AVFrame *src;
3960         int offset[4];
3961         
3962         if(s->picture_structure != PICT_FRAME){
3963             h <<= 1;
3964             y <<= 1;
3965             if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
3966         }
3967
3968         h= FFMIN(h, s->avctx->height - y);
3969
3970         if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER)) 
3971             src= (AVFrame*)s->current_picture_ptr;
3972         else if(s->last_picture_ptr)
3973             src= (AVFrame*)s->last_picture_ptr;
3974         else
3975             return;
3976             
3977         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
3978             offset[0]=
3979             offset[1]=
3980             offset[2]=
3981             offset[3]= 0;
3982         }else{
3983             offset[0]= y * s->linesize;
3984             offset[1]= 
3985             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
3986             offset[3]= 0;
3987         }
3988
3989         emms_c();
3990
3991         s->avctx->draw_horiz_band(s->avctx, src, offset,
3992                                   y, s->picture_structure, h);
3993     }
3994 }
3995
3996 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
3997     const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
3998     const int uvlinesize= s->current_picture.linesize[1];
3999     const int mb_size= 4 - s->avctx->lowres;
4000         
4001     s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
4002     s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
4003     s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
4004     s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
4005     s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
4006     s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
4007     //block_index is not used by mpeg2, so it is not affected by chroma_format
4008
4009     s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
4010     s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
4011     s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
4012
4013     if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
4014     {
4015         s->dest[0] += s->mb_y *   linesize << mb_size;
4016         s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
4017         s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
4018     }
4019 }
4020
4021 #ifdef CONFIG_ENCODERS
4022
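/* Fill weight[] with a perceptual weight for each pixel of an 8x8 block,
 * proportional to the local standard deviation of its 3x3 neighbourhood;
 * used by the quantizer noise shaping (dct_quantize_refine) below. */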
4023 static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){
4024     int x, y;
4025 //FIXME optimize
4026     for(y=0; y<8; y++){
4027         for(x=0; x<8; x++){
4028             int x2, y2;
4029             int sum=0;
4030             int sqr=0;
4031             int count=0;
4032
4033             for(y2= FFMAX(y-1, 0); y2 < FFMIN(8, y+2); y2++){
4034                 for(x2= FFMAX(x-1, 0); x2 < FFMIN(8, x+2); x2++){
4035                     int v= ptr[x2 + y2*stride];
4036                     sum += v;
4037                     sqr += v*v;
4038                     count++;
4039                 }
4040             }
4041             weight[x + 8*y]= (36*ff_sqrt(count*sqr - sum*sum)) / count;
4042         }
4043     }
4044 }
4045
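/* Transform, quantize and entropy-code one macroblock: fetch the source (and,
 * for inter MBs, the motion-compensated prediction), run the DCT + quantizer
 * with optional noise shaping and coefficient elimination, then call the
 * codec-specific MB encoder. */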
4046 static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
4047 {
4048     int16_t weight[6][64];
4049     DCTELEM orig[6][64];
4050     const int mb_x= s->mb_x;
4051     const int mb_y= s->mb_y;
4052     int i;
4053     int skip_dct[6];
4054     int dct_offset   = s->linesize*8; //default for progressive frames
4055     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
4056     int wrap_y, wrap_c;
4057     
4058     for(i=0; i<6; i++) skip_dct[i]=0;
4059     
4060     if(s->adaptive_quant){
4061         const int last_qp= s->qscale;
4062         const int mb_xy= mb_x + mb_y*s->mb_stride;
4063
4064         s->lambda= s->lambda_table[mb_xy];
4065         update_qscale(s);
4066     
4067         if(!(s->flags&CODEC_FLAG_QP_RD)){
4068             s->dquant= s->qscale - last_qp;
4069
4070             if(s->out_format==FMT_H263){
4071                 s->dquant= clip(s->dquant, -2, 2); //FIXME RD
4072             
4073                 if(s->codec_id==CODEC_ID_MPEG4){        
4074                     if(!s->mb_intra){
4075                         if(s->pict_type == B_TYPE){
4076                             if(s->dquant&1) 
4077                                 s->dquant= (s->dquant/2)*2;
4078                             if(s->mv_dir&MV_DIRECT)
4079                                 s->dquant= 0;
4080                         }
4081                         if(s->mv_type==MV_TYPE_8X8)
4082                             s->dquant=0;
4083                     }
4084                 }
4085             }
4086         }
4087         ff_set_qscale(s, last_qp + s->dquant);
4088     }else if(s->flags&CODEC_FLAG_QP_RD)
4089         ff_set_qscale(s, s->qscale + s->dquant);
4090
4091     wrap_y = s->linesize;
4092     wrap_c = s->uvlinesize;
4093     ptr_y = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
4094     ptr_cb = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
4095     ptr_cr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
4096
4097     if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
4098         uint8_t *ebuf= s->edge_emu_buffer + 32;
4099         ff_emulated_edge_mc(ebuf            , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width   , s->height);
4100         ptr_y= ebuf;
4101         ff_emulated_edge_mc(ebuf+18*wrap_y  , ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
4102         ptr_cb= ebuf+18*wrap_y;
4103         ff_emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
4104         ptr_cr= ebuf+18*wrap_y+8;
4105     }
4106
4107     if (s->mb_intra) {
4108         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
4109             int progressive_score, interlaced_score;
4110
4111             s->interlaced_dct=0;
4112             progressive_score= s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y, 8) 
4113                               +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400;
4114
4115             if(progressive_score > 0){
4116                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y*2, 8) 
4117                                   +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y  , NULL, wrap_y*2, 8);
4118                 if(progressive_score > interlaced_score){
4119                     s->interlaced_dct=1;
4120             
4121                     dct_offset= wrap_y;
4122                     wrap_y<<=1;
4123                 }
4124             }
4125         }
4126         
4127         s->dsp.get_pixels(s->block[0], ptr_y                 , wrap_y);
4128         s->dsp.get_pixels(s->block[1], ptr_y              + 8, wrap_y);
4129         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset    , wrap_y);
4130         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
4131
4132         if(s->flags&CODEC_FLAG_GRAY){
4133             skip_dct[4]= 1;
4134             skip_dct[5]= 1;
4135         }else{
4136             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
4137             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
4138         }
4139     }else{
4140         op_pixels_func (*op_pix)[4];
4141         qpel_mc_func (*op_qpix)[16];
4142         uint8_t *dest_y, *dest_cb, *dest_cr;
4143
4144         dest_y  = s->dest[0];
4145         dest_cb = s->dest[1];
4146         dest_cr = s->dest[2];
4147
4148         if ((!s->no_rounding) || s->pict_type==B_TYPE){
4149             op_pix = s->dsp.put_pixels_tab;
4150             op_qpix= s->dsp.put_qpel_pixels_tab;
4151         }else{
4152             op_pix = s->dsp.put_no_rnd_pixels_tab;
4153             op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
4154         }
4155
4156         if (s->mv_dir & MV_DIR_FORWARD) {
4157             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
4158             op_pix = s->dsp.avg_pixels_tab;
4159             op_qpix= s->dsp.avg_qpel_pixels_tab;
4160         }
4161         if (s->mv_dir & MV_DIR_BACKWARD) {
4162             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
4163         }
4164
4165         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
4166             int progressive_score, interlaced_score;
4167
4168             s->interlaced_dct=0;
4169             progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8) 
4170                               +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;
4171             
4172             if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;
4173
4174             if(progressive_score>0){
4175                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8) 
4176                                   +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);
4177             
4178                 if(progressive_score > interlaced_score){
4179                     s->interlaced_dct=1;
4180             
4181                     dct_offset= wrap_y;
4182                     wrap_y<<=1;
4183                 }
4184             }
4185         }
4186         
4187         s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
4188         s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
4189         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
4190         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
4191         
4192         if(s->flags&CODEC_FLAG_GRAY){
4193             skip_dct[4]= 1;
4194             skip_dct[5]= 1;
4195         }else{
4196             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
4197             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
4198         }
4199         /* pre-quantization: skip blocks whose prediction error is already negligible */
4200         if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
4201             //FIXME optimize
4202             if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
4203             if(s->dsp.sad[1](NULL, ptr_y            + 8, dest_y            + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
4204             if(s->dsp.sad[1](NULL, ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
4205             if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
4206             if(s->dsp.sad[1](NULL, ptr_cb              , dest_cb              , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
4207             if(s->dsp.sad[1](NULL, ptr_cr              , dest_cr              , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
4208         }
4209     }
4210
4211     if(s->avctx->quantizer_noise_shaping){
4212         if(!skip_dct[0]) get_vissual_weight(weight[0], ptr_y                 , wrap_y);
4213         if(!skip_dct[1]) get_vissual_weight(weight[1], ptr_y              + 8, wrap_y);
4214         if(!skip_dct[2]) get_vissual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
4215         if(!skip_dct[3]) get_vissual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
4216         if(!skip_dct[4]) get_vissual_weight(weight[4], ptr_cb                , wrap_c);
4217         if(!skip_dct[5]) get_vissual_weight(weight[5], ptr_cr                , wrap_c);
4218         memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*6);
4219     }
4220             
4221     /* DCT & quantize */
4222     assert(s->out_format!=FMT_MJPEG || s->qscale==8);
4223     {
4224         for(i=0;i<6;i++) {
4225             if(!skip_dct[i]){
4226                 int overflow;
4227                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
4228             // FIXME we could decide to change the quantizer instead of clipping
4229             // JS: I don't think that would be a good idea, it could lower quality instead
4230             //     of improving it. Just INTRADC clipping deserves changes in the quantizer
4231                 if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
4232             }else
4233                 s->block_last_index[i]= -1;
4234         }
4235         if(s->avctx->quantizer_noise_shaping){
4236             for(i=0;i<6;i++) {
4237                 if(!skip_dct[i]){
4238                     s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale);
4239                 }
4240             }
4241         }
4242         
4243         if(s->luma_elim_threshold && !s->mb_intra)
4244             for(i=0; i<4; i++)
4245                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
4246         if(s->chroma_elim_threshold && !s->mb_intra)
4247             for(i=4; i<6; i++)
4248                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
4249
4250         if(s->flags & CODEC_FLAG_CBP_RD){
4251             for(i=0;i<6;i++) {
4252                 if(s->block_last_index[i] == -1)
4253                     s->coded_score[i]= INT_MAX/256;
4254             }
4255         }
4256     }
4257
4258     if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
4259         s->block_last_index[4]=
4260         s->block_last_index[5]= 0;
4261         s->block[4][0]=
4262         s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
4263     }
4264
4265     //non-C quantize code returns an incorrect block_last_index FIXME
4266     if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
4267         for(i=0; i<6; i++){
4268             int j;
4269             if(s->block_last_index[i]>0){
4270                 for(j=63; j>0; j--){
4271                     if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
4272                 }
4273                 s->block_last_index[i]= j;
4274             }
4275         }
4276     }
4277
4278     /* huffman encode */
4279     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
4280     case CODEC_ID_MPEG1VIDEO:
4281     case CODEC_ID_MPEG2VIDEO:
4282         mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
4283     case CODEC_ID_MPEG4:
4284         mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
4285     case CODEC_ID_MSMPEG4V2:
4286     case CODEC_ID_MSMPEG4V3:
4287     case CODEC_ID_WMV1:
4288         msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
4289     case CODEC_ID_WMV2:
4290          ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
4291     case CODEC_ID_H261:
4292         ff_h261_encode_mb(s, s->block, motion_x, motion_y); break;
4293     case CODEC_ID_H263:
4294     case CODEC_ID_H263P:
4295     case CODEC_ID_FLV1:
4296     case CODEC_ID_RV10:
4297     case CODEC_ID_RV20:
4298         h263_encode_mb(s, s->block, motion_x, motion_y); break;
4299     case CODEC_ID_MJPEG:
4300         mjpeg_encode_mb(s, s->block); break;
4301     default:
4302         assert(0);
4303     }
4304 }
4305
4306 #endif //CONFIG_ENCODERS
4307
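/* Release all internal/user picture buffers and reset the parse context;
 * called when the codec is flushed (e.g. on a seek). */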
4308 void ff_mpeg_flush(AVCodecContext *avctx){
4309     int i;
4310     MpegEncContext *s = avctx->priv_data;
4311     
4312     if(s==NULL || s->picture==NULL) 
4313         return;
4314     
4315     for(i=0; i<MAX_PICTURE_COUNT; i++){
4316        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
4317                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
4318         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
4319     }
4320     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
4321     
4322     s->mb_x= s->mb_y= 0;
4323     
4324     s->parse_context.state= -1;
4325     s->parse_context.frame_start_found= 0;
4326     s->parse_context.overread= 0;
4327     s->parse_context.overread_index= 0;
4328     s->parse_context.index= 0;
4329     s->parse_context.last_index= 0;
4330     s->bitstream_buffer_size=0;
4331 }
4332
4333 #ifdef CONFIG_ENCODERS
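/* Append 'length' bits from src to pb: short or unaligned runs go through
 * 16-bit put_bits() calls, long byte-aligned runs are memcpy()ed directly
 * into the output buffer. */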
4334 void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
4335 {
4336     const uint16_t *srcw= (uint16_t*)src;
4337     int words= length>>4;
4338     int bits= length&15;
4339     int i;
4340
4341     if(length==0) return;
4342     
4343     if(words < 16){
4344         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4345     }else if(put_bits_count(pb)&7){
4346         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4347     }else{
4348         for(i=0; put_bits_count(pb)&31; i++)
4349             put_bits(pb, 8, src[i]);
4350         flush_put_bits(pb);
4351         memcpy(pbBufPtr(pb), src+i, 2*words-i);
4352         skip_put_bytes(pb, 2*words-i);
4353     }
4354         
4355     put_bits(pb, bits, be2me_16(srcw[words])>>(16-bits));
4356 }
4357
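/* Copy the encoder state that encoding a single macroblock may modify from *s
 * to *d; encode_mb_hq() uses this (together with copy_context_after_encode())
 * to back up and restore the context between trial encodes. */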
4358 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
4359     int i;
4360
4361     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
4362
4363     /* mpeg1 */
4364     d->mb_skip_run= s->mb_skip_run;
4365     for(i=0; i<3; i++)
4366         d->last_dc[i]= s->last_dc[i];
4367     
4368     /* statistics */
4369     d->mv_bits= s->mv_bits;
4370     d->i_tex_bits= s->i_tex_bits;
4371     d->p_tex_bits= s->p_tex_bits;
4372     d->i_count= s->i_count;
4373     d->f_count= s->f_count;
4374     d->b_count= s->b_count;
4375     d->skip_count= s->skip_count;
4376     d->misc_bits= s->misc_bits;
4377     d->last_bits= 0;
4378
4379     d->mb_skiped= 0;
4380     d->qscale= s->qscale;
4381     d->dquant= s->dquant;
4382 }
4383
4384 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
4385     int i;
4386
4387     memcpy(d->mv, s->mv, 2*4*2*sizeof(int)); 
4388     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
4389     
4390     /* mpeg1 */
4391     d->mb_skip_run= s->mb_skip_run;
4392     for(i=0; i<3; i++)
4393         d->last_dc[i]= s->last_dc[i];
4394     
4395     /* statistics */
4396     d->mv_bits= s->mv_bits;
4397     d->i_tex_bits= s->i_tex_bits;
4398     d->p_tex_bits= s->p_tex_bits;
4399     d->i_count= s->i_count;
4400     d->f_count= s->f_count;
4401     d->b_count= s->b_count;
4402     d->skip_count= s->skip_count;
4403     d->misc_bits= s->misc_bits;
4404
4405     d->mb_intra= s->mb_intra;
4406     d->mb_skiped= s->mb_skiped;
4407     d->mv_type= s->mv_type;
4408     d->mv_dir= s->mv_dir;
4409     d->pb= s->pb;
4410     if(s->data_partitioning){
4411         d->pb2= s->pb2;
4412         d->tex_pb= s->tex_pb;
4413     }
4414     d->block= s->block;
4415     for(i=0; i<6; i++)
4416         d->block_last_index[i]= s->block_last_index[i];
4417     d->interlaced_dct= s->interlaced_dct;
4418     d->qscale= s->qscale;
4419 }
4420
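/* Trial-encode one macroblock with the given candidate type into a scratch
 * bitstream (and, for rate-distortion MB decision, reconstruct it to measure
 * the SSE); keep the result if its score beats the current best. */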
4421 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type, 
4422                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
4423                            int *dmin, int *next_block, int motion_x, int motion_y)
4424 {
4425     int score;
4426     uint8_t *dest_backup[3];
4427     
4428     copy_context_before_encode(s, backup, type);
4429
4430     s->block= s->blocks[*next_block];
4431     s->pb= pb[*next_block];
4432     if(s->data_partitioning){
4433         s->pb2   = pb2   [*next_block];
4434         s->tex_pb= tex_pb[*next_block];
4435     }
4436     
4437     if(*next_block){
4438         memcpy(dest_backup, s->dest, sizeof(s->dest));
4439         s->dest[0] = s->rd_scratchpad;
4440         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
4441         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
4442         assert(s->linesize >= 32); //FIXME
4443     }
4444
4445     encode_mb(s, motion_x, motion_y);
4446     
4447     score= put_bits_count(&s->pb);
4448     if(s->data_partitioning){
4449         score+= put_bits_count(&s->pb2);
4450         score+= put_bits_count(&s->tex_pb);
4451     }
4452    
4453     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
4454         MPV_decode_mb(s, s->block);
4455
4456         score *= s->lambda2;
4457         score += sse_mb(s) << FF_LAMBDA_SHIFT;
4458     }
4459     
4460     if(*next_block){
4461         memcpy(s->dest, dest_backup, sizeof(s->dest));
4462     }
4463
4464     if(score<*dmin){
4465         *dmin= score;
4466         *next_block^=1;
4467
4468         copy_context_after_encode(best, s, type);
4469     }
4470 }
4471                 
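/* Sum of squared errors between two rectangles; uses the dsputil 16x16/8x8
 * SSE functions when possible and a scalar fallback otherwise. */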
4472 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
4473     uint32_t *sq = squareTbl + 256;
4474     int acc=0;
4475     int x,y;
4476     
4477     if(w==16 && h==16) 
4478         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
4479     else if(w==8 && h==8)
4480         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
4481     
4482     for(y=0; y<h; y++){
4483         for(x=0; x<w; x++){
4484             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
4485         } 
4486     }
4487     
4488     assert(acc>=0);
4489     
4490     return acc;
4491 }
4492
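/* SSE (or NSSE, if selected as mb_cmp) of the current macroblock against the
 * source picture, clipped to the picture borders. */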
4493 static int sse_mb(MpegEncContext *s){
4494     int w= 16;
4495     int h= 16;
4496
4497     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
4498     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
4499
4500     if(w==16 && h==16)
4501       if(s->avctx->mb_cmp == FF_CMP_NSSE){
4502         return  s->dsp.nsse[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4503                +s->dsp.nsse[1](s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4504                +s->dsp.nsse[1](s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4505       }else{
4506         return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4507                +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4508                +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4509       }
4510     else
4511         return  sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
4512                +sse(s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
4513                +sse(s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
4514 }
4515
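/* Per-slice worker for the motion-estimation pre-pass (avctx->pre_me);
 * scans the macroblocks of the slice in reverse order. */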
4516 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
4517     MpegEncContext *s= arg;
4518
4519     
4520     s->me.pre_pass=1;
4521     s->me.dia_size= s->avctx->pre_dia_size;
4522     s->first_slice_line=1;
4523     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
4524         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
4525             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4526         }
4527         s->first_slice_line=0;
4528     }
4529     
4530     s->me.pre_pass=0;
4531     
4532     return 0;
4533 }
4534
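/* Per-slice motion estimation worker: estimates P- or B-frame motion for every
 * macroblock between start_mb_y and end_mb_y. */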
4535 static int estimate_motion_thread(AVCodecContext *c, void *arg){
4536     MpegEncContext *s= arg;
4537
4538     s->me.dia_size= s->avctx->dia_size;
4539     s->first_slice_line=1;
4540     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
4541         s->mb_x=0; //for block init below
4542         ff_init_block_index(s);
4543         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
4544             s->block_index[0]+=2;
4545             s->block_index[1]+=2;
4546             s->block_index[2]+=2;
4547             s->block_index[3]+=2;
4548             
4549             /* compute motion vector & mb_type and store in context */
4550             if(s->pict_type==B_TYPE)
4551                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
4552             else
4553                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4554         }
4555         s->first_slice_line=0;
4556     }
4557     return 0;
4558 }
4559
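/* Per-slice worker that computes the variance and mean of every source
 * macroblock, used by the rate control / adaptive quantization. */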
4560 static int mb_var_thread(AVCodecContext *c, void *arg){
4561     MpegEncContext *s= arg;
4562     int mb_x, mb_y;
4563
4564     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4565         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4566             int xx = mb_x * 16;
4567             int yy = mb_y * 16;
4568             uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
4569             int varc;
4570             int sum = s->dsp.pix_sum(pix, s->linesize);
4571     
4572             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
4573
4574             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
4575             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
4576             s->me.mb_var_sum_temp    += varc;
4577         }
4578     }
4579     return 0;
4580 }
4581
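/* Terminate the current slice: merge MPEG-4 partitions, write stuffing bits
 * and byte-align the bitstream. */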
4582 static void write_slice_end(MpegEncContext *s){
4583     if(s->codec_id==CODEC_ID_MPEG4){
4584         if(s->partitioned_frame){
4585             ff_mpeg4_merge_partitions(s);
4586         }
4587     
4588         ff_mpeg4_stuffing(&s->pb);
4589     }else if(s->out_format == FMT_MJPEG){
4590         ff_mjpeg_stuffing(&s->pb);
4591     }
4592
4593     align_put_bits(&s->pb);
4594     flush_put_bits(&s->pb);
4595     
4596     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
4597         s->misc_bits+= get_bits_diff(s);
4598 }
4599
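/* Main per-slice encoding loop: iterates over the macroblocks of the slice,
 * writes resync/GOB headers where needed, performs the mode decision (single
 * candidate type, or a rate(-distortion) search over several) and encodes
 * each macroblock. */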
4600 static int encode_thread(AVCodecContext *c, void *arg){
4601     MpegEncContext *s= arg;
4602     int mb_x, mb_y, pdif = 0;
4603     int i, j;
4604     MpegEncContext best_s, backup_s;
4605     uint8_t bit_buf[2][MAX_MB_BYTES];
4606     uint8_t bit_buf2[2][MAX_MB_BYTES];
4607     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
4608     PutBitContext pb[2], pb2[2], tex_pb[2];
4609 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
4610
4611     for(i=0; i<2; i++){
4612         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
4613         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
4614         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
4615     }
4616
4617     s->last_bits= put_bits_count(&s->pb);
4618     s->mv_bits=0;
4619     s->misc_bits=0;
4620     s->i_tex_bits=0;
4621     s->p_tex_bits=0;
4622     s->i_count=0;
4623     s->f_count=0;
4624     s->b_count=0;
4625     s->skip_count=0;
4626
4627     for(i=0; i<3; i++){
4628         /* init last dc values */
4629         /* note: quant matrix value (8) is implied here */
4630         s->last_dc[i] = 128 << s->intra_dc_precision;
4631         
4632         s->current_picture_ptr->error[i] = 0;
4633     }
4634     s->mb_skip_run = 0;
4635     memset(s->last_mv, 0, sizeof(s->last_mv));
4636      
4637     s->last_mv_dir = 0;
4638
4639     switch(s->codec_id){
4640     case CODEC_ID_H263:
4641     case CODEC_ID_H263P:
4642     case CODEC_ID_FLV1:
4643         s->gob_index = ff_h263_get_gob_height(s);
4644         break;
4645     case CODEC_ID_MPEG4:
4646         if(s->partitioned_frame)
4647             ff_mpeg4_init_partitions(s);
4648         break;
4649     }
4650
4651     s->resync_mb_x=0;
4652     s->resync_mb_y=0; 
4653     s->first_slice_line = 1;
4654     s->ptr_lastgob = s->pb.buf;
4655     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4656 //    printf("row %d at %X\n", s->mb_y, (int)s);
4657         s->mb_x=0;
4658         s->mb_y= mb_y;
4659
4660         ff_set_qscale(s, s->qscale);
4661         ff_init_block_index(s);
4662         
4663         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4664             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
4665             int mb_type= s->mb_type[xy];
4666 //            int d;
4667             int dmin= INT_MAX;
4668             int dir;
4669
4670             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
4671                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4672                 return -1;
4673             }
4674             if(s->data_partitioning){
4675                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
4676                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
4677                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4678                     return -1;
4679                 }
4680             }
4681
4682             s->mb_x = mb_x;
4683             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
4684             ff_update_block_index(s);
4685
4686             if(s->codec_id == CODEC_ID_H261){
4687                 ff_h261_reorder_mb_index(s);
4688                 xy= s->mb_y*s->mb_stride + s->mb_x;
4689                 mb_type= s->mb_type[xy];
4690             }
4691
4692             /* write gob / video packet header  */
4693             if(s->rtp_mode){
4694                 int current_packet_size, is_gob_start;
4695                 
4696                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
4697                 
4698                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0; 
4699                 
4700                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
4701                 
4702                 switch(s->codec_id){
4703                 case CODEC_ID_H263:
4704                 case CODEC_ID_H263P:
4705                     if(!s->h263_slice_structured)
4706                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
4707                     break;
4708                 case CODEC_ID_MPEG2VIDEO:
4709                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
4710                 case CODEC_ID_MPEG1VIDEO:
4711                     if(s->mb_skip_run) is_gob_start=0;
4712                     break;
4713                 }
4714
4715                 if(is_gob_start){
4716                     if(s->start_mb_y != mb_y || mb_x!=0){
4717                         write_slice_end(s);
4718
4719                         if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
4720                             ff_mpeg4_init_partitions(s);
4721                         }
4722                     }
4723                 
4724                     assert((put_bits_count(&s->pb)&7) == 0);
4725                     current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
4726                     
4727                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
4728                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
4729                         int d= 100 / s->avctx->error_rate;
4730                         if(r % d == 0){
4731                             current_packet_size=0;
4732 #ifndef ALT_BITSTREAM_WRITER
4733                             s->pb.buf_ptr= s->ptr_lastgob;
4734 #endif
4735                             assert(pbBufPtr(&s->pb) == s->ptr_lastgob);
4736                         }
4737                     }
4738
4739                     if (s->avctx->rtp_callback){
4740                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
4741                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
4742                     }
4743                     
4744                     switch(s->codec_id){
4745                     case CODEC_ID_MPEG4:
4746                         ff_mpeg4_encode_video_packet_header(s);
4747                         ff_mpeg4_clean_buffers(s);
4748                     break;
4749                     case CODEC_ID_MPEG1VIDEO:
4750                     case CODEC_ID_MPEG2VIDEO:
4751                         ff_mpeg1_encode_slice_header(s);
4752                         ff_mpeg1_clean_buffers(s);
4753                     break;
4754                     case CODEC_ID_H263:
4755                     case CODEC_ID_H263P:
4756                         h263_encode_gob_header(s, mb_y);                       
4757                     break;
4758                     }
4759
4760                     if(s->flags&CODEC_FLAG_PASS1){
4761                         int bits= put_bits_count(&s->pb);
4762                         s->misc_bits+= bits - s->last_bits;
4763                         s->last_bits= bits;
4764                     }
4765     
4766                     s->ptr_lastgob += current_packet_size;
4767                     s->first_slice_line=1;
4768                     s->resync_mb_x=mb_x;
4769                     s->resync_mb_y=mb_y;
4770                 }
4771             }
4772
4773             if(  (s->resync_mb_x   == s->mb_x)
4774                && s->resync_mb_y+1 == s->mb_y){
4775                 s->first_slice_line=0; 
4776             }
4777
4778             s->mb_skiped=0;
4779             s->dquant=0; //only for QP_RD
4780
4781             if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
4782                 int next_block=0;
4783                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
4784
4785                 copy_context_before_encode(&backup_s, s, -1);
4786                 backup_s.pb= s->pb;
4787                 best_s.data_partitioning= s->data_partitioning;
4788                 best_s.partitioned_frame= s->partitioned_frame;
4789                 if(s->data_partitioning){
4790                     backup_s.pb2= s->pb2;
4791                     backup_s.tex_pb= s->tex_pb;
4792                 }
4793
4794                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
4795                     s->mv_dir = MV_DIR_FORWARD;
4796                     s->mv_type = MV_TYPE_16X16;
4797                     s->mb_intra= 0;
4798                     s->mv[0][0][0] = s->p_mv_table[xy][0];
4799                     s->mv[0][0][1] = s->p_mv_table[xy][1];
4800                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb, 
4801                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4802                 }
4803                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){ 
4804                     s->mv_dir = MV_DIR_FORWARD;
4805                     s->mv_type = MV_TYPE_FIELD;
4806                     s->mb_intra= 0;
4807                     for(i=0; i<2; i++){
4808                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
4809                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
4810                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
4811                     }
4812                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb, 
4813                                  &dmin, &next_block, 0, 0);
4814                 }
4815                 if(mb_type&CANDIDATE_MB_TYPE_SKIPED){
4816                     s->mv_dir = MV_DIR_FORWARD;
4817                     s->mv_type = MV_TYPE_16X16;
4818                     s->mb_intra= 0;
4819                     s->mv[0][0][0] = 0;
4820                     s->mv[0][0][1] = 0;
4821                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPED, pb, pb2, tex_pb, 
4822                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4823                 }
4824                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){                 
4825                     s->mv_dir = MV_DIR_FORWARD;
4826                     s->mv_type = MV_TYPE_8X8;
4827                     s->mb_intra= 0;
4828                     for(i=0; i<4; i++){
4829                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
4830                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
4831                     }
4832                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb, 
4833                                  &dmin, &next_block, 0, 0);
4834                 }
4835                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
4836                     s->mv_dir = MV_DIR_FORWARD;
4837                     s->mv_type = MV_TYPE_16X16;
4838                     s->mb_intra= 0;
4839                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
4840                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
4841                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb, 
4842                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4843                 }
4844                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
4845                     s->mv_dir = MV_DIR_BACKWARD;
4846                     s->mv_type = MV_TYPE_16X16;
4847                     s->mb_intra= 0;
4848                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
4849                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
4850                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb, 
4851                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
4852                 }
4853                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
4854                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
4855                     s->mv_type = MV_TYPE_16X16;
4856                     s->mb_intra= 0;
4857                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
4858                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
4859                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
4860                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
4861                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb, 
4862                                  &dmin, &next_block, 0, 0);
4863                 }
4864                 if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
4865                     int mx= s->b_direct_mv_table[xy][0];
4866                     int my= s->b_direct_mv_table[xy][1];
4867                     
4868                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
4869                     s->mb_intra= 0;
4870                     ff_mpeg4_set_direct_mv(s, mx, my);
4871                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb, 
4872                                  &dmin, &next_block, mx, my);
4873                 }
4874                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){ 
4875                     s->mv_dir = MV_DIR_FORWARD;
4876                     s->mv_type = MV_TYPE_FIELD;
4877                     s->mb_intra= 0;
4878                     for(i=0; i<2; i++){
4879                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
4880                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
4881                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
4882                     }
4883                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb, 
4884                                  &dmin, &next_block, 0, 0);
4885                 }
4886                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){ 
4887                     s->mv_dir = MV_DIR_BACKWARD;
4888                     s->mv_type = MV_TYPE_FIELD;
4889                     s->mb_intra= 0;
4890                     for(i=0; i<2; i++){
4891                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
4892                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
4893                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
4894                     }
4895                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb, 
4896                                  &dmin, &next_block, 0, 0);
4897                 }
4898                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){ 
4899                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
4900                     s->mv_type = MV_TYPE_FIELD;
4901                     s->mb_intra= 0;
4902                     for(dir=0; dir<2; dir++){
4903                         for(i=0; i<2; i++){
4904                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
4905                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
4906                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
4907                         }
4908                     }
4909                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb, 
4910                                  &dmin, &next_block, 0, 0);
4911                 }
4912                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
4913                     s->mv_dir = 0;
4914                     s->mv_type = MV_TYPE_16X16;
4915                     s->mb_intra= 1;
4916                     s->mv[0][0][0] = 0;
4917                     s->mv[0][0][1] = 0;
4918                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb, 
4919                                  &dmin, &next_block, 0, 0);
4920                     if(s->h263_pred || s->h263_aic){
4921                         if(best_s.mb_intra)
4922                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
4923                         else
4924                             ff_clean_intra_table_entries(s); //old mode?
4925                     }
4926                 }
4927
4928                 if(s->flags & CODEC_FLAG_QP_RD){
4929                     if(best_s.mv_type==MV_TYPE_16X16 && !(best_s.mv_dir&MV_DIRECT)){
4930                         const int last_qp= backup_s.qscale;
4931                         int dquant, dir, qp, dc[6];
4932                         DCTELEM ac[6][16];
4933                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
4934                         
4935                         assert(backup_s.dquant == 0);
4936
4937                         //FIXME intra
4938                         s->mv_dir= best_s.mv_dir;
4939                         s->mv_type = MV_TYPE_16X16;
4940                         s->mb_intra= best_s.mb_intra;
4941                         s->mv[0][0][0] = best_s.mv[0][0][0];
4942                         s->mv[0][0][1] = best_s.mv[0][0][1];
4943                         s->mv[1][0][0] = best_s.mv[1][0][0];
4944                         s->mv[1][0][1] = best_s.mv[1][0][1];
4945                         
4946                         dir= s->pict_type == B_TYPE ? 2 : 1;
4947                         if(last_qp + dir > s->avctx->qmax) dir= -dir;
4948                         for(dquant= dir; dquant<=2 && dquant>=-2; dquant += dir){
4949                             qp= last_qp + dquant;
4950                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
4951                                 break;
4952                             backup_s.dquant= dquant;
4953                             if(s->mb_intra && s->dc_val[0]){
4954                                 for(i=0; i<6; i++){
4955                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
4956                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
4957                                 }
4958                             }
4959
4960                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb, 
4961                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
4962                             if(best_s.qscale != qp){
4963                                 if(s->mb_intra && s->dc_val[0]){
4964                                     for(i=0; i<6; i++){
4965                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
4966                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
4967                                     }
4968                                 }
4969                                 if(dir > 0 && dquant==dir){
4970                                     dquant= 0;
4971                                     dir= -dir;
4972                                 }else
4973                                     break;
4974                             }
4975                         }
4976                         qp= best_s.qscale;
4977                         s->current_picture.qscale_table[xy]= qp;
4978                     }
4979                 }
4980
4981                 copy_context_after_encode(s, &best_s, -1);
4982                 
4983                 pb_bits_count= put_bits_count(&s->pb);
4984                 flush_put_bits(&s->pb);
4985                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
4986                 s->pb= backup_s.pb;
4987                 
4988                 if(s->data_partitioning){
4989                     pb2_bits_count= put_bits_count(&s->pb2);
4990                     flush_put_bits(&s->pb2);
4991                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
4992                     s->pb2= backup_s.pb2;
4993                     
4994                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
4995                     flush_put_bits(&s->tex_pb);
4996                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
4997                     s->tex_pb= backup_s.tex_pb;
4998                 }
4999                 s->last_bits= put_bits_count(&s->pb);
5000                
5001                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5002                     ff_h263_update_motion_val(s);
5003         
5004                 if(next_block==0){ //FIXME 16 vs linesize16
5005                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
5006                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
5007                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
5008                 }
5009
5010                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
5011                     MPV_decode_mb(s, s->block);
5012             } else {
5013                 int motion_x, motion_y;
5014                 s->mv_type=MV_TYPE_16X16;
5015                 // only one MB-Type possible
5016                 
5017                 switch(mb_type){
5018                 case CANDIDATE_MB_TYPE_INTRA:
5019                     s->mv_dir = 0;
5020                     s->mb_intra= 1;
5021                     motion_x= s->mv[0][0][0] = 0;
5022                     motion_y= s->mv[0][0][1] = 0;
5023                     break;
5024                 case CANDIDATE_MB_TYPE_INTER:
5025                     s->mv_dir = MV_DIR_FORWARD;
5026                     s->mb_intra= 0;
5027                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
5028                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
5029                     break;
5030                 case CANDIDATE_MB_TYPE_INTER_I:
5031                     s->mv_dir = MV_DIR_FORWARD;
5032                     s->mv_type = MV_TYPE_FIELD;
5033                     s->mb_intra= 0;
5034                     for(i=0; i<2; i++){
5035                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
5036                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
5037                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
5038                     }
5039                     motion_x = motion_y = 0;
5040                     break;
5041                 case CANDIDATE_MB_TYPE_INTER4V:
5042                     s->mv_dir = MV_DIR_FORWARD;
5043                     s->mv_type = MV_TYPE_8X8;
5044                     s->mb_intra= 0;
5045                     for(i=0; i<4; i++){
5046                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5047                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5048                     }
5049                     motion_x= motion_y= 0;
5050                     break;
5051                 case CANDIDATE_MB_TYPE_DIRECT:
5052                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5053                     s->mb_intra= 0;
5054                     motion_x=s->b_direct_mv_table[xy][0];
5055                     motion_y=s->b_direct_mv_table[xy][1];
5056                     ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
5057                     break;
5058                 case CANDIDATE_MB_TYPE_BIDIR:
5059                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5060                     s->mb_intra= 0;
5061                     motion_x=0;
5062                     motion_y=0;
5063                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5064                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5065                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5066                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5067                     break;
5068                 case CANDIDATE_MB_TYPE_BACKWARD:
5069                     s->mv_dir = MV_DIR_BACKWARD;
5070                     s->mb_intra= 0;
5071                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5072                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5073                     break;
5074                 case CANDIDATE_MB_TYPE_FORWARD:
5075                     s->mv_dir = MV_DIR_FORWARD;
5076                     s->mb_intra= 0;
5077                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5078                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5079 //                    printf(" %d %d ", motion_x, motion_y);
5080                     break;
5081                 case CANDIDATE_MB_TYPE_FORWARD_I:
5082                     s->mv_dir = MV_DIR_FORWARD;
5083                     s->mv_type = MV_TYPE_FIELD;
5084                     s->mb_intra= 0;
5085                     for(i=0; i<2; i++){
5086                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5087                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5088                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5089                     }
5090                     motion_x=motion_y=0;
5091                     break;
5092                 case CANDIDATE_MB_TYPE_BACKWARD_I:
5093                     s->mv_dir = MV_DIR_BACKWARD;
5094                     s->mv_type = MV_TYPE_FIELD;
5095                     s->mb_intra= 0;
5096                     for(i=0; i<2; i++){
5097                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5098                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5099                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5100                     }
5101                     motion_x=motion_y=0;
5102                     break;
5103                 case CANDIDATE_MB_TYPE_BIDIR_I:
5104                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5105                     s->mv_type = MV_TYPE_FIELD;
5106                     s->mb_intra= 0;
5107                     for(dir=0; dir<2; dir++){
5108                         for(i=0; i<2; i++){
5109                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5110                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5111                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5112                         }
5113                     }
5114                     motion_x=motion_y=0;
5115                     break;
5116                 default:
5117                     motion_x=motion_y=0; //gcc warning fix
5118                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
5119                 }
5120
5121                 encode_mb(s, motion_x, motion_y);
5122
5123                 // RAL: Update last macroblock type
5124                 s->last_mv_dir = s->mv_dir;
5125             
5126                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5127                     ff_h263_update_motion_val(s);
5128                 
5129                 MPV_decode_mb(s, s->block);
5130             }
5131
5132             /* clean the MV table in I/P/S frames for direct mode in B-frames */
5133             if(s->mb_intra /* && I,P,S_TYPE */){
5134                 s->p_mv_table[xy][0]=0;
5135                 s->p_mv_table[xy][1]=0;
5136             }
5137             
5138             if(s->flags&CODEC_FLAG_PSNR){
5139                 int w= 16;
5140                 int h= 16;
5141
5142                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
5143                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
5144
5145                 s->current_picture_ptr->error[0] += sse(
5146                     s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
5147                     s->dest[0], w, h, s->linesize);
5148                 s->current_picture_ptr->error[1] += sse(
5149                     s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5150                     s->dest[1], w>>1, h>>1, s->uvlinesize);
5151                 s->current_picture_ptr->error[2] += sse(
5152                     s, s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5153                     s->dest[2], w>>1, h>>1, s->uvlinesize);
5154             }
5155             if(s->loop_filter){
5156                 if(s->out_format == FMT_H263)
5157                     ff_h263_loop_filter(s);
5158             }
5159 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
5160         }
5161     }
5162
5163     //not beautiful here, but we must write it before flushing, so it has to be here
5164     if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
5165         msmpeg4_encode_ext_header(s);
5166
5167     write_slice_end(s);
5168
5169     /* Send the last GOB if RTP */    
5170     if (s->avctx->rtp_callback) {
5171         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
5172         pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
5173         /* Call the RTP callback to send the last GOB */
5174         emms_c();
5175         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
5176     }
5177
5178     return 0;
5179 }
5180
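/* Merge the statistics gathered by a per-thread slice context back into the
 * main context; MERGE() adds the field into dst and zeroes it in src. */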
5181 #define MERGE(field) dst->field += src->field; src->field=0
5182 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
5183     MERGE(me.scene_change_score);
5184     MERGE(me.mc_mb_var_sum_temp);
5185     MERGE(me.mb_var_sum_temp);
5186 }
5187
5188 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
5189     int i;
5190
5191     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
5192     MERGE(dct_count[1]);
5193     MERGE(mv_bits);
5194     MERGE(i_tex_bits);
5195     MERGE(p_tex_bits);
5196     MERGE(i_count);
5197     MERGE(f_count);
5198     MERGE(b_count);
5199     MERGE(skip_count);
5200     MERGE(misc_bits);
5201     MERGE(error_count);
5202     MERGE(padding_bug_score);
5203
5204     if(dst->avctx->noise_reduction){
5205         for(i=0; i<64; i++){
5206             MERGE(dct_error_sum[0][i]);
5207             MERGE(dct_error_sum[1][i]);
5208         }
5209     }
5210     
5211     assert(put_bits_count(&src->pb) % 8 ==0);
5212     assert(put_bits_count(&dst->pb) % 8 ==0);
5213     ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
5214     flush_put_bits(&dst->pb);
5215 }
5216
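/* Encode one picture: set up the per-thread contexts, run (threaded) motion
 * estimation or intra complexity analysis, handle scene-change detection and
 * f_code/b_code selection, then encode the slices. */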
5217 static void encode_picture(MpegEncContext *s, int picture_number)
5218 {
5219     int i;
5220     int bits;
5221
5222     s->picture_number = picture_number;
5223     
5224     /* Reset the average MB variance */
5225     s->me.mb_var_sum_temp    =
5226     s->me.mc_mb_var_sum_temp = 0;
5227
5228     /* we need to initialize some time vars before we can encode b-frames */
5229     // RAL: Condition added for MPEG1VIDEO
5230     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
5231         ff_set_mpeg4_time(s, s->picture_number);  //FIXME rename and use has_b_frames or similar
5232         
5233     s->me.scene_change_score=0;
5234     
5235 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
5236     
5237     if(s->pict_type==I_TYPE){
5238         if(s->msmpeg4_version >= 3) s->no_rounding=1;
5239         else                        s->no_rounding=0;
5240     }else if(s->pict_type!=B_TYPE){
5241         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
5242             s->no_rounding ^= 1;          
5243     }
5244     
5245     s->mb_intra=0; //for the rate distortion & bit compare functions
5246     for(i=1; i<s->avctx->thread_count; i++){
5247         ff_update_duplicate_context(s->thread_context[i], s);
5248     }
5249
5250     ff_init_me(s);
5251
5252     /* Estimate motion for every MB */
5253     if(s->pict_type != I_TYPE){
5254         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
5255         s->lambda2= (s->lambda2* s->avctx->me_penalty_compensation + 128)>>8;
5256         if(s->pict_type != B_TYPE && s->avctx->me_threshold==0){
5257             if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
5258                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5259             }
5260         }
5261
5262         s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5263     }else /* if(s->pict_type == I_TYPE) */{
5264         /* I-Frame */
5265         for(i=0; i<s->mb_stride*s->mb_height; i++)
5266             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
5267         
5268         if(!s->fixed_qscale){
5269             /* finding spatial complexity for I-frame rate control */
5270             s->avctx->execute(s->avctx, mb_var_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5271         }
5272     }
5273     for(i=1; i<s->avctx->thread_count; i++){
5274         merge_context_after_me(s, s->thread_context[i]);
5275     }
5276     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
5277     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
5278     emms_c();
5279
5280     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){
5281         s->pict_type= I_TYPE;
5282         for(i=0; i<s->mb_stride*s->mb_height; i++)
5283             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
5284 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
5285     }
5286
5287     if(!s->umvplus){
5288         if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
5289             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
5290
5291             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5292                 int a,b;
5293                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
5294                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
5295                 s->f_code= FFMAX(s->f_code, FFMAX(a,b));
5296             }
5297                     
5298             ff_fix_long_p_mvs(s);
5299             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
5300             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5301                 int j;
5302                 for(i=0; i<2; i++){
5303                     for(j=0; j<2; j++)
5304                         ff_fix_long_mvs(s, s->p_field_select_table[i], j, 
5305                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
5306                 }
5307             }
5308         }
5309
5310         if(s->pict_type==B_TYPE){
5311             int a, b;
5312
5313             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
5314             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
5315             s->f_code = FFMAX(a, b);
5316
5317             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
5318             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
5319             s->b_code = FFMAX(a, b);
5320
5321             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
5322             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
5323             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
5324             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
5325             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5326                 int dir, j;
5327                 for(dir=0; dir<2; dir++){
5328                     for(i=0; i<2; i++){
5329                         for(j=0; j<2; j++){
5330                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I) 
5331                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
5332                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j, 
5333                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
5334                         }
5335                     }
5336                 }
5337             }
5338         }
5339     }
5340
5341     if (!s->fixed_qscale) 
5342         s->current_picture.quality = ff_rate_estimate_qscale(s); //FIXME pic_ptr
5343
5344     if(s->adaptive_quant){
5345         switch(s->codec_id){
5346         case CODEC_ID_MPEG4:
5347             ff_clean_mpeg4_qscales(s);
5348             break;
5349         case CODEC_ID_H263:
5350         case CODEC_ID_H263P:
5351         case CODEC_ID_FLV1:
5352             ff_clean_h263_qscales(s);
5353             break;
5354         }
5355
5356         s->lambda= s->lambda_table[0];
5357         //FIXME broken
5358     }else
5359         s->lambda= s->current_picture.quality;
5360 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
5361     update_qscale(s);
5362     
5363     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE)) 
5364         s->qscale= 3; //reduce clipping problems
5365         
5366     if (s->out_format == FMT_MJPEG) {
5367         /* for mjpeg, we do include qscale in the matrix */
5368         s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
5369         for(i=1;i<64;i++){
5370             int j= s->dsp.idct_permutation[i];
5371
5372             s->intra_matrix[j] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
5373         }
5374         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16, 
5375                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
5376         s->qscale= 8;
5377     }
5378     
5379     //FIXME var duplication
5380     s->current_picture_ptr->key_frame=
5381     s->current_picture.key_frame= s->pict_type == I_TYPE; //FIXME pic_ptr
5382     s->current_picture_ptr->pict_type=
5383     s->current_picture.pict_type= s->pict_type;
5384
5385     if(s->current_picture.key_frame)
5386         s->picture_in_gop_number=0;
5387
5388     s->last_bits= put_bits_count(&s->pb);
5389     switch(s->out_format) {
5390     case FMT_MJPEG:
5391         mjpeg_picture_header(s);
5392         break;
5393     case FMT_H261:
5394         ff_h261_encode_picture_header(s, picture_number);
5395         break;
5396     case FMT_H263:
5397         if (s->codec_id == CODEC_ID_WMV2) 
5398             ff_wmv2_encode_picture_header(s, picture_number);
5399         else if (s->h263_msmpeg4) 
5400             msmpeg4_encode_picture_header(s, picture_number);
5401         else if (s->h263_pred)
5402             mpeg4_encode_picture_header(s, picture_number);
5403         else if (s->codec_id == CODEC_ID_RV10) 
5404             rv10_encode_picture_header(s, picture_number);
5405         else if (s->codec_id == CODEC_ID_RV20) 
5406             rv20_encode_picture_header(s, picture_number);
5407         else if (s->codec_id == CODEC_ID_FLV1)
5408             ff_flv_encode_picture_header(s, picture_number);
5409         else
5410             h263_encode_picture_header(s, picture_number);
5411         break;
5412     case FMT_MPEG1:
5413         mpeg1_encode_picture_header(s, picture_number);
5414         break;
5415     case FMT_H264:
5416         break;
5417     default:
5418         assert(0);
5419     }
5420     bits= put_bits_count(&s->pb);
5421     s->header_bits= bits - s->last_bits;
5422         
5423     for(i=1; i<s->avctx->thread_count; i++){
5424         update_duplicate_context_after_me(s->thread_context[i], s);
5425     }
5426     s->avctx->execute(s->avctx, encode_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5427     for(i=1; i<s->avctx->thread_count; i++){
5428         merge_context_after_encode(s, s->thread_context[i]);
5429     }
5430     emms_c();
5431 }
5432
5433 #endif //CONFIG_ENCODERS
5434
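/* denoise_dct_c(): DCT-domain noise shaping. Each nonzero coefficient's magnitude
 * is accumulated into dct_error_sum[] (from which dct_offset[] is updated
 * elsewhere) and the coefficient is then shrunk towards zero by dct_offset[],
 * clamped so its sign can never flip; small noisy coefficients end up at zero. */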
5435 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block){
5436     const int intra= s->mb_intra;
5437     int i;
5438
5439     s->dct_count[intra]++;
5440
5441     for(i=0; i<64; i++){
5442         int level= block[i];
5443
5444         if(level){
5445             if(level>0){
5446                 s->dct_error_sum[intra][i] += level;
5447                 level -= s->dct_offset[intra][i];
5448                 if(level<0) level=0;
5449             }else{
5450                 s->dct_error_sum[intra][i] -= level;
5451                 level += s->dct_offset[intra][i];
5452                 if(level>0) level=0;
5453             }
5454             block[i]= level;
5455         }
5456     }
5457 }
5458
5459 #ifdef CONFIG_ENCODERS
5460
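/* dct_quantize_trellis_c(): rate-distortion optimized quantization as a dynamic
 * program over the scan order. For each position i the 1-2 candidate levels in
 * coeff[][i] are priced roughly as distortion + lambda*vlc_bits(run, level), and
 * score_tab/run_tab/level_tab[i+1] remember the cheapest way of making position i
 * the most recent nonzero coefficient; survivor[] prunes run start points that can
 * no longer win. The best last coefficient is picked at the end and the block is
 * rebuilt by walking run_tab[]/level_tab[] backwards. */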
5461 static int dct_quantize_trellis_c(MpegEncContext *s, 
5462                         DCTELEM *block, int n,
5463                         int qscale, int *overflow){
5464     const int *qmat;
5465     const uint8_t *scantable= s->intra_scantable.scantable;
5466     const uint8_t *perm_scantable= s->intra_scantable.permutated;
5467     int max=0;
5468     unsigned int threshold1, threshold2;
5469     int bias=0;
5470     int run_tab[65];
5471     int level_tab[65];
5472     int score_tab[65];
5473     int survivor[65];
5474     int survivor_count;
5475     int last_run=0;
5476     int last_level=0;
5477     int last_score= 0;
5478     int last_i;
5479     int coeff[2][64];
5480     int coeff_count[64];
5481     int qmul, qadd, start_i, last_non_zero, i, dc;
5482     const int esc_length= s->ac_esc_length;
5483     uint8_t * length;
5484     uint8_t * last_length;
5485     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
5486         
5487     s->dsp.fdct (block);
5488     
5489     if(s->dct_error_sum)
5490         s->denoise_dct(s, block);
5491     qmul= qscale*16;
5492     qadd= ((qscale-1)|1)*8;
5493
5494     if (s->mb_intra) {
5495         int q;
5496         if (!s->h263_aic) {
5497             if (n < 4)
5498                 q = s->y_dc_scale;
5499             else
5500                 q = s->c_dc_scale;
5501             q = q << 3;
5502         } else{
5503             /* For AIC we skip quant/dequant of INTRADC */
5504             q = 1 << 3;
5505             qadd=0;
5506         }
5507             
5508         /* note: block[0] is assumed to be positive */
5509         block[0] = (block[0] + (q >> 1)) / q;
5510         start_i = 1;
5511         last_non_zero = 0;
5512         qmat = s->q_intra_matrix[qscale];
5513         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
5514             bias= 1<<(QMAT_SHIFT-1);
5515         length     = s->intra_ac_vlc_length;
5516         last_length= s->intra_ac_vlc_last_length;
5517     } else {
5518         start_i = 0;
5519         last_non_zero = -1;
5520         qmat = s->q_inter_matrix[qscale];
5521         length     = s->inter_ac_vlc_length;
5522         last_length= s->inter_ac_vlc_last_length;
5523     }
5524     last_i= start_i;
5525
5526     threshold1= (1<<QMAT_SHIFT) - bias - 1;
5527     threshold2= (threshold1<<1);
5528
5529     for(i=63; i>=start_i; i--) {
5530         const int j = scantable[i];
5531         int level = block[j] * qmat[j];
5532
5533         if(((unsigned)(level+threshold1))>threshold2){
5534             last_non_zero = i;
5535             break;
5536         }
5537     }
5538
5539     for(i=start_i; i<=last_non_zero; i++) {
5540         const int j = scantable[i];
5541         int level = block[j] * qmat[j];
5542
5543 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
5544 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
5545         if(((unsigned)(level+threshold1))>threshold2){
5546             if(level>0){
5547                 level= (bias + level)>>QMAT_SHIFT;
5548                 coeff[0][i]= level;
5549                 coeff[1][i]= level-1;
5550 //                coeff[2][k]= level-2;
5551             }else{
5552                 level= (bias - level)>>QMAT_SHIFT;
5553                 coeff[0][i]= -level;
5554                 coeff[1][i]= -level+1;
5555 //                coeff[2][k]= -level+2;
5556             }
5557             coeff_count[i]= FFMIN(level, 2);
5558             assert(coeff_count[i]);
5559             max |=level;
5560         }else{
5561             coeff[0][i]= (level>>31)|1;
5562             coeff_count[i]= 1;
5563         }
5564     }
5565     
5566     *overflow= s->max_qcoeff < max; //overflow might have happened
5567     
5568     if(last_non_zero < start_i){
5569         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
5570         return last_non_zero;
5571     }
5572
5573     score_tab[start_i]= 0;
5574     survivor[0]= start_i;
5575     survivor_count= 1;
5576     
5577     for(i=start_i; i<=last_non_zero; i++){
5578         int level_index, j;
5579         const int dct_coeff= ABS(block[ scantable[i] ]);
5580         const int zero_distoration= dct_coeff*dct_coeff;
5581         int best_score=256*256*256*120;
5582         for(level_index=0; level_index < coeff_count[i]; level_index++){
5583             int distoration;
5584             int level= coeff[level_index][i];
5585             const int alevel= ABS(level);
5586             int unquant_coeff;
5587             
5588             assert(level);
5589
5590             if(s->out_format == FMT_H263){
5591                 unquant_coeff= alevel*qmul + qadd;
5592             }else{ //MPEG1
5593                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
5594                 if(s->mb_intra){
5595                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
5596                         unquant_coeff =   (unquant_coeff - 1) | 1;
5597                 }else{
5598                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
5599                         unquant_coeff =   (unquant_coeff - 1) | 1;
5600                 }
5601                 unquant_coeff<<= 3;
5602             }
5603
5604             distoration= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distoration;
5605             level+=64;
5606             if((level&(~127)) == 0){
5607                 for(j=survivor_count-1; j>=0; j--){
5608                     int run= i - survivor[j];
5609                     int score= distoration + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
5610                     score += score_tab[i-run];
5611                     
5612                     if(score < best_score){
5613                         best_score= score;
5614                         run_tab[i+1]= run;
5615                         level_tab[i+1]= level-64;
5616                     }
5617                 }
5618
5619                 if(s->out_format == FMT_H263){
5620                     for(j=survivor_count-1; j>=0; j--){
5621                         int run= i - survivor[j];
5622                         int score= distoration + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
5623                         score += score_tab[i-run];
5624                         if(score < last_score){
5625                             last_score= score;
5626                             last_run= run;
5627                             last_level= level-64;
5628                             last_i= i+1;
5629                         }
5630                     }
5631                 }
5632             }else{
5633                 distoration += esc_length*lambda;
5634                 for(j=survivor_count-1; j>=0; j--){
5635                     int run= i - survivor[j];
5636                     int score= distoration + score_tab[i-run];
5637                     
5638                     if(score < best_score){
5639                         best_score= score;
5640                         run_tab[i+1]= run;
5641                         level_tab[i+1]= level-64;
5642                     }
5643                 }
5644
5645                 if(s->out_format == FMT_H263){
5646                   for(j=survivor_count-1; j>=0; j--){
5647                         int run= i - survivor[j];
5648                         int score= distoration + score_tab[i-run];
5649                         if(score < last_score){
5650                             last_score= score;
5651                             last_run= run;
5652                             last_level= level-64;
5653                             last_i= i+1;
5654                         }
5655                     }
5656                 }
5657             }
5658         }
5659         
5660         score_tab[i+1]= best_score;
5661
5662         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
5663         if(last_non_zero <= 27){
5664             for(; survivor_count; survivor_count--){
5665                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
5666                     break;
5667             }
5668         }else{
5669             for(; survivor_count; survivor_count--){
5670                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
5671                     break;
5672             }
5673         }
5674
5675         survivor[ survivor_count++ ]= i+1;
5676     }
5677
5678     if(s->out_format != FMT_H263){
5679         last_score= 256*256*256*120;
5680         for(i= survivor[0]; i<=last_non_zero + 1; i++){
5681             int score= score_tab[i];
5682             if(i) score += lambda*2; //FIXME be more exact?
5683
5684             if(score < last_score){
5685                 last_score= score;
5686                 last_i= i;
5687                 last_level= level_tab[i];
5688                 last_run= run_tab[i];
5689             }
5690         }
5691     }
5692
5693     s->coded_score[n] = last_score;
5694     
5695     dc= ABS(block[0]);
5696     last_non_zero= last_i - 1;
5697     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
5698     
5699     if(last_non_zero < start_i)
5700         return last_non_zero;
5701
5702     if(last_non_zero == 0 && start_i == 0){
5703         int best_level= 0;
5704         int best_score= dc * dc;
5705         
5706         for(i=0; i<coeff_count[0]; i++){
5707             int level= coeff[i][0];
5708             int alevel= ABS(level);
5709             int unquant_coeff, score, distortion;
5710
5711             if(s->out_format == FMT_H263){
5712                     unquant_coeff= (alevel*qmul + qadd)>>3;
5713             }else{ //MPEG1
5714                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
5715                     unquant_coeff =   (unquant_coeff - 1) | 1;
5716             }
5717             unquant_coeff = (unquant_coeff + 4) >> 3;
5718             unquant_coeff<<= 3 + 3;
5719
5720             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
5721             level+=64;
5722             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
5723             else                    score= distortion + esc_length*lambda;
5724
5725             if(score < best_score){
5726                 best_score= score;
5727                 best_level= level - 64;
5728             }
5729         }
5730         block[0]= best_level;
5731         s->coded_score[n] = best_score - dc*dc;
5732         if(best_level == 0) return -1;
5733         else                return last_non_zero;
5734     }
5735
5736     i= last_i;
5737     assert(last_level);
5738
5739     block[ perm_scantable[last_non_zero] ]= last_level;
5740     i -= last_run + 1;
5741     
5742     for(; i>start_i; i -= run_tab[i] + 1){
5743         block[ perm_scantable[i-1] ]= level_tab[i];
5744     }
5745
5746     return last_non_zero;
5747 }
5748
5749 //#define REFINE_STATS 1
5750 static int16_t basis[64][64];
5751
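/* build_basis(): precompute the 64 8x8 DCT basis patterns, scaled by BASIS_SHIFT
 * and indexed through the IDCT coefficient permutation, so that (up to scaling)
 * adding coeff*basis[k] to a spatial-domain residual via add_8x8basis() has the
 * same effect as changing DCT coefficient k by coeff. */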
5752 static void build_basis(uint8_t *perm){
5753     int i, j, x, y;
5754     emms_c();
5755     for(i=0; i<8; i++){
5756         for(j=0; j<8; j++){
5757             for(y=0; y<8; y++){
5758                 for(x=0; x<8; x++){
5759                     double s= 0.25*(1<<BASIS_SHIFT);
5760                     int index= 8*i + j;
5761                     int perm_index= perm[index];
5762                     if(i==0) s*= sqrt(0.5);
5763                     if(j==0) s*= sqrt(0.5);
5764                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
5765                 }
5766             }
5767         }
5768     }
5769 }
5770
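/* dct_quantize_refine(): greedy refinement of an already quantized block, used for
 * quantizer_noise_shaping. rem[] holds the (scaled) spatial-domain reconstruction
 * error and weight[] a perceptual weighting used by try_8x8basis(). Each pass tries
 * changing every coefficient by +-1, prices the VLC bit-count delta from the
 * run/level length tables plus the weighted distortion delta, applies the single
 * best improving change (updating rem[] with add_8x8basis()) and repeats until no
 * change helps. */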
5771 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
5772                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
5773                         int n, int qscale){
5774     int16_t rem[64];
5775     DCTELEM d1[64] __align16;
5776     const int *qmat;
5777     const uint8_t *scantable= s->intra_scantable.scantable;
5778     const uint8_t *perm_scantable= s->intra_scantable.permutated;
5779 //    unsigned int threshold1, threshold2;
5780 //    int bias=0;
5781     int run_tab[65];
5782     int prev_run=0;
5783     int prev_level=0;
5784     int qmul, qadd, start_i, last_non_zero, i, dc;
5785     uint8_t * length;
5786     uint8_t * last_length;
5787     int lambda;
5788     int rle_index, run, q, sum;
5789 #ifdef REFINE_STATS
5790 static int count=0;
5791 static int after_last=0;
5792 static int to_zero=0;
5793 static int from_zero=0;
5794 static int raise=0;
5795 static int lower=0;
5796 static int messed_sign=0;
5797 #endif
5798
5799     if(basis[0][0] == 0)
5800         build_basis(s->dsp.idct_permutation);
5801     
5802     qmul= qscale*2;
5803     qadd= (qscale-1)|1;
5804     if (s->mb_intra) {
5805         if (!s->h263_aic) {
5806             if (n < 4)
5807                 q = s->y_dc_scale;
5808             else
5809                 q = s->c_dc_scale;
5810         } else{
5811             /* For AIC we skip quant/dequant of INTRADC */
5812             q = 1;
5813             qadd=0;
5814         }
5815         q <<= RECON_SHIFT-3;
5816         /* note: block[0] is assumed to be positive */
5817         dc= block[0]*q;
5818 //        block[0] = (block[0] + (q >> 1)) / q;
5819         start_i = 1;
5820         qmat = s->q_intra_matrix[qscale];
5821 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
5822 //            bias= 1<<(QMAT_SHIFT-1);
5823         length     = s->intra_ac_vlc_length;
5824         last_length= s->intra_ac_vlc_last_length;
5825     } else {
5826         dc= 0;
5827         start_i = 0;
5828         qmat = s->q_inter_matrix[qscale];
5829         length     = s->inter_ac_vlc_length;
5830         last_length= s->inter_ac_vlc_last_length;
5831     }
5832     last_non_zero = s->block_last_index[n];
5833
5834 #ifdef REFINE_STATS
5835 {START_TIMER
5836 #endif
5837     dc += (1<<(RECON_SHIFT-1));
5838     for(i=0; i<64; i++){
5839         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME use orig directly instead of copying to rem[]
5840     }
5841 #ifdef REFINE_STATS
5842 STOP_TIMER("memset rem[]")}
5843 #endif
5844     sum=0;
5845     for(i=0; i<64; i++){
5846         int one= 36;
5847         int qns=4;
5848         int w;
5849
5850         w= ABS(weight[i]) + qns*one;
5851         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
5852
5853         weight[i] = w;
5854 //        w=weight[i] = (63*qns + (w/2)) / w;
5855          
5856         assert(w>0);
5857         assert(w<(1<<6));
5858         sum += w*w;
5859     }
5860     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
5861 #ifdef REFINE_STATS
5862 {START_TIMER
5863 #endif
5864     run=0;
5865     rle_index=0;
5866     for(i=start_i; i<=last_non_zero; i++){
5867         int j= perm_scantable[i];
5868         const int level= block[j];
5869         int coeff;
5870         
5871         if(level){
5872             if(level<0) coeff= qmul*level - qadd;
5873             else        coeff= qmul*level + qadd;
5874             run_tab[rle_index++]=run;
5875             run=0;
5876
5877             s->dsp.add_8x8basis(rem, basis[j], coeff);
5878         }else{
5879             run++;
5880         }
5881     }
5882 #ifdef REFINE_STATS
5883 if(last_non_zero>0){
5884 STOP_TIMER("init rem[]")
5885 }
5886 }
5887
5888 {START_TIMER
5889 #endif
5890     for(;;){
5891         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
5892         int best_coeff=0;
5893         int best_change=0;
5894         int run2, best_unquant_change=0, analyze_gradient;
5895 #ifdef REFINE_STATS
5896 {START_TIMER
5897 #endif
5898         analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;
5899
5900         if(analyze_gradient){
5901 #ifdef REFINE_STATS
5902 {START_TIMER
5903 #endif
5904             for(i=0; i<64; i++){
5905                 int w= weight[i];
5906             
5907                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
5908             }
5909 #ifdef REFINE_STATS
5910 STOP_TIMER("rem*w*w")}
5911 {START_TIMER
5912 #endif
5913             s->dsp.fdct(d1);
5914 #ifdef REFINE_STATS
5915 STOP_TIMER("dct")}
5916 #endif
5917         }
5918
5919         if(start_i){
5920             const int level= block[0];
5921             int change, old_coeff;
5922
5923             assert(s->mb_intra);
5924             
5925             old_coeff= q*level;
5926             
5927             for(change=-1; change<=1; change+=2){
5928                 int new_level= level + change;
5929                 int score, new_coeff;
5930                 
5931                 new_coeff= q*new_level;
5932                 if(new_coeff >= 2048 || new_coeff < 0)
5933                     continue;
5934
5935                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
5936                 if(score<best_score){
5937                     best_score= score;
5938                     best_coeff= 0;
5939                     best_change= change;
5940                     best_unquant_change= new_coeff - old_coeff;
5941                 }
5942             }
5943         }
5944         
5945         run=0;
5946         rle_index=0;
5947         run2= run_tab[rle_index++];
5948         prev_level=0;
5949         prev_run=0;
5950
5951         for(i=start_i; i<64; i++){
5952             int j= perm_scantable[i];
5953             const int level= block[j];
5954             int change, old_coeff;
5955
5956             if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
5957                 break;
5958
5959             if(level){
5960                 if(level<0) old_coeff= qmul*level - qadd;
5961                 else        old_coeff= qmul*level + qadd;
5962                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
5963             }else{
5964                 old_coeff=0;
5965                 run2--;
5966                 assert(run2>=0 || i >= last_non_zero );
5967             }
5968             
5969             for(change=-1; change<=1; change+=2){
5970                 int new_level= level + change;
5971                 int score, new_coeff, unquant_change;
5972                 
5973                 score=0;
5974                 if(s->avctx->quantizer_noise_shaping < 2 && ABS(new_level) > ABS(level))
5975                    continue;
5976
5977                 if(new_level){
5978                     if(new_level<0) new_coeff= qmul*new_level - qadd;
5979                     else            new_coeff= qmul*new_level + qadd;
5980                     if(new_coeff >= 2048 || new_coeff <= -2048)
5981                         continue;
5982                     //FIXME check for overflow
5983                     
5984                     if(level){
5985                         if(level < 63 && level > -63){
5986                             if(i < last_non_zero)
5987                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
5988                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
5989                             else
5990                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
5991                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
5992                         }
5993                     }else{
5994                         assert(ABS(new_level)==1);
5995                         
5996                         if(analyze_gradient){
5997                             int g= d1[ scantable[i] ];
5998                             if(g && (g^new_level) >= 0)
5999                                 continue;
6000                         }
6001
6002                         if(i < last_non_zero){
6003                             int next_i= i + run2 + 1;
6004                             int next_level= block[ perm_scantable[next_i] ] + 64;
6005                             
6006                             if(next_level&(~127))
6007                                 next_level= 0;
6008
6009                             if(next_i < last_non_zero)
6010                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
6011                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
6012                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6013                             else
6014                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
6015                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6016                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6017                         }else{
6018                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
6019                             if(prev_level){
6020                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6021                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6022                             }
6023                         }
6024                     }
6025                 }else{
6026                     new_coeff=0;
6027                     assert(ABS(level)==1);
6028
6029                     if(i < last_non_zero){
6030                         int next_i= i + run2 + 1;
6031                         int next_level= block[ perm_scantable[next_i] ] + 64;
6032                             
6033                         if(next_level&(~127))
6034                             next_level= 0;
6035
6036                         if(next_i < last_non_zero)
6037                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6038                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
6039                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6040                         else
6041                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6042                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6043                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6044                     }else{
6045                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
6046                         if(prev_level){
6047                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6048                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6049                         }
6050                     }
6051                 }
6052                 
6053                 score *= lambda;
6054
6055                 unquant_change= new_coeff - old_coeff;
6056                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
6057                 
6058                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
6059                 if(score<best_score){
6060                     best_score= score;
6061                     best_coeff= i;
6062                     best_change= change;
6063                     best_unquant_change= unquant_change;
6064                 }
6065             }
6066             if(level){
6067                 prev_level= level + 64;
6068                 if(prev_level&(~127))
6069                     prev_level= 0;
6070                 prev_run= run;
6071                 run=0;
6072             }else{
6073                 run++;
6074             }
6075         }
6076 #ifdef REFINE_STATS
6077 STOP_TIMER("iterative step")}
6078 #endif
6079
6080         if(best_change){
6081             int j= perm_scantable[ best_coeff ];
6082             
6083             block[j] += best_change;
6084             
6085             if(best_coeff > last_non_zero){
6086                 last_non_zero= best_coeff;
6087                 assert(block[j]);
6088 #ifdef REFINE_STATS
6089 after_last++;
6090 #endif
6091             }else{
6092 #ifdef REFINE_STATS
6093 if(block[j]){
6094     if(block[j] - best_change){
6095         if(ABS(block[j]) > ABS(block[j] - best_change)){
6096             raise++;
6097         }else{
6098             lower++;
6099         }
6100     }else{
6101         from_zero++;
6102     }
6103 }else{
6104     to_zero++;
6105 }
6106 #endif
6107                 for(; last_non_zero>=start_i; last_non_zero--){
6108                     if(block[perm_scantable[last_non_zero]])
6109                         break;
6110                 }
6111             }
6112 #ifdef REFINE_STATS
6113 count++;
6114 if(256*256*256*64 % count == 0){
6115     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
6116 }
6117 #endif
6118             run=0;
6119             rle_index=0;
6120             for(i=start_i; i<=last_non_zero; i++){
6121                 int j= perm_scantable[i];
6122                 const int level= block[j];
6123         
6124                  if(level){
6125                      run_tab[rle_index++]=run;
6126                      run=0;
6127                  }else{
6128                      run++;
6129                  }
6130             }
6131             
6132             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
6133         }else{
6134             break;
6135         }
6136     }
6137 #ifdef REFINE_STATS
6138 if(last_non_zero>0){
6139 STOP_TIMER("iterative search")
6140 }
6141 }
6142 #endif
6143
6144     return last_non_zero;
6145 }
6146
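/* dct_quantize_c(): the plain (non-trellis) quantizer. After the forward DCT and
 * optional denoising, each coefficient is quantized roughly as
 *     level = (|block[j]| * qmat[j] + bias) >> QMAT_SHIFT
 * with the sign restored afterwards; the threshold1/threshold2 test is a cheap way
 * of spotting coefficients that would quantize to zero so they can be cleared
 * without doing the bias/shift work. */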
6147 static int dct_quantize_c(MpegEncContext *s, 
6148                         DCTELEM *block, int n,
6149                         int qscale, int *overflow)
6150 {
6151     int i, j, level, last_non_zero, q, start_i;
6152     const int *qmat;
6153     const uint8_t *scantable= s->intra_scantable.scantable;
6154     int bias;
6155     int max=0;
6156     unsigned int threshold1, threshold2;
6157
6158     s->dsp.fdct (block);
6159
6160     if(s->dct_error_sum)
6161         s->denoise_dct(s, block);
6162
6163     if (s->mb_intra) {
6164         if (!s->h263_aic) {
6165             if (n < 4)
6166                 q = s->y_dc_scale;
6167             else
6168                 q = s->c_dc_scale;
6169             q = q << 3;
6170         } else
6171             /* For AIC we skip quant/dequant of INTRADC */
6172             q = 1 << 3;
6173             
6174         /* note: block[0] is assumed to be positive */
6175         block[0] = (block[0] + (q >> 1)) / q;
6176         start_i = 1;
6177         last_non_zero = 0;
6178         qmat = s->q_intra_matrix[qscale];
6179         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6180     } else {
6181         start_i = 0;
6182         last_non_zero = -1;
6183         qmat = s->q_inter_matrix[qscale];
6184         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6185     }
6186     threshold1= (1<<QMAT_SHIFT) - bias - 1;
6187     threshold2= (threshold1<<1);
6188     for(i=63;i>=start_i;i--) {
6189         j = scantable[i];
6190         level = block[j] * qmat[j];
6191
6192         if(((unsigned)(level+threshold1))>threshold2){
6193             last_non_zero = i;
6194             break;
6195         }else{
6196             block[j]=0;
6197         }
6198     }
6199     for(i=start_i; i<=last_non_zero; i++) {
6200         j = scantable[i];
6201         level = block[j] * qmat[j];
6202
6203 //        if(   bias+level >= (1<<QMAT_SHIFT)
6204 //           || bias-level >= (1<<QMAT_SHIFT)){
6205         if(((unsigned)(level+threshold1))>threshold2){
6206             if(level>0){
6207                 level= (bias + level)>>QMAT_SHIFT;
6208                 block[j]= level;
6209             }else{
6210                 level= (bias - level)>>QMAT_SHIFT;
6211                 block[j]= -level;
6212             }
6213             max |=level;
6214         }else{
6215             block[j]=0;
6216         }
6217     }
6218     *overflow= s->max_qcoeff < max; //overflow might have happened
6219     
6220     /* we need this permutation so that the block is in the order the IDCT expects; we only permute the nonzero elements */
6221     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
6222         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
6223
6224     return last_non_zero;
6225 }
6226
6227 #endif //CONFIG_ENCODERS
6228
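/* The dct_unquantize_*_c() functions below are the C reference dequantizers.
 * MPEG-1 reconstructs level*qscale*matrix>>3 (or the (2*level+1) form >>4 for inter
 * blocks) and forces the result odd with (x-1)|1 as mismatch control; MPEG-2 drops
 * the oddification and instead toggles the LSB of block[63] so the coefficient sum
 * stays odd; H.263/MPEG-4 use the flat 2*qscale*level +- ((qscale-1)|1) rule. */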
6229 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s, 
6230                                    DCTELEM *block, int n, int qscale)
6231 {
6232     int i, level, nCoeffs;
6233     const uint16_t *quant_matrix;
6234
6235     nCoeffs= s->block_last_index[n];
6236     
6237     if (n < 4) 
6238         block[0] = block[0] * s->y_dc_scale;
6239     else
6240         block[0] = block[0] * s->c_dc_scale;
6241     /* XXX: only mpeg1 */
6242     quant_matrix = s->intra_matrix;
6243     for(i=1;i<=nCoeffs;i++) {
6244         int j= s->intra_scantable.permutated[i];
6245         level = block[j];
6246         if (level) {
6247             if (level < 0) {
6248                 level = -level;
6249                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6250                 level = (level - 1) | 1;
6251                 level = -level;
6252             } else {
6253                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6254                 level = (level - 1) | 1;
6255             }
6256             block[j] = level;
6257         }
6258     }
6259 }
6260
6261 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s, 
6262                                    DCTELEM *block, int n, int qscale)
6263 {
6264     int i, level, nCoeffs;
6265     const uint16_t *quant_matrix;
6266
6267     nCoeffs= s->block_last_index[n];
6268     
6269     quant_matrix = s->inter_matrix;
6270     for(i=0; i<=nCoeffs; i++) {
6271         int j= s->intra_scantable.permutated[i];
6272         level = block[j];
6273         if (level) {
6274             if (level < 0) {
6275                 level = -level;
6276                 level = (((level << 1) + 1) * qscale *
6277                          ((int) (quant_matrix[j]))) >> 4;
6278                 level = (level - 1) | 1;
6279                 level = -level;
6280             } else {
6281                 level = (((level << 1) + 1) * qscale *
6282                          ((int) (quant_matrix[j]))) >> 4;
6283                 level = (level - 1) | 1;
6284             }
6285             block[j] = level;
6286         }
6287     }
6288 }
6289
6290 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s, 
6291                                    DCTELEM *block, int n, int qscale)
6292 {
6293     int i, level, nCoeffs;
6294     const uint16_t *quant_matrix;
6295
6296     if(s->alternate_scan) nCoeffs= 63;
6297     else nCoeffs= s->block_last_index[n];
6298     
6299     if (n < 4) 
6300         block[0] = block[0] * s->y_dc_scale;
6301     else
6302         block[0] = block[0] * s->c_dc_scale;
6303     quant_matrix = s->intra_matrix;
6304     for(i=1;i<=nCoeffs;i++) {
6305         int j= s->intra_scantable.permutated[i];
6306         level = block[j];
6307         if (level) {
6308             if (level < 0) {
6309                 level = -level;
6310                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6311                 level = -level;
6312             } else {
6313                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6314             }
6315             block[j] = level;
6316         }
6317     }
6318 }
6319
6320 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s, 
6321                                    DCTELEM *block, int n, int qscale)
6322 {
6323     int i, level, nCoeffs;
6324     const uint16_t *quant_matrix;
6325     int sum=-1;
6326
6327     if(s->alternate_scan) nCoeffs= 63;
6328     else nCoeffs= s->block_last_index[n];
6329     
6330     quant_matrix = s->inter_matrix;
6331     for(i=0; i<=nCoeffs; i++) {
6332         int j= s->intra_scantable.permutated[i];
6333         level = block[j];
6334         if (level) {
6335             if (level < 0) {
6336                 level = -level;
6337                 level = (((level << 1) + 1) * qscale *
6338                          ((int) (quant_matrix[j]))) >> 4;
6339                 level = -level;
6340             } else {
6341                 level = (((level << 1) + 1) * qscale *
6342                          ((int) (quant_matrix[j]))) >> 4;
6343             }
6344             block[j] = level;
6345             sum+=level;
6346         }
6347     }
6348     block[63]^=sum&1;
6349 }
6350
6351 static void dct_unquantize_h263_intra_c(MpegEncContext *s, 
6352                                   DCTELEM *block, int n, int qscale)
6353 {
6354     int i, level, qmul, qadd;
6355     int nCoeffs;
6356     
6357     assert(s->block_last_index[n]>=0);
6358     
6359     qmul = qscale << 1;
6360     
6361     if (!s->h263_aic) {
6362         if (n < 4) 
6363             block[0] = block[0] * s->y_dc_scale;
6364         else
6365             block[0] = block[0] * s->c_dc_scale;
6366         qadd = (qscale - 1) | 1;
6367     }else{
6368         qadd = 0;
6369     }
6370     if(s->ac_pred)
6371         nCoeffs=63;
6372     else
6373         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6374
6375     for(i=1; i<=nCoeffs; i++) {
6376         level = block[i];
6377         if (level) {
6378             if (level < 0) {
6379                 level = level * qmul - qadd;
6380             } else {
6381                 level = level * qmul + qadd;
6382             }
6383             block[i] = level;
6384         }
6385     }
6386 }
6387
6388 static void dct_unquantize_h263_inter_c(MpegEncContext *s, 
6389                                   DCTELEM *block, int n, int qscale)
6390 {
6391     int i, level, qmul, qadd;
6392     int nCoeffs;
6393     
6394     assert(s->block_last_index[n]>=0);
6395     
6396     qadd = (qscale - 1) | 1;
6397     qmul = qscale << 1;
6398     
6399     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6400
6401     for(i=0; i<=nCoeffs; i++) {
6402         level = block[i];
6403         if (level) {
6404             if (level < 0) {
6405                 level = level * qmul - qadd;
6406             } else {
6407                 level = level * qmul + qadd;
6408             }
6409             block[i] = level;
6410         }
6411     }
6412 }
6413
6414 #ifdef CONFIG_ENCODERS
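/* Encoder registration: every AVCodec below reuses the generic MPV_encode_init /
 * MPV_encode_picture / MPV_encode_end entry points; only the codec id, name and
 * supported pixel formats differ (MJPEG being the only YUVJ420P user). */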
6415 AVCodec h263_encoder = {
6416     "h263",
6417     CODEC_TYPE_VIDEO,
6418     CODEC_ID_H263,
6419     sizeof(MpegEncContext),
6420     MPV_encode_init,
6421     MPV_encode_picture,
6422     MPV_encode_end,
6423     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6424 };
6425
6426 AVCodec h263p_encoder = {
6427     "h263p",
6428     CODEC_TYPE_VIDEO,
6429     CODEC_ID_H263P,
6430     sizeof(MpegEncContext),
6431     MPV_encode_init,
6432     MPV_encode_picture,
6433     MPV_encode_end,
6434     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6435 };
6436
6437 AVCodec flv_encoder = {
6438     "flv",
6439     CODEC_TYPE_VIDEO,
6440     CODEC_ID_FLV1,
6441     sizeof(MpegEncContext),
6442     MPV_encode_init,
6443     MPV_encode_picture,
6444     MPV_encode_end,
6445     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6446 };
6447
6448 AVCodec rv10_encoder = {
6449     "rv10",
6450     CODEC_TYPE_VIDEO,
6451     CODEC_ID_RV10,
6452     sizeof(MpegEncContext),
6453     MPV_encode_init,
6454     MPV_encode_picture,
6455     MPV_encode_end,
6456     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6457 };
6458
6459 AVCodec rv20_encoder = {
6460     "rv20",
6461     CODEC_TYPE_VIDEO,
6462     CODEC_ID_RV20,
6463     sizeof(MpegEncContext),
6464     MPV_encode_init,
6465     MPV_encode_picture,
6466     MPV_encode_end,
6467     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6468 };
6469
6470 AVCodec mpeg4_encoder = {
6471     "mpeg4",
6472     CODEC_TYPE_VIDEO,
6473     CODEC_ID_MPEG4,
6474     sizeof(MpegEncContext),
6475     MPV_encode_init,
6476     MPV_encode_picture,
6477     MPV_encode_end,
6478     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6479     .capabilities= CODEC_CAP_DELAY,
6480 };
6481
6482 AVCodec msmpeg4v1_encoder = {
6483     "msmpeg4v1",
6484     CODEC_TYPE_VIDEO,
6485     CODEC_ID_MSMPEG4V1,
6486     sizeof(MpegEncContext),
6487     MPV_encode_init,
6488     MPV_encode_picture,
6489     MPV_encode_end,
6490     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6491 };
6492
6493 AVCodec msmpeg4v2_encoder = {
6494     "msmpeg4v2",
6495     CODEC_TYPE_VIDEO,
6496     CODEC_ID_MSMPEG4V2,
6497     sizeof(MpegEncContext),
6498     MPV_encode_init,
6499     MPV_encode_picture,
6500     MPV_encode_end,
6501     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6502 };
6503
6504 AVCodec msmpeg4v3_encoder = {
6505     "msmpeg4",
6506     CODEC_TYPE_VIDEO,
6507     CODEC_ID_MSMPEG4V3,
6508     sizeof(MpegEncContext),
6509     MPV_encode_init,
6510     MPV_encode_picture,
6511     MPV_encode_end,
6512     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6513 };
6514
6515 AVCodec wmv1_encoder = {
6516     "wmv1",
6517     CODEC_TYPE_VIDEO,
6518     CODEC_ID_WMV1,
6519     sizeof(MpegEncContext),
6520     MPV_encode_init,
6521     MPV_encode_picture,
6522     MPV_encode_end,
6523     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6524 };
6525
6526 AVCodec mjpeg_encoder = {
6527     "mjpeg",
6528     CODEC_TYPE_VIDEO,
6529     CODEC_ID_MJPEG,
6530     sizeof(MpegEncContext),
6531     MPV_encode_init,
6532     MPV_encode_picture,
6533     MPV_encode_end,
6534     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUVJ420P, -1},
6535 };
6536
6537 #endif //CONFIG_ENCODERS