]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo.c
changing size segfault fix
[ffmpeg] / libavcodec / mpegvideo.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  *
20  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
21  */
22  
23 /**
24  * @file mpegvideo.c
25  * The simplest mpeg encoder (well, it was the simplest!).
26  */ 
27  
28 #include "avcodec.h"
29 #include "dsputil.h"
30 #include "mpegvideo.h"
31 #include "faandct.h"
32 #include <limits.h>
33
34 #ifdef USE_FASTMEMCPY
35 #include "fastmemcpy.h"
36 #endif
37
38 //#undef NDEBUG
39 //#include <assert.h>
40
41 #ifdef CONFIG_ENCODERS
42 static void encode_picture(MpegEncContext *s, int picture_number);
43 #endif //CONFIG_ENCODERS
44 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s, 
45                                    DCTELEM *block, int n, int qscale);
46 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s, 
47                                    DCTELEM *block, int n, int qscale);
48 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
49                                    DCTELEM *block, int n, int qscale);
50 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
51                                    DCTELEM *block, int n, int qscale);
52 static void dct_unquantize_h263_intra_c(MpegEncContext *s, 
53                                   DCTELEM *block, int n, int qscale);
54 static void dct_unquantize_h263_inter_c(MpegEncContext *s, 
55                                   DCTELEM *block, int n, int qscale);
56 static void dct_unquantize_h261_intra_c(MpegEncContext *s, 
57                                   DCTELEM *block, int n, int qscale);
58 static void dct_unquantize_h261_inter_c(MpegEncContext *s, 
59                                   DCTELEM *block, int n, int qscale);
60 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
61 #ifdef CONFIG_ENCODERS
62 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
63 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
64 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
65 static int sse_mb(MpegEncContext *s);
66 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block);
67 #endif //CONFIG_ENCODERS
68
69 #ifdef HAVE_XVMC
70 extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
71 extern void XVMC_field_end(MpegEncContext *s);
72 extern void XVMC_decode_mb(MpegEncContext *s);
73 #endif
74
75 void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
76
77
78 /* enable all paranoid tests for rounding, overflows, etc... */
79 //#define PARANOID
80
81 //#define DEBUG
82
83
84 /* for jpeg fast DCT */
85 #define CONST_BITS 14
86
/*
 * Per-coefficient scale factors, one entry per position of the 8x8 block in
 * row-major order. The values are precomputed and scaled up by 14 bits
 * (CONST_BITS); convert_matrix() folds them into the quantizer tables when
 * the fast DCT is in use.
 */
static const uint16_t aanscales[64] = {
    /* row 0 */ 16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    /* row 1 */ 22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
    /* row 2 */ 21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
    /* row 3 */ 19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
    /* row 4 */ 16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    /* row 5 */ 12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
    /* row 6 */  8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
    /* row 7 */  4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
};
98
/* 16-entry lookup table; the index it is addressed with (0..15) is shown above each value. */
static const uint8_t h263_chroma_roundtab[16] = {
    /*  0   1   2   3   4   5   6   7 */
        0,  0,  0,  1,  1,  1,  1,  1,
    /*  8   9  10  11  12  13  14  15 */
        1,  1,  1,  1,  1,  1,  2,  2,
};
103
/* Default chroma qscale table: the identity mapping, entry i holds the value i. */
static const uint8_t ff_default_chroma_qscale_table[32]={
     0,  1,  2,  3,  4,  5,  6,  7,
     8,  9, 10, 11, 12, 13, 14, 15,
    16, 17, 18, 19, 20, 21, 22, 23,
    24, 25, 26, 27, 28, 29, 30, 31
};
108
109 #ifdef CONFIG_ENCODERS
110 static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
111 static uint8_t default_fcode_tab[MAX_MV*2+1];
112
113 enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
114
115 static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
116                            const uint16_t *quant_matrix, int bias, int qmin, int qmax)
117 {
118     int qscale;
119
120     for(qscale=qmin; qscale<=qmax; qscale++){
121         int i;
122         if (dsp->fdct == ff_jpeg_fdct_islow 
123 #ifdef FAAN_POSTSCALE
124             || dsp->fdct == ff_faandct
125 #endif
126             ) {
127             for(i=0;i<64;i++) {
128                 const int j= dsp->idct_permutation[i];
129                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
130                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
131                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
132                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
133                 
134                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / 
135                                 (qscale * quant_matrix[j]));
136             }
137         } else if (dsp->fdct == fdct_ifast
138 #ifndef FAAN_POSTSCALE
139                    || dsp->fdct == ff_faandct
140 #endif
141                    ) {
142             for(i=0;i<64;i++) {
143                 const int j= dsp->idct_permutation[i];
144                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
145                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
146                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
147                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
148                 
149                 qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) / 
150                                 (aanscales[i] * qscale * quant_matrix[j]));
151             }
152         } else {
153             for(i=0;i<64;i++) {
154                 const int j= dsp->idct_permutation[i];
155                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
156                    So 16           <= qscale * quant_matrix[i]             <= 7905
157                    so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
158                    so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
159                 */
160                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
161 //                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
162                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
163
164                 if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
165                 qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
166             }
167         }
168     }
169 }
170
171 static inline void update_qscale(MpegEncContext *s){
172     s->qscale= (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
173     s->qscale= clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
174     
175     s->lambda2= (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
176 }
177 #endif //CONFIG_ENCODERS
178
179 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
180     int i;
181     int end;
182     
183     st->scantable= src_scantable;
184
185     for(i=0; i<64; i++){
186         int j;
187         j = src_scantable[i];
188         st->permutated[i] = permutation[j];
189 #ifdef ARCH_POWERPC
190         st->inverse[j] = i;
191 #endif
192     }
193     
194     end=-1;
195     for(i=0; i<64; i++){
196         int j;
197         j = st->permutated[i];
198         if(j>end) end=j;
199         st->raster_end[i]= end;
200     }
201 }
202
203 #ifdef CONFIG_ENCODERS
204 void ff_write_quant_matrix(PutBitContext *pb, int16_t *matrix){
205     int i;
206
207     if(matrix){
208         put_bits(pb, 1, 1);
209         for(i=0;i<64;i++) {
210             put_bits(pb, 8, matrix[ ff_zigzag_direct[i] ]);
211         }
212     }else
213         put_bits(pb, 1, 0);
214 }
215 #endif //CONFIG_ENCODERS
216
217 /* init common dct for both encoder and decoder */
218 int DCT_common_init(MpegEncContext *s)
219 {
220     s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
221     s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
222     s->dct_unquantize_h261_intra = dct_unquantize_h261_intra_c;
223     s->dct_unquantize_h261_inter = dct_unquantize_h261_inter_c;
224     s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
225     s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
226     s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
227     s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;
228
229 #ifdef CONFIG_ENCODERS
230     s->dct_quantize= dct_quantize_c;
231     s->denoise_dct= denoise_dct_c;
232 #endif
233         
234 #ifdef HAVE_MMX
235     MPV_common_init_mmx(s);
236 #endif
237 #ifdef ARCH_ALPHA
238     MPV_common_init_axp(s);
239 #endif
240 #ifdef HAVE_MLIB
241     MPV_common_init_mlib(s);
242 #endif
243 #ifdef HAVE_MMI
244     MPV_common_init_mmi(s);
245 #endif
246 #ifdef ARCH_ARMV4L
247     MPV_common_init_armv4l(s);
248 #endif
249 #ifdef ARCH_POWERPC
250     MPV_common_init_ppc(s);
251 #endif
252
253 #ifdef CONFIG_ENCODERS
254     s->fast_dct_quantize= s->dct_quantize;
255
256     if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
257         s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
258     }
259
260 #endif //CONFIG_ENCODERS
261
262     /* load & permutate scantables
263        note: only wmv uses differnt ones 
264     */
265     if(s->alternate_scan){
266         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
267         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
268     }else{
269         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
270         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
271     }
272     ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
273     ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
274
275     return 0;
276 }
277
278 static void copy_picture(Picture *dst, Picture *src){
279     *dst = *src;
280     dst->type= FF_BUFFER_TYPE_COPY;
281 }
282
283 static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *src){
284     int i;
285
286     dst->pict_type              = src->pict_type;
287     dst->quality                = src->quality;
288     dst->coded_picture_number   = src->coded_picture_number;
289     dst->display_picture_number = src->display_picture_number;
290 //    dst->reference              = src->reference;
291     dst->pts                    = src->pts;
292     dst->interlaced_frame       = src->interlaced_frame;
293     dst->top_field_first        = src->top_field_first;
294
295     if(s->avctx->me_threshold){
296         if(!src->motion_val[0])
297             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
298         if(!src->mb_type)
299             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
300         if(!src->ref_index[0])
301             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
302         if(src->motion_subsample_log2 != dst->motion_subsample_log2)
303             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_subsample_log2 doesnt match! (%d!=%d)\n",
304             src->motion_subsample_log2, dst->motion_subsample_log2);
305
306         memcpy(dst->mb_type, src->mb_type, s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
307         
308         for(i=0; i<2; i++){
309             int stride= ((16*s->mb_width )>>src->motion_subsample_log2) + 1;
310             int height= ((16*s->mb_height)>>src->motion_subsample_log2);
311
312             if(src->motion_val[i] && src->motion_val[i] != dst->motion_val[i]){
313                 memcpy(dst->motion_val[i], src->motion_val[i], 2*stride*height*sizeof(int16_t));
314             }
315             if(src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]){
316                 memcpy(dst->ref_index[i], src->ref_index[i], s->b8_stride*2*s->mb_height*sizeof(int8_t));
317             }
318         }
319     }
320 }
321
322 /**
323  * allocates a Picture
324  * The pixels are allocated/set by calling get_buffer() if shared=0
325  */
326 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
327     const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesnt sig11
328     const int mb_array_size= s->mb_stride*s->mb_height;
329     const int b8_array_size= s->b8_stride*s->mb_height*2;
330     const int b4_array_size= s->b4_stride*s->mb_height*4;
331     int i;
332     
333     if(shared){
334         assert(pic->data[0]);
335         assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
336         pic->type= FF_BUFFER_TYPE_SHARED;
337     }else{
338         int r;
339         
340         assert(!pic->data[0]);
341         
342         r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);
343         
344         if(r<0 || !pic->age || !pic->type || !pic->data[0]){
345             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
346             return -1;
347         }
348
349         if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
350             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
351             return -1;
352         }
353
354         if(pic->linesize[1] != pic->linesize[2]){
355             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride missmatch)\n");
356             return -1;
357         }
358
359         s->linesize  = pic->linesize[0];
360         s->uvlinesize= pic->linesize[1];
361     }
362     
363     if(pic->qscale_table==NULL){
364         if (s->encoding) {        
365             CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
366             CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
367             CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
368         }
369
370         CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
371         CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
372         CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(uint32_t))
373         pic->mb_type= pic->mb_type_base + s->mb_stride+1;
374         if(s->out_format == FMT_H264){
375             for(i=0; i<2; i++){
376                 CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+2)  * sizeof(int16_t))
377                 pic->motion_val[i]= pic->motion_val_base[i]+2;
378                 CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
379             }
380             pic->motion_subsample_log2= 2;
381         }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
382             for(i=0; i<2; i++){
383                 CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+2) * sizeof(int16_t))
384                 pic->motion_val[i]= pic->motion_val_base[i]+2;
385                 CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
386             }
387             pic->motion_subsample_log2= 3;
388         }
389         if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
390             CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
391         }
392         pic->qstride= s->mb_stride;
393         CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
394     }
395
396     //it might be nicer if the application would keep track of these but it would require a API change
397     memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
398     s->prev_pict_types[0]= s->pict_type;
399     if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
400         pic->age= INT_MAX; // skiped MBs in b frames are quite rare in mpeg1/2 and its a bit tricky to skip them anyway
401     
402     return 0;
403 fail: //for the CHECKED_ALLOCZ macro
404     return -1;
405 }
406
407 /**
408  * deallocates a picture
409  */
410 static void free_picture(MpegEncContext *s, Picture *pic){
411     int i;
412
413     if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
414         s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
415     }
416
417     av_freep(&pic->mb_var);
418     av_freep(&pic->mc_mb_var);
419     av_freep(&pic->mb_mean);
420     av_freep(&pic->mbskip_table);
421     av_freep(&pic->qscale_table);
422     av_freep(&pic->mb_type_base);
423     av_freep(&pic->dct_coeff);
424     av_freep(&pic->pan_scan);
425     pic->mb_type= NULL;
426     for(i=0; i<2; i++){
427         av_freep(&pic->motion_val_base[i]);
428         av_freep(&pic->ref_index[i]);
429     }
430     
431     if(pic->type == FF_BUFFER_TYPE_SHARED){
432         for(i=0; i<4; i++){
433             pic->base[i]=
434             pic->data[i]= NULL;
435         }
436         pic->type= 0;        
437     }
438 }
439
440 static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
441     int i;
442
443     // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264) 
444     CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*17*2); //(width + edge + align)*interlaced*MBsize*tolerance
445     s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*17;
446
447      //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer()
448     CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t)) 
449     s->rd_scratchpad=   s->me.scratchpad;
450     s->b_scratchpad=    s->me.scratchpad;
451     s->obmc_scratchpad= s->me.scratchpad + 16;
452     if (s->encoding) {
453         CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
454         CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
455         if(s->avctx->noise_reduction){
456             CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
457         }
458     }   
459     CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
460     s->block= s->blocks[0];
461
462     for(i=0;i<12;i++){
463         s->pblocks[i] = (short *)(&s->block[i]);
464     }
465     return 0;
466 fail:
467     return -1; //free() through MPV_common_end()
468 }
469
470 static void free_duplicate_context(MpegEncContext *s){
471     if(s==NULL) return;
472
473     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
474     av_freep(&s->me.scratchpad);
475     s->rd_scratchpad=   
476     s->b_scratchpad=    
477     s->obmc_scratchpad= NULL;
478     
479     av_freep(&s->dct_error_sum);
480     av_freep(&s->me.map);
481     av_freep(&s->me.score_map);
482     av_freep(&s->blocks);
483     s->block= NULL;
484 }
485
486 static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
487 #define COPY(a) bak->a= src->a
488     COPY(allocated_edge_emu_buffer);
489     COPY(edge_emu_buffer);
490     COPY(me.scratchpad);
491     COPY(rd_scratchpad);
492     COPY(b_scratchpad);
493     COPY(obmc_scratchpad);
494     COPY(me.map);
495     COPY(me.score_map);
496     COPY(blocks);
497     COPY(block);
498     COPY(start_mb_y);
499     COPY(end_mb_y);
500     COPY(me.map_generation);
501     COPY(pb);
502     COPY(dct_error_sum);
503     COPY(dct_count[0]);
504     COPY(dct_count[1]);
505 #undef COPY
506 }
507
508 void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
509     MpegEncContext bak;
510     int i;
511     //FIXME copy only needed parts
512 //START_TIMER
513     backup_duplicate_context(&bak, dst);
514     memcpy(dst, src, sizeof(MpegEncContext));
515     backup_duplicate_context(dst, &bak);
516     for(i=0;i<12;i++){
517         dst->pblocks[i] = (short *)(&dst->block[i]);
518     }
519 //STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
520 }
521
522 static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){
523 #define COPY(a) dst->a= src->a
524     COPY(pict_type);
525     COPY(current_picture);
526     COPY(f_code);
527     COPY(b_code);
528     COPY(qscale);
529     COPY(lambda);
530     COPY(lambda2);
531     COPY(picture_in_gop_number);
532     COPY(gop_picture_number);
533     COPY(frame_pred_frame_dct); //FIXME dont set in encode_header
534     COPY(progressive_frame); //FIXME dont set in encode_header
535     COPY(partitioned_frame); //FIXME dont set in encode_header
536 #undef COPY
537 }
538
539 /**
540  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
541  * the changed fields will not depend upon the prior state of the MpegEncContext.
542  */
543 static void MPV_common_defaults(MpegEncContext *s){
544     s->y_dc_scale_table=
545     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
546     s->chroma_qscale_table= ff_default_chroma_qscale_table;
547     s->progressive_frame= 1;
548     s->progressive_sequence= 1;
549     s->picture_structure= PICT_FRAME;
550
551     s->coded_picture_number = 0;
552     s->picture_number = 0;
553     s->input_picture_number = 0;
554
555     s->picture_in_gop_number = 0;
556
557     s->f_code = 1;
558     s->b_code = 1;
559 }
560
561 /**
562  * sets the given MpegEncContext to defaults for decoding.
563  * the changed fields will not depend upon the prior state of the MpegEncContext.
564  */
565 void MPV_decode_defaults(MpegEncContext *s){
566     MPV_common_defaults(s);
567 }
568
/**
 * sets the given MpegEncContext to defaults for encoding.
 * the changed fields will not depend upon the prior state of the MpegEncContext.
 *
 * The shared default tables (default_mv_penalty, default_fcode_tab) are set
 * up on the first successful call. If allocating default_mv_penalty fails,
 * s->me.mv_penalty is set to NULL and the setup is retried on the next call.
 */

#ifdef CONFIG_ENCODERS
static void MPV_encode_defaults(MpegEncContext *s){
    static int done=0;

    MPV_common_defaults(s);

    if(!done){
        int i;

        /* av_mallocz() returns zeroed memory, so no memset is needed; the
           result may be NULL and must not be written to unconditionally */
        default_mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
        memset(default_fcode_tab , 0, sizeof(uint8_t)*(2*MAX_MV+1));

        /* motion vectors in [-16, 15] use fcode 1 */
        for(i=-16; i<16; i++){
            default_fcode_tab[i + MAX_MV]= 1;
        }

        /* only mark the tables as initialized once the allocation succeeded */
        done= default_mv_penalty != NULL;
    }
    s->me.mv_penalty= default_mv_penalty;
    s->fcode_tab= default_fcode_tab;
}
#endif //CONFIG_ENCODERS
596
597 /** 
598  * init common structure for both encoder and decoder.
599  * this assumes that some variables like width/height are already set
600  */
601 int MPV_common_init(MpegEncContext *s)
602 {
603     int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;
604
605     if(s->avctx->thread_count > MAX_THREADS || (16*s->avctx->thread_count > s->height && s->height)){
606         av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
607         return -1;
608     }
609
610     dsputil_init(&s->dsp, s->avctx);
611     DCT_common_init(s);
612
613     s->flags= s->avctx->flags;
614     s->flags2= s->avctx->flags2;
615
616     s->mb_width  = (s->width  + 15) / 16;
617     s->mb_height = (s->height + 15) / 16;
618     s->mb_stride = s->mb_width + 1;
619     s->b8_stride = s->mb_width*2 + 1;
620     s->b4_stride = s->mb_width*4 + 1;
621     mb_array_size= s->mb_height * s->mb_stride;
622     mv_table_size= (s->mb_height+2) * s->mb_stride + 1;
623
624     /* set chroma shifts */
625     avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
626                                                     &(s->chroma_y_shift) );
627
628     /* set default edge pos, will be overriden in decode_header if needed */
629     s->h_edge_pos= s->mb_width*16;
630     s->v_edge_pos= s->mb_height*16;
631
632     s->mb_num = s->mb_width * s->mb_height;
633     
634     s->block_wrap[0]=
635     s->block_wrap[1]=
636     s->block_wrap[2]=
637     s->block_wrap[3]= s->b8_stride;
638     s->block_wrap[4]=
639     s->block_wrap[5]= s->mb_stride;
640  
641     y_size = s->b8_stride * (2 * s->mb_height + 1);
642     c_size = s->mb_stride * (s->mb_height + 1);
643     yc_size = y_size + 2 * c_size;
644     
645     /* convert fourcc to upper case */
646     s->avctx->codec_tag=   toupper( s->avctx->codec_tag     &0xFF)          
647                         + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
648                         + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16) 
649                         + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);
650
651     s->avctx->stream_codec_tag=   toupper( s->avctx->stream_codec_tag     &0xFF)          
652                                + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
653                                + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16) 
654                                + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);
655
656     s->avctx->coded_frame= (AVFrame*)&s->current_picture;
657
658     CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error ressilience code looks cleaner with this
659     for(y=0; y<s->mb_height; y++){
660         for(x=0; x<s->mb_width; x++){
661             s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
662         }
663     }
664     s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
665     
666     if (s->encoding) {
667         /* Allocate MV tables */
668         CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
669         CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
670         CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
671         CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
672         CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
673         CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
674         s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
675         s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
676         s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
677         s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
678         s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
679         s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;
680
681         if(s->msmpeg4_version){
682             CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
683         }
684         CHECKED_ALLOCZ(s->avctx->stats_out, 256);
685
686         /* Allocate MB type table */
687         CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding
688         
689         CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))
690         
691         CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
692         CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
693         CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
694         CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
695         CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
696         CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
697         
698         if(s->avctx->noise_reduction){
699             CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
700         }
701     }
702     CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))
703
704     CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
705     
706     if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
707         /* interlaced direct mode decoding tables */
708             for(i=0; i<2; i++){
709                 int j, k;
710                 for(j=0; j<2; j++){
711                     for(k=0; k<2; k++){
712                         CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
713                         s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
714                     }
715                     CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
716                     CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
717                     s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
718                 }
719                 CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
720             }
721     }
722     if (s->out_format == FMT_H263) {
723         /* ac values */
724         CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
725         s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
726         s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
727         s->ac_val[2] = s->ac_val[1] + c_size;
728         
729         /* cbp values */
730         CHECKED_ALLOCZ(s->coded_block_base, y_size);
731         s->coded_block= s->coded_block_base + s->b8_stride + 1;
732         
733         /* divx501 bitstream reorder buffer */
734         CHECKED_ALLOCZ(s->bitstream_buffer, BITSTREAM_BUFFER_SIZE);
735
736         /* cbp, ac_pred, pred_dir */
737         CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
738         CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
739     }
740     
741     if (s->h263_pred || s->h263_plus || !s->encoding) {
742         /* dc values */
743         //MN: we need these for error resilience of intra-frames
744         CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
745         s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
746         s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
747         s->dc_val[2] = s->dc_val[1] + c_size;
748         for(i=0;i<yc_size;i++)
749             s->dc_val_base[i] = 1024;
750     }
751
752     /* which mb is a intra block */
753     CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
754     memset(s->mbintra_table, 1, mb_array_size);
755     
756     /* init macroblock skip table */
757     CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
758     //Note the +1 is for a quicker mpeg4 slice_end detection
759     CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
760     
761     s->parse_context.state= -1;
762     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
763        s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
764        s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
765        s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
766     }
767
768     s->context_initialized = 1;
769
770     s->thread_context[0]= s;
771     for(i=1; i<s->avctx->thread_count; i++){
772         s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
773         memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
774     }
775
776     for(i=0; i<s->avctx->thread_count; i++){
777         if(init_duplicate_context(s->thread_context[i], s) < 0)
778            goto fail;
779         s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
780         s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
781     }
782
783     return 0;
784  fail:
785     MPV_common_end(s);
786     return -1;
787 }
788
789 /* init common structure for both encoder and decoder */
790 void MPV_common_end(MpegEncContext *s)
791 {
792     int i, j, k;
793
794     for(i=0; i<s->avctx->thread_count; i++){
795         free_duplicate_context(s->thread_context[i]);
796     }
797     for(i=1; i<s->avctx->thread_count; i++){
798         av_freep(&s->thread_context[i]);
799     }
800
801     av_freep(&s->parse_context.buffer);
802     s->parse_context.buffer_size=0;
803
804     av_freep(&s->mb_type);
805     av_freep(&s->p_mv_table_base);
806     av_freep(&s->b_forw_mv_table_base);
807     av_freep(&s->b_back_mv_table_base);
808     av_freep(&s->b_bidir_forw_mv_table_base);
809     av_freep(&s->b_bidir_back_mv_table_base);
810     av_freep(&s->b_direct_mv_table_base);
811     s->p_mv_table= NULL;
812     s->b_forw_mv_table= NULL;
813     s->b_back_mv_table= NULL;
814     s->b_bidir_forw_mv_table= NULL;
815     s->b_bidir_back_mv_table= NULL;
816     s->b_direct_mv_table= NULL;
817     for(i=0; i<2; i++){
818         for(j=0; j<2; j++){
819             for(k=0; k<2; k++){
820                 av_freep(&s->b_field_mv_table_base[i][j][k]);
821                 s->b_field_mv_table[i][j][k]=NULL;
822             }
823             av_freep(&s->b_field_select_table[i][j]);
824             av_freep(&s->p_field_mv_table_base[i][j]);
825             s->p_field_mv_table[i][j]=NULL;
826         }
827         av_freep(&s->p_field_select_table[i]);
828     }
829     
830     av_freep(&s->dc_val_base);
831     av_freep(&s->ac_val_base);
832     av_freep(&s->coded_block_base);
833     av_freep(&s->mbintra_table);
834     av_freep(&s->cbp_table);
835     av_freep(&s->pred_dir_table);
836     
837     av_freep(&s->mbskip_table);
838     av_freep(&s->prev_pict_types);
839     av_freep(&s->bitstream_buffer);
840     av_freep(&s->avctx->stats_out);
841     av_freep(&s->ac_stats);
842     av_freep(&s->error_status_table);
843     av_freep(&s->mb_index2xy);
844     av_freep(&s->lambda_table);
845     av_freep(&s->q_intra_matrix);
846     av_freep(&s->q_inter_matrix);
847     av_freep(&s->q_intra_matrix16);
848     av_freep(&s->q_inter_matrix16);
849     av_freep(&s->input_picture);
850     av_freep(&s->reordered_input_picture);
851     av_freep(&s->dct_offset);
852
853     if(s->picture){
854         for(i=0; i<MAX_PICTURE_COUNT; i++){
855             free_picture(s, &s->picture[i]);
856         }
857     }
858     av_freep(&s->picture);
859     s->context_initialized = 0;
860     s->last_picture_ptr=
861     s->next_picture_ptr=
862     s->current_picture_ptr= NULL;
863     s->linesize= s->uvlinesize= 0;
864
865     for(i=0; i<3; i++)
866         av_freep(&s->visualization_buffer[i]);
867
868     avcodec_default_free_buffers(s->avctx);
869 }
870
871 #ifdef CONFIG_ENCODERS
872
873 /* init video encoder */
874 int MPV_encode_init(AVCodecContext *avctx)
875 {
876     MpegEncContext *s = avctx->priv_data;
877     int i, dummy;
878     int chroma_h_shift, chroma_v_shift;
879     
880     MPV_encode_defaults(s);
881
882     avctx->pix_fmt = PIX_FMT_YUV420P; // FIXME
883
884     s->bit_rate = avctx->bit_rate;
885     s->width = avctx->width;
886     s->height = avctx->height;
887     if(avctx->gop_size > 600){
888         av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
889         avctx->gop_size=600;
890     }
891     s->gop_size = avctx->gop_size;
892     s->avctx = avctx;
893     s->flags= avctx->flags;
894     s->flags2= avctx->flags2;
895     s->max_b_frames= avctx->max_b_frames;
896     s->codec_id= avctx->codec->id;
897     s->luma_elim_threshold  = avctx->luma_elim_threshold;
898     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
899     s->strict_std_compliance= avctx->strict_std_compliance;
900     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
901     s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
902     s->mpeg_quant= avctx->mpeg_quant;
903     s->rtp_mode= !!avctx->rtp_payload_size;
904     s->intra_dc_precision= avctx->intra_dc_precision;
905
906     if (s->gop_size <= 1) {
907         s->intra_only = 1;
908         s->gop_size = 12;
909     } else {
910         s->intra_only = 0;
911     }
912
913     s->me_method = avctx->me_method;
914
915     /* Fixed QSCALE */
916     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
917     
918     s->adaptive_quant= (   s->avctx->lumi_masking
919                         || s->avctx->dark_masking
920                         || s->avctx->temporal_cplx_masking 
921                         || s->avctx->spatial_cplx_masking
922                         || s->avctx->p_masking
923                         || (s->flags&CODEC_FLAG_QP_RD))
924                        && !s->fixed_qscale;
925     
926     s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
927     s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
928     s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
929
930     if(avctx->rc_max_rate && !avctx->rc_buffer_size){
931         av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed, for encoding with a maximum bitrate\n");
932         return -1;
933     }    
934
935     if(avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate){
936         av_log(avctx, AV_LOG_INFO, "Warning min_rate > 0 but min_rate != max_rate isnt recommanded!\n");
937     }
938     
939     if(avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate){
940         av_log(avctx, AV_LOG_INFO, "bitrate below min bitrate\n");
941         return -1;
942     }
943     
944     if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){
945         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
946         return -1;
947     }
948         
949     if(   s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate 
950        && (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO)
951        && 90000LL * (avctx->rc_buffer_size-1) > s->avctx->rc_max_rate*0xFFFFLL){
952         
953         av_log(avctx, AV_LOG_INFO, "Warning vbv_delay will be set to 0xFFFF (=VBR) as the specified vbv buffer is too large for the given bitrate!\n");
954     }
955        
956     if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4 
957        && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P && s->codec_id != CODEC_ID_FLV1){
958         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
959         return -1;
960     }
961         
962     if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
963         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decission\n");
964         return -1;
965     }
966     
967     if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
968         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
969         return -1;
970     }
971     
972     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
973         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
974         return -1;
975     }
976
977     if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
978         av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
979         return -1;
980     }
981     
982     if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
983         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
984         return -1;
985     }
986
987     if((s->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN)) 
988        && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO){
989         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
990         return -1;
991     }
992         
993     if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
994         av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supporetd by codec\n");
995         return -1;
996     }
997         
998     if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
999         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
1000         return -1;
1001     }
1002
1003     if((s->flags & CODEC_FLAG_QP_RD) && s->avctx->mb_decision != FF_MB_DECISION_RD){
1004         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
1005         return -1;
1006     }
1007     
1008     if(s->avctx->scenechange_threshold < 1000000000 && (s->flags & CODEC_FLAG_CLOSED_GOP)){
1009         av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection arent supported yet\n");
1010         return -1;
1011     }
1012     
1013     if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4 
1014        && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO 
1015        && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){
1016         av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n");
1017         return -1;
1018     }
1019     
1020     if(s->avctx->thread_count > 1)
1021         s->rtp_mode= 1;
1022
1023     i= ff_gcd(avctx->frame_rate, avctx->frame_rate_base);
1024     if(i > 1){
1025         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
1026         avctx->frame_rate /= i;
1027         avctx->frame_rate_base /= i;
1028 //        return -1;
1029     }
1030     
1031     if(s->codec_id==CODEC_ID_MJPEG){
1032         s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
1033         s->inter_quant_bias= 0;
1034     }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
1035         s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
1036         s->inter_quant_bias= 0;
1037     }else{
1038         s->intra_quant_bias=0;
1039         s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
1040     }
1041     
1042     if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
1043         s->intra_quant_bias= avctx->intra_quant_bias;
1044     if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
1045         s->inter_quant_bias= avctx->inter_quant_bias;
1046         
1047     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
1048
1049     av_reduce(&s->time_increment_resolution, &dummy, s->avctx->frame_rate, s->avctx->frame_rate_base, (1<<16)-1);
1050     s->time_increment_bits = av_log2(s->time_increment_resolution - 1) + 1;
1051
1052     switch(avctx->codec->id) {
1053     case CODEC_ID_MPEG1VIDEO:
1054         s->out_format = FMT_MPEG1;
1055         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1056         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1057         break;
1058     case CODEC_ID_MPEG2VIDEO:
1059         s->out_format = FMT_MPEG1;
1060         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1061         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1062         s->rtp_mode= 1;
1063         break;
1064     case CODEC_ID_LJPEG:
1065     case CODEC_ID_MJPEG:
1066         s->out_format = FMT_MJPEG;
1067         s->intra_only = 1; /* force intra only for jpeg */
1068         s->mjpeg_write_tables = 1; /* write all tables */
1069         s->mjpeg_data_only_frames = 0; /* write all the needed headers */
1070         s->mjpeg_vsample[0] = 1<<chroma_v_shift;
1071         s->mjpeg_vsample[1] = 1;
1072         s->mjpeg_vsample[2] = 1; 
1073         s->mjpeg_hsample[0] = 1<<chroma_h_shift;
1074         s->mjpeg_hsample[1] = 1; 
1075         s->mjpeg_hsample[2] = 1; 
1076         if (mjpeg_init(s) < 0)
1077             return -1;
1078         avctx->delay=0;
1079         s->low_delay=1;
1080         break;
1081 #ifdef CONFIG_RISKY
1082     case CODEC_ID_H263:
1083         if (h263_get_picture_format(s->width, s->height) == 7) {
1084             av_log(avctx, AV_LOG_INFO, "Input picture size isn't suitable for h263 codec! try h263+\n");
1085             return -1;
1086         }
1087         s->out_format = FMT_H263;
1088         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1089         avctx->delay=0;
1090         s->low_delay=1;
1091         break;
1092     case CODEC_ID_H263P:
1093         s->out_format = FMT_H263;
1094         s->h263_plus = 1;
1095         /* Fx */
1096         s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
1097         s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
1098         s->modified_quant= s->h263_aic;
1099         s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
1100         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1101         s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
1102         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1103         s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
1104
1105         /* /Fx */
1106         /* These are just to be sure */
1107         avctx->delay=0;
1108         s->low_delay=1;
1109         break;
1110     case CODEC_ID_FLV1:
1111         s->out_format = FMT_H263;
1112         s->h263_flv = 2; /* format = 1; 11-bit codes */
1113         s->unrestricted_mv = 1;
1114         s->rtp_mode=0; /* don't allow GOB */
1115         avctx->delay=0;
1116         s->low_delay=1;
1117         break;
1118     case CODEC_ID_RV10:
1119         s->out_format = FMT_H263;
1120         avctx->delay=0;
1121         s->low_delay=1;
1122         break;
1123     case CODEC_ID_MPEG4:
1124         s->out_format = FMT_H263;
1125         s->h263_pred = 1;
1126         s->unrestricted_mv = 1;
1127         s->low_delay= s->max_b_frames ? 0 : 1;
1128         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1129         break;
1130     case CODEC_ID_MSMPEG4V1:
1131         s->out_format = FMT_H263;
1132         s->h263_msmpeg4 = 1;
1133         s->h263_pred = 1;
1134         s->unrestricted_mv = 1;
1135         s->msmpeg4_version= 1;
1136         avctx->delay=0;
1137         s->low_delay=1;
1138         break;
1139     case CODEC_ID_MSMPEG4V2:
1140         s->out_format = FMT_H263;
1141         s->h263_msmpeg4 = 1;
1142         s->h263_pred = 1;
1143         s->unrestricted_mv = 1;
1144         s->msmpeg4_version= 2;
1145         avctx->delay=0;
1146         s->low_delay=1;
1147         break;
1148     case CODEC_ID_MSMPEG4V3:
1149         s->out_format = FMT_H263;
1150         s->h263_msmpeg4 = 1;
1151         s->h263_pred = 1;
1152         s->unrestricted_mv = 1;
1153         s->msmpeg4_version= 3;
1154         s->flipflop_rounding=1;
1155         avctx->delay=0;
1156         s->low_delay=1;
1157         break;
1158     case CODEC_ID_WMV1:
1159         s->out_format = FMT_H263;
1160         s->h263_msmpeg4 = 1;
1161         s->h263_pred = 1;
1162         s->unrestricted_mv = 1;
1163         s->msmpeg4_version= 4;
1164         s->flipflop_rounding=1;
1165         avctx->delay=0;
1166         s->low_delay=1;
1167         break;
1168     case CODEC_ID_WMV2:
1169         s->out_format = FMT_H263;
1170         s->h263_msmpeg4 = 1;
1171         s->h263_pred = 1;
1172         s->unrestricted_mv = 1;
1173         s->msmpeg4_version= 5;
1174         s->flipflop_rounding=1;
1175         avctx->delay=0;
1176         s->low_delay=1;
1177         break;
1178 #endif
1179     default:
1180         return -1;
1181     }
1182     
1183     avctx->has_b_frames= !s->low_delay;
1184
1185     s->encoding = 1;
1186
1187     /* init */
1188     if (MPV_common_init(s) < 0)
1189         return -1;
1190
1191     if(s->modified_quant)
1192         s->chroma_qscale_table= ff_h263_chroma_qscale_table;
1193     s->progressive_frame= 
1194     s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME));
1195     s->quant_precision=5;
1196     
1197     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
1198     
1199 #ifdef CONFIG_ENCODERS
1200 #ifdef CONFIG_RISKY
1201     if (s->out_format == FMT_H263)
1202         h263_encode_init(s);
1203     if(s->msmpeg4_version)
1204         ff_msmpeg4_encode_init(s);
1205 #endif
1206     if (s->out_format == FMT_MPEG1)
1207         ff_mpeg1_encode_init(s);
1208 #endif
1209
1210     /* init q matrix */
1211     for(i=0;i<64;i++) {
1212         int j= s->dsp.idct_permutation[i];
1213 #ifdef CONFIG_RISKY
1214         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
1215             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
1216             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
1217         }else if(s->out_format == FMT_H263){
1218             s->intra_matrix[j] =
1219             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1220         }else
1221 #endif
1222         { /* mpeg1/2 */
1223             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
1224             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1225         }
1226         if(s->avctx->intra_matrix)
1227             s->intra_matrix[j] = s->avctx->intra_matrix[i];
1228         if(s->avctx->inter_matrix)
1229             s->inter_matrix[j] = s->avctx->inter_matrix[i];
1230     }
1231
1232     /* precompute matrix */
1233     /* for mjpeg, we do include qscale in the matrix */
1234     if (s->out_format != FMT_MJPEG) {
1235         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16, 
1236                        s->intra_matrix, s->intra_quant_bias, 1, 31);
1237         convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16, 
1238                        s->inter_matrix, s->inter_quant_bias, 1, 31);
1239     }
1240
1241     if(ff_rate_control_init(s) < 0)
1242         return -1;
1243     
1244     return 0;
1245 }
1246
1247 int MPV_encode_end(AVCodecContext *avctx)
1248 {
1249     MpegEncContext *s = avctx->priv_data;
1250
1251 #ifdef STATS
1252     print_stats();
1253 #endif
1254
1255     ff_rate_control_uninit(s);
1256
1257     MPV_common_end(s);
1258     if (s->out_format == FMT_MJPEG)
1259         mjpeg_close(s);
1260
1261     av_freep(&avctx->extradata);
1262       
1263     return 0;
1264 }
1265
1266 #endif //CONFIG_ENCODERS
1267
1268 void init_rl(RLTable *rl)
1269 {
1270     int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
1271     uint8_t index_run[MAX_RUN+1];
1272     int last, run, level, start, end, i;
1273
1274     /* compute max_level[], max_run[] and index_run[] */
1275     for(last=0;last<2;last++) {
1276         if (last == 0) {
1277             start = 0;
1278             end = rl->last;
1279         } else {
1280             start = rl->last;
1281             end = rl->n;
1282         }
1283
1284         memset(max_level, 0, MAX_RUN + 1);
1285         memset(max_run, 0, MAX_LEVEL + 1);
1286         memset(index_run, rl->n, MAX_RUN + 1);
1287         for(i=start;i<end;i++) {
1288             run = rl->table_run[i];
1289             level = rl->table_level[i];
1290             if (index_run[run] == rl->n)
1291                 index_run[run] = i;
1292             if (level > max_level[run])
1293                 max_level[run] = level;
1294             if (run > max_run[level])
1295                 max_run[level] = run;
1296         }
1297         rl->max_level[last] = av_malloc(MAX_RUN + 1);
1298         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
1299         rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
1300         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
1301         rl->index_run[last] = av_malloc(MAX_RUN + 1);
1302         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
1303     }
1304 }
1305
/**
 * Replicate the border pixels of an image into a frame of 'w' pixels around it.
 *
 * @param buf    top-left pixel of the image; the memory around it must be writable
 * @param wrap   distance in bytes between two consecutive rows
 * @param width  width of the image in pixels
 * @param height height of the image in rows
 * @param w      width of the edge to draw
 */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *const bottom = buf + (height - 1) * wrap;
    uint8_t *above, *below, *row;
    int n;

    /* copy the first row upwards and the last row downwards */
    above = buf;
    below = bottom;
    for (n = 0; n < w; n++) {
        above -= wrap;
        below += wrap;
        memcpy(above, buf, width);
        memcpy(below, bottom, width);
    }

    /* extend every image row to the left and to the right */
    row = buf;
    for (n = 0; n < height; n++) {
        memset(row - w, row[0], w);
        memset(row + width, row[width-1], w);
        row += wrap;
    }

    /* fill the four corners with the nearest image corner pixel */
    above = buf;
    below = bottom;
    for (n = 0; n < w; n++) {
        above -= wrap;
        below += wrap;
        memset(above - w, buf[0], w);                /* top left */
        memset(above + width, buf[width-1], w);      /* top right */
        memset(below - w, bottom[0], w);             /* bottom left */
        memset(below + width, bottom[width-1], w);   /* bottom right */
    }
}
1334
1335 int ff_find_unused_picture(MpegEncContext *s, int shared){
1336     int i;
1337     
1338     if(shared){
1339         for(i=0; i<MAX_PICTURE_COUNT; i++){
1340             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
1341         }
1342     }else{
1343         for(i=0; i<MAX_PICTURE_COUNT; i++){
1344             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
1345         }
1346         for(i=0; i<MAX_PICTURE_COUNT; i++){
1347             if(s->picture[i].data[0]==NULL) return i;
1348         }
1349     }
1350
1351     assert(0);
1352     return -1;
1353 }
1354
1355 static void update_noise_reduction(MpegEncContext *s){
1356     int intra, i;
1357
1358     for(intra=0; intra<2; intra++){
1359         if(s->dct_count[intra] > (1<<16)){
1360             for(i=0; i<64; i++){
1361                 s->dct_error_sum[intra][i] >>=1;
1362             }
1363             s->dct_count[intra] >>= 1;
1364         }
1365         
1366         for(i=0; i<64; i++){
1367             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
1368         }
1369     }
1370 }
1371
1372 /**
1373  * generic function for encode/decode called after coding/decoding the header and before a frame is coded/decoded
1374  */
1375 int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
1376 {
1377     int i;
1378     AVFrame *pic;
1379     s->mb_skiped = 0;
1380
1381     assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
1382
1383     /* mark&release old frames */
1384     if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
1385         avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);
1386
1387         /* release forgotten pictures */
1388         /* if(mpeg124/h263) */
1389         if(!s->encoding){
1390             for(i=0; i<MAX_PICTURE_COUNT; i++){
1391                 if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
1392                     av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
1393                     avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);                
1394                 }
1395             }
1396         }
1397     }
1398 alloc:
1399     if(!s->encoding){
1400         /* release non refernce frames */
1401         for(i=0; i<MAX_PICTURE_COUNT; i++){
1402             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1403                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1404             }
1405         }
1406
1407         if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
1408             pic= (AVFrame*)s->current_picture_ptr; //we allready have a unused image (maybe it was set before reading the header)
1409         else{
1410             i= ff_find_unused_picture(s, 0);
1411             pic= (AVFrame*)&s->picture[i];
1412         }
1413
1414         pic->reference= s->pict_type != B_TYPE && !s->dropable ? 3 : 0;
1415
1416         pic->coded_picture_number= s->coded_picture_number++;
1417         
1418         if( alloc_picture(s, (Picture*)pic, 0) < 0)
1419             return -1;
1420
1421         s->current_picture_ptr= (Picture*)pic;
1422         s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
1423         s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
1424     }
1425
1426     s->current_picture_ptr->pict_type= s->pict_type;
1427 //    if(s->flags && CODEC_FLAG_QSCALE) 
1428   //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
1429     s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;
1430
1431     copy_picture(&s->current_picture, s->current_picture_ptr);
1432   
1433   if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
1434     if (s->pict_type != B_TYPE) {
1435         s->last_picture_ptr= s->next_picture_ptr;
1436         if(!s->dropable)
1437             s->next_picture_ptr= s->current_picture_ptr;
1438     }
1439 /*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
1440         s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL, 
1441         s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL, 
1442         s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
1443         s->pict_type, s->dropable);*/
1444     
1445     if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
1446     if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);
1447     
1448     if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL)){
1449         av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
1450         assert(s->pict_type != B_TYPE); //these should have been dropped if we dont have a reference
1451         goto alloc;
1452     }
1453
1454     assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));
1455
1456     if(s->picture_structure!=PICT_FRAME){
1457         int i;
1458         for(i=0; i<4; i++){
1459             if(s->picture_structure == PICT_BOTTOM_FIELD){
1460                  s->current_picture.data[i] += s->current_picture.linesize[i];
1461             } 
1462             s->current_picture.linesize[i] *= 2;
1463             s->last_picture.linesize[i] *=2;
1464             s->next_picture.linesize[i] *=2;
1465         }
1466     }
1467   }
1468    
1469     s->hurry_up= s->avctx->hurry_up;
1470     s->error_resilience= avctx->error_resilience;
1471
1472     /* set dequantizer, we cant do it during init as it might change for mpeg4
1473        and we cant do it in the header decode as init isnt called for mpeg4 there yet */
1474     if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
1475         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1476         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1477     }else if(s->out_format == FMT_H263){
1478         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1479         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1480     }else if(s->out_format == FMT_H261){
1481         s->dct_unquantize_intra = s->dct_unquantize_h261_intra;
1482         s->dct_unquantize_inter = s->dct_unquantize_h261_inter;
1483     }else{
1484         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1485         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1486     }
1487
1488     if(s->dct_error_sum){
1489         assert(s->avctx->noise_reduction && s->encoding);
1490
1491         update_noise_reduction(s);
1492     }
1493         
1494 #ifdef HAVE_XVMC
1495     if(s->avctx->xvmc_acceleration)
1496         return XVMC_field_start(s, avctx);
1497 #endif
1498     return 0;
1499 }
1500
1501 /* generic function for encode/decode called after a frame has been coded/decoded */
1502 void MPV_frame_end(MpegEncContext *s)
1503 {
1504     int i;
1505     /* draw edge for correct motion prediction if outside */
1506 #ifdef HAVE_XVMC
1507 //just to make sure that all data is rendered.
1508     if(s->avctx->xvmc_acceleration){
1509         XVMC_field_end(s);
1510     }else
1511 #endif
1512     if(s->unrestricted_mv && s->pict_type != B_TYPE && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
1513             draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
1514             draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1515             draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1516     }
1517     emms_c();
1518     
1519     s->last_pict_type    = s->pict_type;
1520     if(s->pict_type!=B_TYPE){
1521         s->last_non_b_pict_type= s->pict_type;
1522     }
1523 #if 0
1524         /* copy back current_picture variables */
1525     for(i=0; i<MAX_PICTURE_COUNT; i++){
1526         if(s->picture[i].data[0] == s->current_picture.data[0]){
1527             s->picture[i]= s->current_picture;
1528             break;
1529         }    
1530     }
1531     assert(i<MAX_PICTURE_COUNT);
1532 #endif    
1533
1534     if(s->encoding){
1535         /* release non refernce frames */
1536         for(i=0; i<MAX_PICTURE_COUNT; i++){
1537             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1538                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1539             }
1540         }
1541     }
1542     // clear copies, to avoid confusion
1543 #if 0
1544     memset(&s->last_picture, 0, sizeof(Picture));
1545     memset(&s->next_picture, 0, sizeof(Picture));
1546     memset(&s->current_picture, 0, sizeof(Picture));
1547 #endif
1548     s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
1549 }
1550
/**
 * draws a line between (sx, sy) and (ex, ey).
 *
 * Both end points are clipped to the image first. Along the longer axis the
 * color is split between the two neighbouring pixels according to the 16 bit
 * fractional position of the line, and added to what is already there.
 *
 * @param buf pointer to pixel (0, 0) of the image
 * @param w width of the image
 * @param h height of the image
 * @param stride stride/linesize of the image
 * @param color value added to the pixels along the line
 */
static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
    int t, x, y, fr, f;

    sx= clip(sx, 0, w-1);
    sy= clip(sy, 0, h-1);
    ex= clip(ex, 0, w-1);
    ey= clip(ey, 0, h-1);

    buf[sy*stride + sx]+= color;

    if(ABS(ex - sx) > ABS(ey - sy)){
        /* mostly horizontal: step along x, from the left end point */
        if(sx > ex){
            t=sx; sx=ex; ex=t;
            t=sy; sy=ey; ey=t;
        }
        buf+= sx + sy*stride;
        ex-= sx;
        /* slope in 16.16 fixed point; ex > 0 here, and the multiplication
           avoids left shifting a negative value */
        f= ((ey-sy)*(1<<16))/ex;
        for(x= 0; x <= ex; x++){
            y = (x*f)>>16;
            fr= (x*f)&0xFFFF;
            buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
            /* with fr == 0 nothing would be added, and row y+1 may lie
               outside the image, so do not touch it */
            if(fr)
                buf[(y+1)*stride + x]+= (color*         fr )>>16;
        }
    }else{
        /* mostly vertical: step along y, from the upper end point */
        if(sy > ey){
            t=sx; sx=ex; ex=t;
            t=sy; sy=ey; ey=t;
        }
        buf+= sx + sy*stride;
        ey-= sy;
        if(ey) f= ((ex-sx)*(1<<16))/ey;
        else   f= 0;
        for(y= 0; y <= ey; y++){
            x = (y*f)>>16;
            fr= (y*f)&0xFFFF;
            buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;
            /* same as above for column x+1 */
            if(fr)
                buf[y*stride + x+1]+= (color*         fr )>>16;
        }
    }
}
1599
/**
 * Draws an arrow whose shaft connects (ex, ey) with (sx, sy) and whose head
 * is placed at (sx, sy).
 * The head is only drawn if the arrow is longer than 3 pixels.
 * @param w width of the image
 * @param h height of the image
 * @param stride stride/linesize of the image
 * @param color color of the arrow
 */
static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
    int delta_x, delta_y;

    /* limit both end points to at most 100 pixels outside of the image */
    sx= clip(sx, -100, w+100);
    sy= clip(sy, -100, h+100);
    ex= clip(ex, -100, w+100);
    ey= clip(ey, -100, h+100);

    delta_x= ex - sx;
    delta_y= ey - sy;

    if(delta_x*delta_x + delta_y*delta_y > 3*3){
        /* the shaft direction combined with its perpendicular gives the
           direction of one head stroke, the other stroke is perpendicular to it */
        int head_x=  delta_x + delta_y;
        int head_y= -delta_x + delta_y;
        const int norm= ff_sqrt((head_x*head_x + head_y*head_y)<<8);

        //FIXME subpixel accuracy
        head_x= ROUNDED_DIV(head_x*3<<4, norm);
        head_y= ROUNDED_DIV(head_y*3<<4, norm);

        draw_line(buf, sx, sy, sx + head_x, sy + head_y, w, h, stride, color);
        draw_line(buf, sx, sy, sx - head_y, sy + head_x, w, h, stride, color);
    }

    /* the shaft itself */
    draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
}
1632
1633 /**
 * Prints debugging info for the given picture.
 *
 * Two independent parts, both controlled by the debug flags of s->avctx:
 *  - FF_DEBUG_SKIP / FF_DEBUG_QP / FF_DEBUG_MB_TYPE: a textual map with one
 *    entry per macroblock is written through av_log() at AV_LOG_DEBUG level.
 *  - FF_DEBUG_VIS_QP / FF_DEBUG_VIS_MB_TYPE / debug_mv: the picture planes are
 *    copied into s->visualization_buffer[], pict->data[] is redirected to the
 *    copies and motion vectors / qscale / macroblock types are drawn onto them.
 *
 * Side effects of the visualization part: s->low_delay is set to 0,
 * pict->data[] and pict->type are changed and s->mbskip_table[] is cleared.
 *
 * @param s    codec context
 * @param pict picture to describe; nothing is done if it is NULL or has no
 *             mb_type table
 */
1636 void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
1637
1638     if(!pict || !pict->mb_type) return;
1639
         /* part 1: textual per macroblock map */
1640     if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
1641         int x,y;
1642         
1643         av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
             /* note: for any other picture type nothing (not even the newline) is printed */
1644         switch (pict->pict_type) {
1645             case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
1646             case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
1647             case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
1648             case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
1649             case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
1650             case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;            
1651         }
             /* one text line per macroblock row */
1652         for(y=0; y<s->mb_height; y++){
1653             for(x=0; x<s->mb_width; x++){
1654                 if(s->avctx->debug&FF_DEBUG_SKIP){
1655                     int count= s->mbskip_table[x + y*s->mb_stride];
                         /* limit to 9 so that the value always fits into one digit */
1656                     if(count>9) count=9;
1657                     av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
1658                 }
1659                 if(s->avctx->debug&FF_DEBUG_QP){
1660                     av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
1661                 }
1662                 if(s->avctx->debug&FF_DEBUG_MB_TYPE){
1663                     int mb_type= pict->mb_type[x + y*s->mb_stride];
                         /* first character, the first matching rule wins:
                            P PCM, A intra with AC prediction, i intra 4x4, I intra 16x16,
                            d direct & skipped, D direct, g GMC & skipped, G GMC, S skipped,
                            > list 1 unused, < list 0 unused, X both lists used */
1664                     //Type & MV direction
1665                     if(IS_PCM(mb_type))
1666                         av_log(s->avctx, AV_LOG_DEBUG, "P");
1667                     else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
1668                         av_log(s->avctx, AV_LOG_DEBUG, "A");
1669                     else if(IS_INTRA4x4(mb_type))
1670                         av_log(s->avctx, AV_LOG_DEBUG, "i");
1671                     else if(IS_INTRA16x16(mb_type))
1672                         av_log(s->avctx, AV_LOG_DEBUG, "I");
1673                     else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
1674                         av_log(s->avctx, AV_LOG_DEBUG, "d");
1675                     else if(IS_DIRECT(mb_type))
1676                         av_log(s->avctx, AV_LOG_DEBUG, "D");
1677                     else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
1678                         av_log(s->avctx, AV_LOG_DEBUG, "g");
1679                     else if(IS_GMC(mb_type))
1680                         av_log(s->avctx, AV_LOG_DEBUG, "G");
1681                     else if(IS_SKIP(mb_type))
1682                         av_log(s->avctx, AV_LOG_DEBUG, "S");
1683                     else if(!USES_LIST(mb_type, 1))
1684                         av_log(s->avctx, AV_LOG_DEBUG, ">");
1685                     else if(!USES_LIST(mb_type, 0))
1686                         av_log(s->avctx, AV_LOG_DEBUG, "<");
1687                     else{
1688                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1689                         av_log(s->avctx, AV_LOG_DEBUG, "X");
1690                     }
1691                     
                         /* second character: how the macroblock is partitioned */
1692                     //segmentation
1693                     if(IS_8X8(mb_type))
1694                         av_log(s->avctx, AV_LOG_DEBUG, "+");
1695                     else if(IS_16X8(mb_type))
1696                         av_log(s->avctx, AV_LOG_DEBUG, "-");
1697                     else if(IS_8X16(mb_type))
1698                         av_log(s->avctx, AV_LOG_DEBUG, "¦");
1699                     else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
1700                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1701                     else
1702                         av_log(s->avctx, AV_LOG_DEBUG, "?");
1703                     
1704                         
                         /* third character: interlaced marker, only shown for H.264 */
1705                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
1706                         av_log(s->avctx, AV_LOG_DEBUG, "=");
1707                     else
1708                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1709                 }
1710 //                av_log(s->avctx, AV_LOG_DEBUG, " ");
1711             }
1712             av_log(s->avctx, AV_LOG_DEBUG, "\n");
1713         }
1714     }
1715
         /* part 2: visualization drawn into a copy of the picture */
1716     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
             /* motion vectors are shifted right by this amount before they are
                added to pixel positions */
1717         const int shift= 1 + s->quarter_sample;
1718         int mb_y;
1719         uint8_t *ptr;
1720         int i;
1721         int h_chroma_shift, v_chroma_shift;
1722         s->low_delay=0; //needed to see the vectors without trashing the buffers
1723
1724         avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
             /* copy all 3 planes (the chroma planes have height>>v_chroma_shift lines)
                and let pict point to the copies, everything below draws into them */
1725         for(i=0; i<3; i++){
1726             memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*s->height:pict->linesize[i]*s->height >> v_chroma_shift);
1727             pict->data[i]= s->visualization_buffer[i];
1728         }
1729         pict->type= FF_BUFFER_TYPE_COPY;
1730         ptr= pict->data[0];
1731
1732         for(mb_y=0; mb_y<s->mb_height; mb_y++){
1733             int mb_x;
1734             for(mb_x=0; mb_x<s->mb_width; mb_x++){
1735                 const int mb_index= mb_x + mb_y*s->mb_stride;
                     /* motion vectors, drawn as arrows into the luma plane */
1736                 if((s->avctx->debug_mv) && pict->motion_val){
1737                   int type;
                       /* type 0: forward vectors of P pictures,
                          type 1: forward vectors of B pictures,
                          type 2: backward vectors of B pictures;
                          each one is skipped unless requested and matching the picture type */
1738                   for(type=0; type<3; type++){
1739                     int direction = 0;
1740                     switch (type) {
1741                       case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
1742                                 continue;
1743                               direction = 0;
1744                               break;
1745                       case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
1746                                 continue;
1747                               direction = 0;
1748                               break;
1749                       case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
1750                                 continue;
1751                               direction = 1;
1752                               break;
1753                     }
1754                     if(!USES_LIST(pict->mb_type[mb_index], direction))
1755                         continue;
1756
1757                     //FIXME for h264
                         /* 4 vectors, one arrow starting at the center of each 8x8 block */
1758                     if(IS_8X8(pict->mb_type[mb_index])){
1759                       int i;
1760                       for(i=0; i<4; i++){
1761                         int sx= mb_x*16 + 4 + 8*(i&1);
1762                         int sy= mb_y*16 + 4 + 8*(i>>1);
1763                         int xy= mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*s->b8_stride;
1764                         int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1765                         int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1766                         draw_arrow(ptr, sx, sy, mx, my, s->width, s->height, s->linesize, 100);
1767                       }
                         /* 2 vectors, one arrow per 16x8 half */
1768                     }else if(IS_16X8(pict->mb_type[mb_index])){
1769                       int i;
1770                       for(i=0; i<2; i++){
1771                         int sx=mb_x*16 + 8;
1772                         int sy=mb_y*16 + 4 + 8*i;
1773                         int xy= mb_x*2 + (mb_y*2 + i)*s->b8_stride;
1774                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1775                         int my=(pict->motion_val[direction][xy][1]>>shift);
1776                         
                             /* the vertical component is doubled for interlaced macroblocks */
1777                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1778                             my*=2;
1779                         
1780                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, s->width, s->height, s->linesize, 100);
1781                       }
                         /* everything else: a single arrow from the macroblock center */
1782                     }else{
1783                       int sx= mb_x*16 + 8;
1784                       int sy= mb_y*16 + 8;
1785                       int xy= mb_x*2 + mb_y*2*s->b8_stride;
1786                       int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1787                       int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1788                       draw_arrow(ptr, sx, sy, mx, my, s->width, s->height, s->linesize, 100);
1789                     }
1790                   }                  
1791                 }
                     /* qscale: both chroma planes of the 8x8 chroma block are filled with
                        qscale*128/31; note that this is also only done if motion_val is set */
1792                 if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
1793                     uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
1794                     int y;
1795                     for(y=0; y<8; y++){
1796                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
1797                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
1798                     }
1799                 }
                     /* macroblock type: the 8x8 chroma block is filled with a color which
                        depends on the type, the partitioning is marked in the luma plane */
1800                 if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
1801                     int mb_type= pict->mb_type[mb_index];
1802                     uint64_t u,v;
1803                     int y;
                         /* COLOR sets u and v to the point with angle theta (in degrees) and
                            radius r around (128, 128); the macro stays defined after this function */
1804 #define COLOR(theta, r)\
1805 u= (int)(128 + r*cos(theta*3.141592/180));\
1806 v= (int)(128 + r*sin(theta*3.141592/180));
1807
1808                     
                         /* u=v=128 is kept by the types whose COLOR() call is commented out below */
1809                     u=v=128;
1810                     if(IS_PCM(mb_type)){
1811                         COLOR(120,48)
1812                     }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
1813                         COLOR(30,48)
1814                     }else if(IS_INTRA4x4(mb_type)){
1815                         COLOR(90,48)
1816                     }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
1817 //                        COLOR(120,48)
1818                     }else if(IS_DIRECT(mb_type)){
1819                         COLOR(150,48)
1820                     }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
1821                         COLOR(170,48)
1822                     }else if(IS_GMC(mb_type)){
1823                         COLOR(190,48)
1824                     }else if(IS_SKIP(mb_type)){
1825 //                        COLOR(180,48)
1826                     }else if(!USES_LIST(mb_type, 1)){
1827                         COLOR(240,48)
1828                     }else if(!USES_LIST(mb_type, 0)){
1829                         COLOR(0,48)
1830                     }else{
1831                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1832                         COLOR(300,48)
1833                     }
1834
                         /* replicate the value into all 8 bytes so that one store fills a chroma line */
1835                     u*= 0x0101010101010101ULL;
1836                     v*= 0x0101010101010101ULL;
1837                     for(y=0; y<8; y++){
1838                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
1839                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
1840                     }
1841
1842                     //segmentation
                         /* horizontal split: invert the top bit of the 16 luma pixels in line 8 */
1843                     if(IS_8X8(mb_type) || IS_16X8(mb_type)){
1844                         *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
1845                         *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
1846                     }
                         /* vertical split: invert the top bit of the luma pixels in column 8 */
1847                     if(IS_8X8(mb_type) || IS_8X16(mb_type)){
1848                         for(y=0; y<16; y++)
1849                             pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
1850                     }
1851                         
1852                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
1853                         // hmm
1854                     }
1855                 }
                     /* the skip counter of every macroblock is reset whenever anything is visualized */
1856                 s->mbskip_table[mb_index]=0;
1857             }
1858         }
1859     }
1860 }
1861
1862 #ifdef CONFIG_ENCODERS
1863
/**
 * Sums up the absolute differences between each pixel of a 16x16 block
 * and a constant reference value.
 * @param src top-left pixel of the block
 * @param ref value every pixel is compared against
 * @param stride stride/linesize of the image containing the block
 * @return sum of the 256 absolute differences
 */
static int get_sae(uint8_t *src, int ref, int stride){
    int sum=0;
    int row, col;

    for(row=0; row<16; row++){
        const uint8_t *line= src + row*stride;

        for(col=0; col<16; col++)
            sum+= ABS(line[col] - ref);
    }

    return sum;
}
1876
1877 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
1878     int x, y, w, h;
1879     int acc=0;
1880     
1881     w= s->width &~15;
1882     h= s->height&~15;
1883     
1884     for(y=0; y<h; y+=16){
1885         for(x=0; x<w; x+=16){
1886             int offset= x + y*stride;
1887             int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
1888             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
1889             int sae = get_sae(src + offset, mean, stride);
1890             
1891             acc+= sae + 500 < sad;
1892         }
1893     }
1894     return acc;
1895 }
1896
1897
1898 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
1899     AVFrame *pic=NULL;
1900     int i;
1901     const int encoding_delay= s->max_b_frames;
1902     int direct=1;
1903     
1904   if(pic_arg){
1905     if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
1906     if(pic_arg->linesize[0] != s->linesize) direct=0;
1907     if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
1908     if(pic_arg->linesize[2] != s->uvlinesize) direct=0;
1909   
1910 //    av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);
1911     
1912     if(direct){
1913         i= ff_find_unused_picture(s, 1);
1914
1915         pic= (AVFrame*)&s->picture[i];
1916         pic->reference= 3;
1917     
1918         for(i=0; i<4; i++){
1919             pic->data[i]= pic_arg->data[i];
1920             pic->linesize[i]= pic_arg->linesize[i];
1921         }
1922         alloc_picture(s, (Picture*)pic, 1);
1923     }else{
1924         int offset= 16;
1925         i= ff_find_unused_picture(s, 0);
1926
1927         pic= (AVFrame*)&s->picture[i];
1928         pic->reference= 3;
1929
1930         alloc_picture(s, (Picture*)pic, 0);
1931
1932         if(   pic->data[0] + offset == pic_arg->data[0] 
1933            && pic->data[1] + offset == pic_arg->data[1]
1934            && pic->data[2] + offset == pic_arg->data[2]){
1935        // empty
1936         }else{
1937             int h_chroma_shift, v_chroma_shift;
1938             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
1939         
1940             for(i=0; i<3; i++){
1941                 int src_stride= pic_arg->linesize[i];
1942                 int dst_stride= i ? s->uvlinesize : s->linesize;
1943                 int h_shift= i ? h_chroma_shift : 0;
1944                 int v_shift= i ? v_chroma_shift : 0;
1945                 int w= s->width >>h_shift;
1946                 int h= s->height>>v_shift;
1947                 uint8_t *src= pic_arg->data[i];
1948                 uint8_t *dst= pic->data[i] + offset;
1949             
1950                 if(src_stride==dst_stride)
1951                     memcpy(dst, src, src_stride*h);
1952                 else{
1953                     while(h--){
1954                         memcpy(dst, src, w);
1955                         dst += dst_stride;
1956                         src += src_stride;
1957                     }
1958                 }
1959             }
1960         }
1961     }
1962     copy_picture_attributes(s, pic, pic_arg);
1963     
1964     pic->display_picture_number= s->input_picture_number++;
1965     if(pic->pts != AV_NOPTS_VALUE){ 
1966         s->user_specified_pts= pic->pts;
1967     }else{
1968         if(s->user_specified_pts){
1969             pic->pts= s->user_specified_pts + AV_TIME_BASE*(int64_t)s->avctx->frame_rate_base / s->avctx->frame_rate;
1970             av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%Ld)\n", pic->pts);
1971         }else{
1972             pic->pts= av_rescale(pic->display_picture_number*(int64_t)s->avctx->frame_rate_base, AV_TIME_BASE, s->avctx->frame_rate);
1973         }
1974     }
1975   }
1976   
1977     /* shift buffer entries */
1978     for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
1979         s->input_picture[i-1]= s->input_picture[i];
1980         
1981     s->input_picture[encoding_delay]= (Picture*)pic;
1982
1983     return 0;
1984 }
1985
/**
 * Selects the picture which is coded next and sets up s->new_picture,
 * s->current_picture_ptr and s->current_picture for it.
 *
 * The queue of reordered pictures is advanced by one entry. If it is empty
 * afterwards and an input picture is available, the picture types
 * (I/P/B) and the coding order of the next group of pictures are decided and
 * the group is moved from s->input_picture[] to s->reordered_input_picture[].
 * If there is nothing to code s->new_picture is zeroed.
 */
1986 static void select_input_picture(MpegEncContext *s){
1987     int i;
1988
         /* drop the picture coded last time, the final entry becomes free */
1989     for(i=1; i<MAX_PICTURE_COUNT; i++)
1990         s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
1991     s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;
1992
1993     /* set next picture types & ordering */
1994     if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
             /* without a next_picture_ptr, or in intra only mode, the oldest input
                picture is coded as I picture and no B pictures are used */
1995         if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
1996             s->reordered_input_picture[0]= s->input_picture[0];
1997             s->reordered_input_picture[0]->pict_type= I_TYPE;
1998             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
1999         }else{
                 /* number of pictures coded as B pictures before the next non B picture */
2000             int b_frames;
2001             
                 /* 2nd pass: take the picture types from the rate control entries */
2002             if(s->flags&CODEC_FLAG_PASS2){
2003                 for(i=0; i<s->max_b_frames+1; i++){
2004                     int pict_num= s->input_picture[0]->display_picture_number + i;
2005                     int pict_type= s->rc_context.entry[pict_num].new_pict_type;
2006                     s->input_picture[i]->pict_type= pict_type;
2007                     
2008                     if(i + 1 >= s->rc_context.num_entries) break;
2009                 }
2010             }
2011
2012             if(s->input_picture[0]->pict_type){
2013                 /* user selected pict_type */
                     /* count the leading B pictures, at most max_b_frames are allowed */
2014                 for(b_frames=0; b_frames<s->max_b_frames+1; b_frames++){
2015                     if(s->input_picture[b_frames]->pict_type!=B_TYPE) break;
2016                 }
2017             
2018                 if(b_frames > s->max_b_frames){
2019                     av_log(s->avctx, AV_LOG_ERROR, "warning, too many bframes in a row\n");
2020                     b_frames = s->max_b_frames;
2021                 }
2022             }else if(s->avctx->b_frame_strategy==0){
                     /* strategy 0: always max_b_frames, less if fewer pictures are queued */
2023                 b_frames= s->max_b_frames;
2024                 while(b_frames && !s->input_picture[b_frames]) b_frames--;
2025             }else if(s->avctx->b_frame_strategy==1){
                     /* strategy 1: score every picture against its predecessor with
                        get_intra_count(); the score is stored +1, so 0 marks a picture
                        which has not been scored yet */
2026                 for(i=1; i<s->max_b_frames+1; i++){
2027                     if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
2028                         s->input_picture[i]->b_frame_score= 
2029                             get_intra_count(s, s->input_picture[i  ]->data[0], 
2030                                                s->input_picture[i-1]->data[0], s->linesize) + 1;
2031                     }
2032                 }
                     /* stop at the first missing picture or the first one whose score
                        exceeds mb_num/40 */
2033                 for(i=0; i<s->max_b_frames; i++){
2034                     if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/40) break;
2035                 }
2036                                 
2037                 b_frames= FFMAX(0, i-1);
2038                 
2039                 /* reset scores */
2040                 for(i=0; i<b_frames+1; i++){
2041                     s->input_picture[i]->b_frame_score=0;
2042                 }
2043             }else{
2044                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
2045                 b_frames=0;
2046             }
2047
2048             emms_c();
2049 //static int b_count=0;
2050 //b_count+= b_frames;
2051 //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
                 /* the GOP size would be reached or exceeded: code an I picture,
                    with closed GOPs no B pictures are coded before it */
2052             if(s->picture_in_gop_number + b_frames >= s->gop_size){
2053                 if(s->flags & CODEC_FLAG_CLOSED_GOP)
2054                     b_frames=0;
2055                 s->input_picture[b_frames]->pict_type= I_TYPE;
2056             }
2057             
                 /* closed GOP and the picture after the B pictures already is an I
                    picture: use one B picture less */
2058             if(   (s->flags & CODEC_FLAG_CLOSED_GOP)
2059                && b_frames
2060                && s->input_picture[b_frames]->pict_type== I_TYPE)
2061                 b_frames--;
2062
                 /* coding order: first the picture after the B pictures (as P unless
                    it was made an I picture above), then the B pictures in input order */
2063             s->reordered_input_picture[0]= s->input_picture[b_frames];
2064             if(s->reordered_input_picture[0]->pict_type != I_TYPE)
2065                 s->reordered_input_picture[0]->pict_type= P_TYPE;
2066             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2067             for(i=0; i<b_frames; i++){
2068                 s->reordered_input_picture[i+1]= s->input_picture[i];
2069                 s->reordered_input_picture[i+1]->pict_type= B_TYPE;
2070                 s->reordered_input_picture[i+1]->coded_picture_number= s->coded_picture_number++;
2071             }
2072         }
2073     }
2074     
2075     if(s->reordered_input_picture[0]){
             /* B pictures get reference 0, all other pictures 3 */
2076         s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;
2077
2078         copy_picture(&s->new_picture, s->reordered_input_picture[0]);
2079
2080         if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2081             // input is a shared pix, so we cant modifiy it -> alloc a new one & ensure that the shared one is reuseable
2082         
                 /* note: this i hides the function level i, it is the picture index
                    first and the loop counter afterwards */
2083             int i= ff_find_unused_picture(s, 0);
2084             Picture *pic= &s->picture[i];
2085
2086             /* mark us unused / free shared pic */
2087             for(i=0; i<4; i++)
2088                 s->reordered_input_picture[0]->data[i]= NULL;
2089             s->reordered_input_picture[0]->type= 0;
2090             
2091             pic->reference              = s->reordered_input_picture[0]->reference;
2092             
2093             alloc_picture(s, pic, 0);
2094
2095             copy_picture_attributes(s, (AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);
2096
2097             s->current_picture_ptr= pic;
2098         }else{
2099             // input is not a shared pix -> reuse buffer for current_pix
2100
2101             assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER 
2102                    || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2103             
2104             s->current_picture_ptr= s->reordered_input_picture[0];
                 /* load_input_picture() stored the copied input 16 bytes into the planes */
2105             for(i=0; i<4; i++){
2106                 s->new_picture.data[i]+=16;
2107             }
2108         }
2109         copy_picture(&s->current_picture, s->current_picture_ptr);
2110     
2111         s->picture_number= s->new_picture.display_picture_number;
2112 //printf("dpn:%d\n", s->picture_number);
2113     }else{
             /* nothing to code, MPV_encode_picture() detects this through new_picture.data[0] */
2114        memset(&s->new_picture, 0, sizeof(Picture));
2115     }
2116 }
2117
/**
 * Queues one input frame and, if a picture is ready to be coded, encodes it
 * into buf.
 *
 * @param avctx    codec context, its priv_data is the MpegEncContext
 * @param buf      output buffer for the coded picture
 * @param buf_size size of buf in bytes
 * @param data     the input AVFrame; it is handed to load_input_picture()
 *                 unchecked, which accepts NULL
 * @return number of bytes written (s->frame_bits/8), which is 0 if no picture
 *         was coded in this call, or -1 if the pixel format is not
 *         PIX_FMT_YUV420P
 */
2118 int MPV_encode_picture(AVCodecContext *avctx,
2119                        unsigned char *buf, int buf_size, void *data)
2120 {
2121     MpegEncContext *s = avctx->priv_data;
2122     AVFrame *pic_arg = data;
2123     int i, stuffing_count;
2124
2125     if(avctx->pix_fmt != PIX_FMT_YUV420P){
2126         av_log(avctx, AV_LOG_ERROR, "this codec supports only YUV420P\n");
2127         return -1;
2128     }
2129     
         /* give every thread context a part of the output buffer which is
            proportional to its range of macroblock rows */
2130     for(i=0; i<avctx->thread_count; i++){
2131         int start_y= s->thread_context[i]->start_mb_y;
2132         int   end_y= s->thread_context[i]->  end_mb_y;
2133         int h= s->mb_height;
2134         uint8_t *start= buf + buf_size*start_y/h;
2135         uint8_t *end  = buf + buf_size*  end_y/h;
2136
2137         init_put_bits(&s->thread_context[i]->pb, start, end - start);
2138     }
2139
2140     s->picture_in_gop_number++;
2141
2142     load_input_picture(s, pic_arg);
2143     
2144     select_input_picture(s);
2145     
2146     /* output? */
         /* select_input_picture() zeroes new_picture if there is nothing to code */
2147     if(s->new_picture.data[0]){
2148         s->pict_type= s->new_picture.pict_type;
2149 //emms_c();
2150 //printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
2151         MPV_frame_start(s, avctx);
2152
2153         encode_picture(s, s->picture_number);
2154         
             /* export the statistics of this picture to the user */
2155         avctx->real_pict_num  = s->picture_number;
2156         avctx->header_bits = s->header_bits;
2157         avctx->mv_bits     = s->mv_bits;
2158         avctx->misc_bits   = s->misc_bits;
2159         avctx->i_tex_bits  = s->i_tex_bits;
2160         avctx->p_tex_bits  = s->p_tex_bits;
2161         avctx->i_count     = s->i_count;
2162         avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
2163         avctx->skip_count  = s->skip_count;
2164
2165         MPV_frame_end(s);
2166
2167         if (s->out_format == FMT_MJPEG)
2168             mjpeg_picture_trailer(s);
2169         
2170         if(s->flags&CODEC_FLAG_PASS1)
2171             ff_write_pass1_stats(s);
2172
             /* the error values are accumulated over all coded pictures */
2173         for(i=0; i<4; i++){
2174             avctx->error[i] += s->current_picture_ptr->error[i];
2175         }
2176
2177         flush_put_bits(&s->pb);
2178         s->frame_bits  = put_bits_count(&s->pb);
2179
             /* a non zero result is the number of stuffing bytes to append */
2180         stuffing_count= ff_vbv_update(s, s->frame_bits);
2181         if(stuffing_count){
2182             switch(s->codec_id){
2183             case CODEC_ID_MPEG1VIDEO:
2184             case CODEC_ID_MPEG2VIDEO:
                     /* zero bytes */
2185                 while(stuffing_count--){
2186                     put_bits(&s->pb, 8, 0);
2187                 }
2188             break;
2189             case CODEC_ID_MPEG4:
                     /* the 32 bit code 0x000001C3 followed by 0xFF bytes; this relies on
                        stuffing_count being at least 4 -- TODO confirm against ff_vbv_update() */
2190                 put_bits(&s->pb, 16, 0);
2191                 put_bits(&s->pb, 16, 0x1C3);
2192                 stuffing_count -= 4;
2193                 while(stuffing_count--){
2194                     put_bits(&s->pb, 8, 0xFF);
2195                 }
2196             break;
2197             default:
                     /* no stuffing is written for any other codec */
2198                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
2199             }
2200             flush_put_bits(&s->pb);
2201             s->frame_bits  = put_bits_count(&s->pb);
2202         }
2203
2204         /* update mpeg1/2 vbv_delay for CBR */    
             /* only if min and max rate are equal and the largest possible delay,
                in 90kHz units, does not exceed 0xFFFF */
2205         if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && s->out_format == FMT_MPEG1
2206            && 90000LL * (avctx->rc_buffer_size-1) <= s->avctx->rc_max_rate*0xFFFFLL){
2207             int vbv_delay;
2208
2209             assert(s->repeat_first_field==0);
2210             
2211             vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate);
2212             assert(vbv_delay < 0xFFFF);
2213
                 /* patch the 16 bit value into the already written bytes at vbv_delay_ptr:
                    3 bits into byte 0, 8 bits into byte 1 and 5 bits into byte 2 */
2214             s->vbv_delay_ptr[0] &= 0xF8;
2215             s->vbv_delay_ptr[0] |= vbv_delay>>13;
2216             s->vbv_delay_ptr[1]  = vbv_delay>>5;
2217             s->vbv_delay_ptr[2] &= 0x07;
2218             s->vbv_delay_ptr[2] |= vbv_delay<<3;
2219         }
2220         s->total_bits += s->frame_bits;
2221         avctx->frame_bits  = s->frame_bits;
2222     }else{
             /* nothing was coded, so nothing may have been written either */
2223         assert((pbBufPtr(&s->pb) == s->pb.buf));
2224         s->frame_bits=0;
2225     }
2226     assert((s->frame_bits&7)==0);
2227     
2228     return s->frame_bits/8;
2229 }
2230
2231 #endif //CONFIG_ENCODERS
2232
2233 static inline void gmc1_motion(MpegEncContext *s,
2234                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2235                                uint8_t **ref_picture)
2236 {
2237     uint8_t *ptr;
2238     int offset, src_x, src_y, linesize, uvlinesize;
2239     int motion_x, motion_y;
2240     int emu=0;
2241
2242     motion_x= s->sprite_offset[0][0];
2243     motion_y= s->sprite_offset[0][1];
2244     src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
2245     src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
2246     motion_x<<=(3-s->sprite_warping_accuracy);
2247     motion_y<<=(3-s->sprite_warping_accuracy);
2248     src_x = clip(src_x, -16, s->width);
2249     if (src_x == s->width)
2250         motion_x =0;
2251     src_y = clip(src_y, -16, s->height);
2252     if (src_y == s->height)
2253         motion_y =0;
2254
2255     linesize = s->linesize;
2256     uvlinesize = s->uvlinesize;
2257     
2258     ptr = ref_picture[0] + (src_y * linesize) + src_x;
2259
2260     if(s->flags&CODEC_FLAG_EMU_EDGE){
2261         if(   (unsigned)src_x >= s->h_edge_pos - 17
2262            || (unsigned)src_y >= s->v_edge_pos - 17){
2263             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
2264             ptr= s->edge_emu_buffer;
2265         }
2266     }
2267     
2268     if((motion_x|motion_y)&7){
2269         s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
2270         s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
2271     }else{
2272         int dxy;
2273         
2274         dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
2275         if (s->no_rounding){
2276             s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
2277         }else{
2278             s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
2279         }
2280     }
2281     
2282     if(s->flags&CODEC_FLAG_GRAY) return;
2283
2284     motion_x= s->sprite_offset[1][0];
2285     motion_y= s->sprite_offset[1][1];
2286     src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
2287     src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
2288     motion_x<<=(3-s->sprite_warping_accuracy);
2289     motion_y<<=(3-s->sprite_warping_accuracy);
2290     src_x = clip(src_x, -8, s->width>>1);
2291     if (src_x == s->width>>1)
2292         motion_x =0;
2293     src_y = clip(src_y, -8, s->height>>1);
2294     if (src_y == s->height>>1)
2295         motion_y =0;
2296
2297     offset = (src_y * uvlinesize) + src_x;
2298     ptr = ref_picture[1] + offset;
2299     if(s->flags&CODEC_FLAG_EMU_EDGE){
2300         if(   (unsigned)src_x >= (s->h_edge_pos>>1) - 9
2301            || (unsigned)src_y >= (s->v_edge_pos>>1) - 9){
2302             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2303             ptr= s->edge_emu_buffer;
2304             emu=1;
2305         }
2306     }
2307     s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
2308     
2309     ptr = ref_picture[2] + offset;
2310     if(emu){
2311         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2312         ptr= s->edge_emu_buffer;
2313     }
2314     s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
2315     
2316     return;
2317 }
2318
2319 static inline void gmc_motion(MpegEncContext *s,
2320                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2321                                uint8_t **ref_picture)
2322 {
2323     uint8_t *ptr;
2324     int linesize, uvlinesize;
2325     const int a= s->sprite_warping_accuracy;
2326     int ox, oy;
2327
2328     linesize = s->linesize;
2329     uvlinesize = s->uvlinesize;
2330
2331     ptr = ref_picture[0];
2332
2333     ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
2334     oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
2335
2336     s->dsp.gmc(dest_y, ptr, linesize, 16,
2337            ox, 
2338            oy, 
2339            s->sprite_delta[0][0], s->sprite_delta[0][1],
2340            s->sprite_delta[1][0], s->sprite_delta[1][1], 
2341            a+1, (1<<(2*a+1)) - s->no_rounding,
2342            s->h_edge_pos, s->v_edge_pos);
2343     s->dsp.gmc(dest_y+8, ptr, linesize, 16,
2344            ox + s->sprite_delta[0][0]*8, 
2345            oy + s->sprite_delta[1][0]*8, 
2346            s->sprite_delta[0][0], s->sprite_delta[0][1],
2347            s->sprite_delta[1][0], s->sprite_delta[1][1], 
2348            a+1, (1<<(2*a+1)) - s->no_rounding,
2349            s->h_edge_pos, s->v_edge_pos);
2350
2351     if(s->flags&CODEC_FLAG_GRAY) return;
2352
2353     ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
2354     oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
2355
2356     ptr = ref_picture[1];
2357     s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
2358            ox, 
2359            oy, 
2360            s->sprite_delta[0][0], s->sprite_delta[0][1],
2361            s->sprite_delta[1][0], s->sprite_delta[1][1], 
2362            a+1, (1<<(2*a+1)) - s->no_rounding,
2363            s->h_edge_pos>>1, s->v_edge_pos>>1);
2364     
2365     ptr = ref_picture[2];
2366     s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
2367            ox, 
2368            oy, 
2369            s->sprite_delta[0][0], s->sprite_delta[0][1],
2370            s->sprite_delta[1][0], s->sprite_delta[1][1], 
2371            a+1, (1<<(2*a+1)) - s->no_rounding,
2372            s->h_edge_pos>>1, s->v_edge_pos>>1);
2373 }
2374
/**
 * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
 *
 * The requested block may lie partly or completely outside the source picture.
 * The part that overlaps the picture is copied, the rest of the block is filled
 * by repeating the nearest copied row / column. If the source picture is empty
 * (w <= 0 or h <= 0) there is nothing to replicate and buf is left untouched.
 *
 * @param buf destination buffer
 * @param src source buffer, pointing at the sample (src_x, src_y), which may be outside the picture
 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
 * @param block_w width of block
 * @param block_h height of block
 * @param src_x x coordinate of the top left sample of the block in the source buffer
 * @param src_y y coordinate of the top left sample of the block in the source buffer
 * @param w width of the source buffer
 * @param h height of the source buffer
 */
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
                                    int src_x, int src_y, int w, int h){
    int x, y;
    int start_y, start_x, end_y, end_x;

    /* an empty picture has no sample to replicate; without this check the
       loops below would spread uninitialized buf contents */
    if(w <= 0 || h <= 0)
        return;

    /* move blocks that do not touch the picture at all so that they overlap
       it by exactly one row / column */
    if(src_y>= h){
        src+= (h-1-src_y)*linesize;
        src_y=h-1;
    }else if(src_y<=-block_h){
        src+= (1-block_h-src_y)*linesize;
        src_y=1-block_h;
    }
    if(src_x>= w){
        src+= (w-1-src_x);
        src_x=w-1;
    }else if(src_x<=-block_w){
        src+= (1-block_w-src_x);
        src_x=1-block_w;
    }

    /* part of the block, in block coordinates, that lies inside the picture */
    start_y= src_y < 0 ? -src_y : 0;
    start_x= src_x < 0 ? -src_x : 0;
    end_y= h-src_y < block_h ? h-src_y : block_h;
    end_x= w-src_x < block_w ? w-src_x : block_w;

    // copy existing part
    for(y=start_y; y<end_y; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= src[x + y*linesize];
        }
    }

    //top
    for(y=0; y<start_y; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= buf[x + start_y*linesize];
        }
    }

    //bottom
    for(y=end_y; y<block_h; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
        }
    }

    /* the rows are complete between start_x and end_x now, extend them sideways */
    for(y=0; y<block_h; y++){
       //left
        for(x=0; x<start_x; x++){
            buf[x + y*linesize]= buf[start_x + y*linesize];
        }

       //right
        for(x=end_x; x<block_w; x++){
            buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
        }
    }
}
2445
2446 static inline int hpel_motion(MpegEncContext *s, 
2447                                   uint8_t *dest, uint8_t *src,
2448                                   int field_based, int field_select,
2449                                   int src_x, int src_y,
2450                                   int width, int height, int stride,
2451                                   int h_edge_pos, int v_edge_pos,
2452                                   int w, int h, op_pixels_func *pix_op,
2453                                   int motion_x, int motion_y)
2454 {
2455     int dxy;
2456     int emu=0;
2457
2458     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2459     src_x += motion_x >> 1;
2460     src_y += motion_y >> 1;
2461                 
2462     /* WARNING: do no forget half pels */
2463     src_x = clip(src_x, -16, width); //FIXME unneeded for emu?
2464     if (src_x == width)
2465         dxy &= ~1;
2466     src_y = clip(src_y, -16, height);
2467     if (src_y == height)
2468         dxy &= ~2;
2469     src += src_y * stride + src_x;
2470
2471     if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
2472         if(   (unsigned)src_x > h_edge_pos - (motion_x&1) - w
2473            || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){
2474             ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2475                              src_x, src_y<<field_based, h_edge_pos, s->v_edge_pos);
2476             src= s->edge_emu_buffer;
2477             emu=1;
2478         }
2479     }
2480     if(field_select)
2481         src += s->linesize;
2482     pix_op[dxy](dest, src, stride, h);
2483     return emu;
2484 }
2485
2486 /* apply one mpeg motion vector to the three components */
2487 static always_inline void mpeg_motion(MpegEncContext *s,
2488                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2489                                int field_based, int bottom_field, int field_select,
2490                                uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
2491                                int motion_x, int motion_y, int h)
2492 {
2493     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2494     int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, uvlinesize, linesize;
2495     
2496 #if 0    
2497 if(s->quarter_sample)
2498 {
2499     motion_x>>=1;
2500     motion_y>>=1;
2501 }
2502 #endif
2503
2504     v_edge_pos = s->v_edge_pos >> field_based;
2505     linesize   = s->current_picture.linesize[0] << field_based;
2506     uvlinesize = s->current_picture.linesize[1] << field_based;
2507
2508     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2509     src_x = s->mb_x* 16               + (motion_x >> 1);
2510     src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1);
2511
2512     if (s->out_format == FMT_H263) {
2513         if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){
2514             mx = (motion_x>>1)|(motion_x&1);
2515             my = motion_y >>1;
2516             uvdxy = ((my & 1) << 1) | (mx & 1);
2517             uvsrc_x = s->mb_x* 8               + (mx >> 1);
2518             uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
2519         }else{
2520             uvdxy = dxy | (motion_y & 2) | ((motion_x & 2) >> 1);
2521             uvsrc_x = src_x>>1;
2522             uvsrc_y = src_y>>1;
2523         }
2524     }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
2525         mx = motion_x / 4;
2526         my = motion_y / 4;
2527         uvdxy = 0;
2528         uvsrc_x = s->mb_x*8 + mx;
2529         uvsrc_y = s->mb_y*8 + my;
2530     } else {
2531         if(s->chroma_y_shift){
2532             mx = motion_x / 2;
2533             my = motion_y / 2;
2534             uvdxy = ((my & 1) << 1) | (mx & 1);
2535             uvsrc_x = s->mb_x* 8               + (mx >> 1);
2536             uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
2537         } else {
2538             if(s->chroma_x_shift){
2539             //Chroma422
2540                 mx = motion_x / 2;
2541                 uvdxy = ((motion_y & 1) << 1) | (mx & 1);
2542                 uvsrc_x = s->mb_x* 8           + (mx >> 1);
2543                 uvsrc_y = src_y;
2544             } else {
2545             //Chroma444
2546                 uvdxy = dxy;
2547                 uvsrc_x = src_x;
2548                 uvsrc_y = src_y;
2549             }
2550         }
2551     }
2552
2553     ptr_y  = ref_picture[0] + src_y * linesize + src_x;
2554     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
2555     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
2556
2557     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
2558        || (unsigned)src_y >    v_edge_pos - (motion_y&1) - h){
2559             if(s->codec_id == CODEC_ID_MPEG2VIDEO ||
2560                s->codec_id == CODEC_ID_MPEG1VIDEO){
2561                 av_log(s->avctx,AV_LOG_DEBUG,"MPEG motion vector out of boundary\n");
2562                 return ;
2563             }
2564             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
2565                              src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
2566             ptr_y = s->edge_emu_buffer;
2567             if(!(s->flags&CODEC_FLAG_GRAY)){
2568                 uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
2569                 ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based, 
2570                                  uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2571                 ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based, 
2572                                  uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2573                 ptr_cb= uvbuf;
2574                 ptr_cr= uvbuf+16;
2575             }
2576     }
2577
2578     if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
2579         dest_y += s->linesize;
2580         dest_cb+= s->uvlinesize;
2581         dest_cr+= s->uvlinesize;
2582     }
2583
2584     if(field_select){
2585         ptr_y += s->linesize;
2586         ptr_cb+= s->uvlinesize;
2587         ptr_cr+= s->uvlinesize;
2588     }
2589
2590     pix_op[0][dxy](dest_y, ptr_y, linesize, h);
2591     
2592     if(!(s->flags&CODEC_FLAG_GRAY)){
2593         pix_op[s->chroma_x_shift][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
2594         pix_op[s->chroma_x_shift][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
2595     }
2596 }
//FIXME move to dsputil, avg variant, 16x16 version
/**
 * Blend five 8x8 predictions into dst.
 *
 * Every output sample is a weighted sum of the samples at the same position
 * in the five sources; the weights of one position add up to 8, the sum is
 * rounded with +4 and shifted right by 3.
 *
 * @param dst    destination block
 * @param src    the predictions in the order mid, top, left, right, bottom
 * @param stride number of bytes between 2 vertically adjacent samples, used
 *               for dst and for all five sources
 */
static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
    /* per sample weights in the order {top, left, mid, right, bottom} */
    static const uint8_t weight[8][8][5] = {
        { {2,2,4,0,0}, {2,1,5,0,0}, {2,1,5,0,0}, {2,1,5,0,0}, {2,0,5,1,0}, {2,0,5,1,0}, {2,0,5,1,0}, {2,0,4,2,0} },
        { {1,2,5,0,0}, {1,2,5,0,0}, {2,1,5,0,0}, {2,1,5,0,0}, {2,0,5,1,0}, {2,0,5,1,0}, {1,0,5,2,0}, {1,0,5,2,0} },
        { {1,2,5,0,0}, {1,2,5,0,0}, {1,1,6,0,0}, {1,1,6,0,0}, {1,0,6,1,0}, {1,0,6,1,0}, {1,0,5,2,0}, {1,0,5,2,0} },
        { {1,2,5,0,0}, {1,2,5,0,0}, {1,1,6,0,0}, {1,1,6,0,0}, {1,0,6,1,0}, {1,0,6,1,0}, {1,0,5,2,0}, {1,0,5,2,0} },
        { {0,2,5,0,1}, {0,2,5,0,1}, {0,1,6,0,1}, {0,1,6,0,1}, {0,0,6,1,1}, {0,0,6,1,1}, {0,0,5,2,1}, {0,0,5,2,1} },
        { {0,2,5,0,1}, {0,2,5,0,1}, {0,1,6,0,1}, {0,1,6,0,1}, {0,0,6,1,1}, {0,0,6,1,1}, {0,0,5,2,1}, {0,0,5,2,1} },
        { {0,2,5,0,1}, {0,2,5,0,1}, {0,1,5,0,2}, {0,1,5,0,2}, {0,0,5,1,2}, {0,0,5,1,2}, {0,0,5,2,1}, {0,0,5,2,1} },
        { {0,2,4,0,2}, {0,1,5,0,2}, {0,1,5,0,2}, {0,1,5,0,2}, {0,0,5,1,2}, {0,0,5,1,2}, {0,0,5,1,2}, {0,0,4,2,2} },
    };
    uint8_t * const mid   = src[0];
    uint8_t * const top   = src[1];
    uint8_t * const left  = src[2];
    uint8_t * const right = src[3];
    uint8_t * const bottom= src[4];
    int row, col;

    for(row=0; row<8; row++){
        const int line= row*stride;

        for(col=0; col<8; col++){
            const uint8_t * const wt= weight[row][col];
            const int pos= line + col;

            dst[pos]= (  wt[0]*top   [pos]
                       + wt[1]*left  [pos]
                       + wt[2]*mid   [pos]
                       + wt[3]*right [pos]
                       + wt[4]*bottom[pos] + 4)>>3;
        }
    }
}
2648
2649 /* obmc for 1 8x8 luma block */
2650 static inline void obmc_motion(MpegEncContext *s,
2651                                uint8_t *dest, uint8_t *src,
2652                                int src_x, int src_y,
2653                                op_pixels_func *pix_op,
2654                                int16_t mv[5][2]/* mid top left right bottom*/)
2655 #define MID    0
2656 {
2657     int i;
2658     uint8_t *ptr[5];
2659     
2660     assert(s->quarter_sample==0);
2661     
2662     for(i=0; i<5; i++){
2663         if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){
2664             ptr[i]= ptr[MID];
2665         }else{
2666             ptr[i]= s->obmc_scratchpad + 8*(i&1) + s->linesize*8*(i>>1);
2667             hpel_motion(s, ptr[i], src, 0, 0,
2668                         src_x, src_y,
2669                         s->width, s->height, s->linesize,
2670                         s->h_edge_pos, s->v_edge_pos,
2671                         8, 8, pix_op,
2672                         mv[i][0], mv[i][1]);
2673         }
2674     }
2675
2676     put_obmc(dest, ptr, s->linesize);                
2677 }
2678
2679 static inline void qpel_motion(MpegEncContext *s,
2680                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2681                                int field_based, int bottom_field, int field_select,
2682                                uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
2683                                qpel_mc_func (*qpix_op)[16],
2684                                int motion_x, int motion_y, int h)
2685 {
2686     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2687     int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, linesize, uvlinesize;
2688
2689     dxy = ((motion_y & 3) << 2) | (motion_x & 3);
2690     src_x = s->mb_x *  16                 + (motion_x >> 2);
2691     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);
2692
2693     v_edge_pos = s->v_edge_pos >> field_based;
2694     linesize = s->linesize << field_based;
2695     uvlinesize = s->uvlinesize << field_based;
2696     
2697     if(field_based){
2698         mx= motion_x/2;
2699         my= motion_y>>1;
2700     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
2701         static const int rtab[8]= {0,0,1,1,0,0,0,1};
2702         mx= (motion_x>>1) + rtab[motion_x&7];
2703         my= (motion_y>>1) + rtab[motion_y&7];
2704     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
2705         mx= (motion_x>>1)|(motion_x&1);
2706         my= (motion_y>>1)|(motion_y&1);
2707     }else{
2708         mx= motion_x/2;
2709         my= motion_y/2;
2710     }
2711     mx= (mx>>1)|(mx&1);
2712     my= (my>>1)|(my&1);
2713
2714     uvdxy= (mx&1) | ((my&1)<<1);
2715     mx>>=1;
2716     my>>=1;
2717
2718     uvsrc_x = s->mb_x *  8                 + mx;
2719     uvsrc_y = s->mb_y * (8 >> field_based) + my;
2720
2721     ptr_y  = ref_picture[0] +   src_y *   linesize +   src_x;
2722     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
2723     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
2724
2725     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16 
2726        || (unsigned)src_y >    v_edge_pos - (motion_y&3) - h  ){
2727         ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based, 
2728                          src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
2729         ptr_y= s->edge_emu_buffer;
2730         if(!(s->flags&CODEC_FLAG_GRAY)){
2731             uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize;
2732             ff_emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize, 9, 9 + field_based, 
2733                              uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2734             ff_emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize, 9, 9 + field_based, 
2735                              uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2736             ptr_cb= uvbuf;
2737             ptr_cr= uvbuf + 16;
2738         }
2739     }
2740
2741     if(!field_based)
2742         qpix_op[0][dxy](dest_y, ptr_y, linesize);
2743     else{
2744         if(bottom_field){
2745             dest_y += s->linesize;
2746             dest_cb+= s->uvlinesize;
2747             dest_cr+= s->uvlinesize;
2748         }
2749
2750         if(field_select){
2751             ptr_y  += s->linesize;
2752             ptr_cb += s->uvlinesize;
2753             ptr_cr += s->uvlinesize;
2754         }
2755         //damn interlaced mode
2756         //FIXME boundary mirroring is not exactly correct here
2757         qpix_op[1][dxy](dest_y  , ptr_y  , linesize);
2758         qpix_op[1][dxy](dest_y+8, ptr_y+8, linesize);
2759     }
2760     if(!(s->flags&CODEC_FLAG_GRAY)){
2761         pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1);
2762         pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1);
2763     }
2764 }
2765
2766 inline int ff_h263_round_chroma(int x){
2767     if (x >= 0)
2768         return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
2769     else {
2770         x = -x;
2771         return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
2772     }
2773 }
2774
2775 /**
2776  * h263 chorma 4mv motion compensation.
2777  */
2778 static inline void chroma_4mv_motion(MpegEncContext *s,
2779                                      uint8_t *dest_cb, uint8_t *dest_cr,
2780                                      uint8_t **ref_picture,
2781                                      op_pixels_func *pix_op,
2782                                      int mx, int my){
2783     int dxy, emu=0, src_x, src_y, offset;
2784     uint8_t *ptr;
2785     
2786     /* In case of 8X8, we construct a single chroma motion vector
2787        with a special rounding */
2788     mx= ff_h263_round_chroma(mx);
2789     my= ff_h263_round_chroma(my);
2790     
2791     dxy = ((my & 1) << 1) | (mx & 1);
2792     mx >>= 1;
2793     my >>= 1;
2794
2795     src_x = s->mb_x * 8 + mx;
2796     src_y = s->mb_y * 8 + my;
2797     src_x = clip(src_x, -8, s->width/2);
2798     if (src_x == s->width/2)
2799         dxy &= ~1;
2800     src_y = clip(src_y, -8, s->height/2);
2801     if (src_y == s->height/2)
2802         dxy &= ~2;
2803     
2804     offset = (src_y * (s->uvlinesize)) + src_x;
2805     ptr = ref_picture[1] + offset;
2806     if(s->flags&CODEC_FLAG_EMU_EDGE){
2807         if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
2808            || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
2809             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2810             ptr= s->edge_emu_buffer;
2811             emu=1;
2812         }
2813     }
2814     pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);
2815
2816     ptr = ref_picture[2] + offset;
2817     if(emu){
2818         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2819         ptr= s->edge_emu_buffer;
2820     }
2821     pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
2822 }
2823
2824 /**
2825  * motion compesation of a single macroblock
2826  * @param s context
2827  * @param dest_y luma destination pointer
2828  * @param dest_cb chroma cb/u destination pointer
2829  * @param dest_cr chroma cr/v destination pointer
2830  * @param dir direction (0->forward, 1->backward)
2831  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
2832  * @param pic_op halfpel motion compensation function (average or put normally)
2833  * @param pic_op qpel motion compensation function (average or put normally)
2834  * the motion vectors are taken from s->mv and the MV type from s->mv_type
2835  */
2836 static inline void MPV_motion(MpegEncContext *s, 
2837                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2838                               int dir, uint8_t **ref_picture, 
2839                               op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
2840 {
2841     int dxy, mx, my, src_x, src_y, motion_x, motion_y;
2842     int mb_x, mb_y, i;
2843     uint8_t *ptr, *dest;
2844
2845     mb_x = s->mb_x;
2846     mb_y = s->mb_y;
2847
2848     if(s->obmc && s->pict_type != B_TYPE){
2849         int16_t mv_cache[4][4][2];
2850         const int xy= s->mb_x + s->mb_y*s->mb_stride;
2851         const int mot_stride= s->b8_stride;
2852         const int mot_xy= mb_x*2 + mb_y*2*mot_stride;
2853
2854         assert(!s->mb_skiped);
2855                 
2856         memcpy(mv_cache[1][1], s->current_picture.motion_val[0][mot_xy           ], sizeof(int16_t)*4);
2857         memcpy(mv_cache[2][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
2858         memcpy(mv_cache[3][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
2859
2860         if(mb_y==0 || IS_INTRA(s->current_picture.mb_type[xy-s->mb_stride])){
2861             memcpy(mv_cache[0][1], mv_cache[1][1], sizeof(int16_t)*4);
2862         }else{
2863             memcpy(mv_cache[0][1], s->current_picture.motion_val[0][mot_xy-mot_stride], sizeof(int16_t)*4);
2864         }
2865
2866         if(mb_x==0 || IS_INTRA(s->current_picture.mb_type[xy-1])){
2867             *(int32_t*)mv_cache[1][0]= *(int32_t*)mv_cache[1][1];
2868             *(int32_t*)mv_cache[2][0]= *(int32_t*)mv_cache[2][1];
2869         }else{
2870             *(int32_t*)mv_cache[1][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1];
2871             *(int32_t*)mv_cache[2][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1+mot_stride];
2872         }
2873
2874         if(mb_x+1>=s->mb_width || IS_INTRA(s->current_picture.mb_type[xy+1])){
2875             *(int32_t*)mv_cache[1][3]= *(int32_t*)mv_cache[1][2];
2876             *(int32_t*)mv_cache[2][3]= *(int32_t*)mv_cache[2][2];
2877         }else{
2878             *(int32_t*)mv_cache[1][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2];
2879             *(int32_t*)mv_cache[2][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2+mot_stride];
2880         }
2881         
2882         mx = 0;
2883         my = 0;
2884         for(i=0;i<4;i++) {
2885             const int x= (i&1)+1;
2886             const int y= (i>>1)+1;
2887             int16_t mv[5][2]= {
2888                 {mv_cache[y][x  ][0], mv_cache[y][x  ][1]},
2889                 {mv_cache[y-1][x][0], mv_cache[y-1][x][1]},
2890                 {mv_cache[y][x-1][0], mv_cache[y][x-1][1]},
2891                 {mv_cache[y][x+1][0], mv_cache[y][x+1][1]},
2892                 {mv_cache[y+1][x][0], mv_cache[y+1][x][1]}};
2893             //FIXME cleanup
2894             obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
2895                         ref_picture[0],
2896                         mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
2897                         pix_op[1],
2898                         mv);
2899
2900             mx += mv[0][0];
2901             my += mv[0][1];
2902         }
2903         if(!(s->flags&CODEC_FLAG_GRAY))
2904             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
2905
2906         return;
2907     }
2908    
2909     switch(s->mv_type) {
2910     case MV_TYPE_16X16:
2911 #ifdef CONFIG_RISKY
2912         if(s->mcsel){
2913             if(s->real_sprite_warping_points==1){
2914                 gmc1_motion(s, dest_y, dest_cb, dest_cr,
2915                             ref_picture);
2916             }else{
2917                 gmc_motion(s, dest_y, dest_cb, dest_cr,
2918                             ref_picture);
2919             }
2920         }else if(s->quarter_sample){
2921             qpel_motion(s, dest_y, dest_cb, dest_cr, 
2922                         0, 0, 0,
2923                         ref_picture, pix_op, qpix_op,
2924                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
2925         }else if(s->mspel){
2926             ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
2927                         ref_picture, pix_op,
2928                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
2929         }else
2930 #endif
2931         {
2932             mpeg_motion(s, dest_y, dest_cb, dest_cr, 
2933                         0, 0, 0,
2934                         ref_picture, pix_op,
2935                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
2936         }           
2937         break;
2938     case MV_TYPE_8X8:
2939         mx = 0;
2940         my = 0;
2941         if(s->quarter_sample){
2942             for(i=0;i<4;i++) {
2943                 motion_x = s->mv[dir][i][0];
2944                 motion_y = s->mv[dir][i][1];
2945
2946                 dxy = ((motion_y & 3) << 2) | (motion_x & 3);
2947                 src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
2948                 src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;
2949                     
2950                 /* WARNING: do no forget half pels */
2951                 src_x = clip(src_x, -16, s->width);
2952                 if (src_x == s->width)
2953                     dxy &= ~3;
2954                 src_y = clip(src_y, -16, s->height);
2955                 if (src_y == s->height)
2956                     dxy &= ~12;
2957                     
2958                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
2959                 if(s->flags&CODEC_FLAG_EMU_EDGE){
2960                     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8 
2961                        || (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 8 ){
2962                         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
2963                         ptr= s->edge_emu_buffer;
2964                     }
2965                 }
2966                 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
2967                 qpix_op[1][dxy](dest, ptr, s->linesize);
2968
2969                 mx += s->mv[dir][i][0]/2;
2970                 my += s->mv[dir][i][1]/2;
2971             }
2972         }else{
2973             for(i=0;i<4;i++) {
2974                 hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
2975                             ref_picture[0], 0, 0,
2976                             mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
2977                             s->width, s->height, s->linesize,
2978                             s->h_edge_pos, s->v_edge_pos,
2979                             8, 8, pix_op[1],
2980                             s->mv[dir][i][0], s->mv[dir][i][1]);
2981
2982                 mx += s->mv[dir][i][0];
2983                 my += s->mv[dir][i][1];
2984             }
2985         }
2986
2987         if(!(s->flags&CODEC_FLAG_GRAY))
2988             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
2989         break;
2990     case MV_TYPE_FIELD:
2991         if (s->picture_structure == PICT_FRAME) {
2992             if(s->quarter_sample){
2993                 for(i=0; i<2; i++){
2994                     qpel_motion(s, dest_y, dest_cb, dest_cr,
2995                                 1, i, s->field_select[dir][i],
2996                                 ref_picture, pix_op, qpix_op,
2997                                 s->mv[dir][i][0], s->mv[dir][i][1], 8);
2998                 }
2999             }else{
3000                 /* top field */       
3001                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3002                             1, 0, s->field_select[dir][0],
3003                             ref_picture, pix_op,
3004                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
3005                 /* bottom field */
3006                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3007                             1, 1, s->field_select[dir][1],
3008                             ref_picture, pix_op,
3009                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
3010             }
3011         } else {
3012             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
3013                 ref_picture= s->current_picture_ptr->data;
3014             } 
3015
3016             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3017                         0, 0, s->field_select[dir][0],
3018                         ref_picture, pix_op,
3019                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3020         }
3021         break;
3022     case MV_TYPE_16X8:
3023         for(i=0; i<2; i++){
3024             uint8_t ** ref2picture;
3025
3026             if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
3027                 ref2picture= ref_picture;
3028             }else{
3029                 ref2picture= s->current_picture_ptr->data;
3030             } 
3031
3032             mpeg_motion(s, dest_y, dest_cb, dest_cr, 
3033                         0, 0, s->field_select[dir][i],
3034                         ref2picture, pix_op,
3035                         s->mv[dir][i][0], s->mv[dir][i][1] + 16*i, 8);
3036                 
3037             dest_y += 16*s->linesize;
3038             dest_cb+= (16>>s->chroma_y_shift)*s->uvlinesize;
3039             dest_cr+= (16>>s->chroma_y_shift)*s->uvlinesize;
3040         }        
3041         break;
3042     case MV_TYPE_DMV:
3043         if(s->picture_structure == PICT_FRAME){
3044             for(i=0; i<2; i++){
3045                 int j;
3046                 for(j=0; j<2; j++){
3047                     mpeg_motion(s, dest_y, dest_cb, dest_cr,
3048                                 1, j, j^i,
3049                                 ref_picture, pix_op,
3050                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], 8);
3051                 }
3052                 pix_op = s->dsp.avg_pixels_tab; 
3053             }
3054         }else{
3055             for(i=0; i<2; i++){
3056                 mpeg_motion(s, dest_y, dest_cb, dest_cr, 
3057                             0, 0, s->picture_structure != i+1,
3058                             ref_picture, pix_op,
3059                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],16);
3060
3061                 // after put we make avg of the same block
3062                 pix_op=s->dsp.avg_pixels_tab; 
3063
3064                 //opposite parity is always in the same frame if this is second field
3065                 if(!s->first_field){
3066                     ref_picture = s->current_picture_ptr->data;    
3067                 }
3068             }
3069         }
3070     break;
3071     default: assert(0);
3072     }
3073 }
3074
3075
3076 /* put block[] to dest[] */
3077 static inline void put_dct(MpegEncContext *s, 
3078                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3079 {
3080     s->dct_unquantize_intra(s, block, i, qscale);
3081     s->dsp.idct_put (dest, line_size, block);
3082 }
3083
3084 /* add block[] to dest[] */
3085 static inline void add_dct(MpegEncContext *s, 
3086                            DCTELEM *block, int i, uint8_t *dest, int line_size)
3087 {
3088     if (s->block_last_index[i] >= 0) {
3089         s->dsp.idct_add (dest, line_size, block);
3090     }
3091 }
3092
3093 static inline void add_dequant_dct(MpegEncContext *s, 
3094                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3095 {
3096     if (s->block_last_index[i] >= 0) {
3097         s->dct_unquantize_inter(s, block, i, qscale);
3098
3099         s->dsp.idct_add (dest, line_size, block);
3100     }
3101 }
3102
3103 /**
3104  * cleans dc, ac, coded_block for the current non intra MB
3105  */
3106 void ff_clean_intra_table_entries(MpegEncContext *s)
3107 {
3108     int wrap = s->b8_stride;
3109     int xy = s->block_index[0];
3110     
3111     s->dc_val[0][xy           ] = 
3112     s->dc_val[0][xy + 1       ] = 
3113     s->dc_val[0][xy     + wrap] =
3114     s->dc_val[0][xy + 1 + wrap] = 1024;
3115     /* ac pred */
3116     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
3117     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
3118     if (s->msmpeg4_version>=3) {
3119         s->coded_block[xy           ] =
3120         s->coded_block[xy + 1       ] =
3121         s->coded_block[xy     + wrap] =
3122         s->coded_block[xy + 1 + wrap] = 0;
3123     }
3124     /* chroma */
3125     wrap = s->mb_stride;
3126     xy = s->mb_x + s->mb_y * wrap;
3127     s->dc_val[1][xy] =
3128     s->dc_val[2][xy] = 1024;
3129     /* ac pred */
3130     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
3131     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
3132     
3133     s->mbintra_table[xy]= 0;
3134 }
3135
3136 /* generic function called after a macroblock has been parsed by the
3137    decoder or after it has been encoded by the encoder.
3138
     It records s->qscale in the qscale table of the current picture, updates
     the DC / intra prediction state and, unless reconstruction is not needed,
     rebuilds the macroblock pixels at s->dest[] from the motion compensated
     prediction and/or the (de)quantized DCT blocks.

     s     : codec context, the macroblock position is taken from s->mb_x / s->mb_y
     block : the DCT coefficient blocks of the macroblock

3139    Important variables used:
3140    s->mb_intra : true if intra macroblock
3141    s->mv_dir   : motion vector direction
3142    s->mv_type  : motion vector type
3143    s->mv       : motion vector
3144    s->interlaced_dct : true if interlaced dct used (mpeg2)
3145  */
3146 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64])
3147 {
3148     int mb_x, mb_y;
3149     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
3150 #ifdef HAVE_XVMC
3151     if(s->avctx->xvmc_acceleration){
3152         XVMC_decode_mb(s);//xvmc uses pblocks
3153         return;
3154     }
3155 #endif
3156
3157     mb_x = s->mb_x;
3158     mb_y = s->mb_y;
3159
3160     if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
3161        /* save DCT coefficients */
3162        int i,j;
3163        DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
            /* only the first 6 blocks are saved, read through the idct permutation */
3164        for(i=0; i<6; i++)
3165            for(j=0; j<64; j++)
3166                *dct++ = block[i][s->dsp.idct_permutation[j]];
3167     }
3168
3169     s->current_picture.qscale_table[mb_xy]= s->qscale;
3170
3171     /* update DC predictors for P macroblocks */
3172     if (!s->mb_intra) {
3173         if (s->h263_pred || s->h263_aic) {
                 /* the tables only need cleaning if this position was marked intra */
3174             if(s->mbintra_table[mb_xy])
3175                 ff_clean_intra_table_entries(s);
3176         } else {
3177             s->last_dc[0] =
3178             s->last_dc[1] =
3179             s->last_dc[2] = 128 << s->intra_dc_precision;
3180         }
3181     }
3182     else if (s->h263_pred || s->h263_aic)
3183         s->mbintra_table[mb_xy]=1;
3184
         /* reconstruction is skipped while encoding intra only streams or B pictures,
            unless PSNR reporting was requested */
3185     if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { //FIXME precalc
3186         uint8_t *dest_y, *dest_cb, *dest_cr;
3187         int dct_linesize, dct_offset;
3188         op_pixels_func (*op_pix)[4];
3189         qpel_mc_func (*op_qpix)[16];
3190         const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
3191         const int uvlinesize= s->current_picture.linesize[1];
             /* if the destination is not "readable" the macroblock is built in
                s->b_scratchpad and copied to s->dest[] at the end */
3192         const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band;
3193
3194         /* avoid copy if macroblock skipped in last frame too */
3195         /* skip only during decoding as we might trash the buffers during encoding a bit */
3196         if(!s->encoding){
3197             uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
3198             const int age= s->current_picture.age;
3199
3200             assert(age);
3201
3202             if (s->mb_skiped) {
3203                 s->mb_skiped= 0;
3204                 assert(s->pict_type!=I_TYPE);
3205  
3206                 (*mbskip_ptr) ++; /* indicate that this time we skipped it */
3207                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3208
3209                 /* if previous was skipped too, then nothing to do !  */
3210                 if (*mbskip_ptr >= age && s->current_picture.reference){
3211                     return;
3212                 }
3213             } else if(!s->current_picture.reference){
3214                 (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
3215                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3216             } else{
3217                 *mbskip_ptr = 0; /* not skipped */
3218             }
3219         }
3220
             /* with interlaced dct a block covers every second line: the stride is
                doubled and the lower luma blocks start 1 line (instead of 8 lines) down */
3221         dct_linesize = linesize << s->interlaced_dct;
3222         dct_offset =(s->interlaced_dct)? linesize : linesize*8;
3223
3224         if(readable){
3225             dest_y=  s->dest[0];
3226             dest_cb= s->dest[1];
3227             dest_cr= s->dest[2];
3228         }else{
3229             dest_y = s->b_scratchpad;
3230             dest_cb= s->b_scratchpad+16*linesize;
3231             dest_cr= s->b_scratchpad+32*linesize;
3232         }
3233         if (!s->mb_intra) {
3234             /* motion handling */
3235             /* decoding or more than one mb_type (MC was already done otherwise) */
3236             if(!s->encoding){
3237                 if ((!s->no_rounding) || s->pict_type==B_TYPE){                
3238                     op_pix = s->dsp.put_pixels_tab;
3239                     op_qpix= s->dsp.put_qpel_pixels_tab;
3240                 }else{
3241                     op_pix = s->dsp.put_no_rnd_pixels_tab;
3242                     op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
3243                 }
3244
3245                 if (s->mv_dir & MV_DIR_FORWARD) {
3246                     MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
                         /* a following backward prediction is averaged with the forward one */
3247                     op_pix = s->dsp.avg_pixels_tab;
3248                     op_qpix= s->dsp.avg_qpel_pixels_tab;
3249                 }
3250                 if (s->mv_dir & MV_DIR_BACKWARD) {
3251                     MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
3252                 }
3253             }
3254
3255             /* skip dequant / idct if we are really late ;) */
3256             if(s->hurry_up>1) return;
3257
3258             /* add dct residue */
                 /* the blocks are dequantized here when encoding and for all codecs not
                    listed below; for the listed codecs they are added as they are
                    (presumably dequantized by the decoder already -- verify against the callers) */
3259             if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
3260                                 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
3261                 add_dequant_dct(s, block[0], 0, dest_y, dct_linesize, s->qscale);
3262                 add_dequant_dct(s, block[1], 1, dest_y + 8, dct_linesize, s->qscale);
3263                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize, s->qscale);
3264                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize, s->qscale);
3265
3266                 if(!(s->flags&CODEC_FLAG_GRAY)){
3267                     add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
3268                     add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
3269                 }
3270             } else if(s->codec_id != CODEC_ID_WMV2){
3271                 add_dct(s, block[0], 0, dest_y, dct_linesize);
3272                 add_dct(s, block[1], 1, dest_y + 8, dct_linesize);
3273                 add_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
3274                 add_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
3275
3276                 if(!(s->flags&CODEC_FLAG_GRAY)){
3277                     if(s->chroma_y_shift){//Chroma420
3278                         add_dct(s, block[4], 4, dest_cb, uvlinesize);
3279                         add_dct(s, block[5], 5, dest_cr, uvlinesize);
3280                     }else{
3281                         //chroma422
                             /* from here on dct_linesize / dct_offset describe the chroma planes */
3282                         dct_linesize = uvlinesize << s->interlaced_dct;
3283                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
3284
3285                         add_dct(s, block[4], 4, dest_cb, dct_linesize);
3286                         add_dct(s, block[5], 5, dest_cr, dct_linesize);
3287                         add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
3288                         add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
3289                         if(!s->chroma_x_shift){//Chroma444
3290                             add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
3291                             add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
3292                             add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
3293                             add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
3294                         }
3295                     }
3296                 }//fi gray
3297             }
3298 #ifdef CONFIG_RISKY
                 /* WMV2 (the only codec left by the conditions above) uses its own routine */
3299             else{
3300                 ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
3301             }
3302 #endif
3303         } else {
3304             /* dct only in intra block */
                 /* MPEG-1/2 blocks are not dequantized here when decoding, they are passed
                    to the idct directly */
3305             if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
3306                 put_dct(s, block[0], 0, dest_y, dct_linesize, s->qscale);
3307                 put_dct(s, block[1], 1, dest_y + 8, dct_linesize, s->qscale);
3308                 put_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize, s->qscale);
3309                 put_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize, s->qscale);
3310
3311                 if(!(s->flags&CODEC_FLAG_GRAY)){
3312                     put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
3313                     put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
3314                 }
3315             }else{
3316                 s->dsp.idct_put(dest_y                 , dct_linesize, block[0]);
3317                 s->dsp.idct_put(dest_y              + 8, dct_linesize, block[1]);
3318                 s->dsp.idct_put(dest_y + dct_offset    , dct_linesize, block[2]);
3319                 s->dsp.idct_put(dest_y + dct_offset + 8, dct_linesize, block[3]);
3320
3321                 if(!(s->flags&CODEC_FLAG_GRAY)){
3322                     if(s->chroma_y_shift){
3323                         s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
3324                         s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
3325                     }else{
3326
                             /* chroma is not vertically subsampled: two blocks per plane
                                (four when it is not horizontally subsampled either) */
3327                         dct_linesize = uvlinesize << s->interlaced_dct;
3328                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
3329
3330                         s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
3331                         s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
3332                         s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
3333                         s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
3334                         if(!s->chroma_x_shift){//Chroma444
3335                             s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
3336                             s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
3337                             s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
3338                             s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
3339                         }
3340                     }
3341                 }//gray
3342             }
3343         }
             /* copy the macroblock from the scratchpad to its real destination */
3344         if(!readable){
3345             s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
3346             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
3347             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
3348         }
3349     }
3350 }
3351
3352 #ifdef CONFIG_ENCODERS
3353
3354 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
3355 {
3356     static const char tab[64]=
3357         {3,2,2,1,1,1,1,1,
3358          1,1,1,1,1,1,1,1,
3359          1,1,1,1,1,1,1,1,
3360          0,0,0,0,0,0,0,0,
3361          0,0,0,0,0,0,0,0,
3362          0,0,0,0,0,0,0,0,
3363          0,0,0,0,0,0,0,0,
3364          0,0,0,0,0,0,0,0};
3365     int score=0;
3366     int run=0;
3367     int i;
3368     DCTELEM *block= s->block[n];
3369     const int last_index= s->block_last_index[n];
3370     int skip_dc;
3371
3372     if(threshold<0){
3373         skip_dc=0;
3374         threshold= -threshold;
3375     }else
3376         skip_dc=1;
3377
3378     /* are all which we could set to zero are allready zero? */
3379     if(last_index<=skip_dc - 1) return;
3380
3381     for(i=0; i<=last_index; i++){
3382         const int j = s->intra_scantable.permutated[i];
3383         const int level = ABS(block[j]);
3384         if(level==1){
3385             if(skip_dc && i==0) continue;
3386             score+= tab[run];
3387             run=0;
3388         }else if(level>1){
3389             return;
3390         }else{
3391             run++;
3392         }
3393     }
3394     if(score >= threshold) return;
3395     for(i=skip_dc; i<=last_index; i++){
3396         const int j = s->intra_scantable.permutated[i];
3397         block[j]=0;
3398     }
3399     if(block[0]) s->block_last_index[n]= 0;
3400     else         s->block_last_index[n]= -1;
3401 }
3402
3403 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
3404 {
3405     int i;
3406     const int maxlevel= s->max_qcoeff;
3407     const int minlevel= s->min_qcoeff;
3408     int overflow=0;
3409     
3410     if(s->mb_intra){
3411         i=1; //skip clipping of intra dc
3412     }else
3413         i=0;
3414     
3415     for(;i<=last_index; i++){
3416         const int j= s->intra_scantable.permutated[i];
3417         int level = block[j];
3418        
3419         if     (level>maxlevel){
3420             level=maxlevel;
3421             overflow++;
3422         }else if(level<minlevel){
3423             level=minlevel;
3424             overflow++;
3425         }
3426         
3427         block[j]= level;
3428     }
3429     
3430     if(overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
3431         av_log(s->avctx, AV_LOG_INFO, "warning, cliping %d dct coefficents to %d..%d\n", overflow, minlevel, maxlevel);
3432 }
3433
3434 #endif //CONFIG_ENCODERS
3435
3436 /**
3437  *
3438  * @param h is the normal height, this will be reduced automatically if needed for the last row
3439  */
3440 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
3441     if (s->avctx->draw_horiz_band) {
3442         AVFrame *src;
3443         int offset[4];
3444         
3445         if(s->picture_structure != PICT_FRAME){
3446             h <<= 1;
3447             y <<= 1;
3448             if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
3449         }
3450
3451         h= FFMIN(h, s->height - y);
3452
3453         if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER)) 
3454             src= (AVFrame*)s->current_picture_ptr;
3455         else if(s->last_picture_ptr)
3456             src= (AVFrame*)s->last_picture_ptr;
3457         else
3458             return;
3459             
3460         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
3461             offset[0]=
3462             offset[1]=
3463             offset[2]=
3464             offset[3]= 0;
3465         }else{
3466             offset[0]= y * s->linesize;;
3467             offset[1]= 
3468             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
3469             offset[3]= 0;
3470         }
3471
3472         emms_c();
3473
3474         s->avctx->draw_horiz_band(s->avctx, src, offset,
3475                                   y, s->picture_structure, h);
3476     }
3477 }
3478
3479 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
3480     const int linesize= s->current_picture.linesize[0]; //not s->linesize as this woulnd be wrong for field pics
3481     const int uvlinesize= s->current_picture.linesize[1];
3482         
3483     s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
3484     s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
3485     s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
3486     s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
3487     s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
3488     s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
3489     //block_index is not used by mpeg2, so it is not affected by chroma_format
3490
3491     s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << 4);
3492     s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (4 - s->chroma_x_shift));
3493     s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (4 - s->chroma_x_shift));
3494
3495     if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
3496     {
3497         s->dest[0] += s->mb_y *   linesize << 4;
3498         s->dest[1] += s->mb_y * uvlinesize << (4 - s->chroma_y_shift);
3499         s->dest[2] += s->mb_y * uvlinesize << (4 - s->chroma_y_shift);
3500     }
3501 }
3502
3503 #ifdef CONFIG_ENCODERS
3504
/**
 * Computes a weight for each pixel of an 8x8 block.
 *
 * For every pixel the up to 3x3 neighbourhood (clipped to the block) is
 * examined and the weight is set to 36*ff_sqrt(count*sqr - sum*sum)/count,
 * where count, sum and sqr are the number, the sum and the sum of squares of
 * the pixels in that neighbourhood.
 *
 * @param weight receives the 64 weights in raster order
 * @param ptr    top left pixel of the 8x8 block
 * @param stride distance in bytes between 2 lines of ptr
 */
static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){
    int row, col;
//FIXME optimize
    for(row=0; row<8; row++){
        const int top   = FFMAX(row-1, 0);
        const int bottom= FFMIN(8, row+2);

        for(col=0; col<8; col++){
            const int left   = FFMAX(col-1, 0);
            const int right  = FFMIN(8, col+2);
            const int samples= (bottom - top)*(right - left);
            int total  = 0;
            int squares= 0;
            int r, c;

            for(r=top; r<bottom; r++){
                const uint8_t *line= ptr + r*stride;

                for(c=left; c<right; c++){
                    const int v= line[c];

                    total   += v;
                    squares += v*v;
                }
            }
            weight[col + 8*row]= (36*ff_sqrt(samples*squares - total*total)) / samples;
        }
    }
}
3527
/**
 * Encodes the macroblock at (s->mb_x, s->mb_y).
 *
 * The steps are: update the quantizer (adaptive quantization / QP_RD),
 * fetch the source pixels (intra) or their difference to the motion
 * compensated prediction (inter) into s->block[0..5], DCT & quantize them,
 * optionally refine / eliminate coefficients and finally call the codec
 * specific macroblock bitstream writer.
 *
 * @param motion_x passed on unchanged to the codec specific macroblock writer
 * @param motion_y passed on unchanged to the codec specific macroblock writer
 */
3528 static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
3529 {
3530     int16_t weight[6][64];
3531     DCTELEM orig[6][64];
3532     const int mb_x= s->mb_x;
3533     const int mb_y= s->mb_y;
3534     int i;
3535     int skip_dct[6];
3536     int dct_offset   = s->linesize*8; //default for progressive frames
3537     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
3538     int wrap_y, wrap_c;
3539     
3540     for(i=0; i<6; i++) skip_dct[i]=0;
3541     
3542     if(s->adaptive_quant){
3543         const int last_qp= s->qscale;
3544         const int mb_xy= mb_x + mb_y*s->mb_stride;
3545
3546         s->lambda= s->lambda_table[mb_xy];
3547         update_qscale(s);
3548     
3549         if(!(s->flags&CODEC_FLAG_QP_RD)){
3550             s->dquant= s->qscale - last_qp;
3551
                 /* restrict the quantizer change for H.263 style output formats */
3552             if(s->out_format==FMT_H263){
3553                 s->dquant= clip(s->dquant, -2, 2); //FIXME RD
3554             
3555                 if(s->codec_id==CODEC_ID_MPEG4){        
3556                     if(!s->mb_intra){
3557                         if(s->pict_type == B_TYPE){
                                 /* odd changes are rounded towards zero to an even value */
3558                             if(s->dquant&1) 
3559                                 s->dquant= (s->dquant/2)*2;
3560                             if(s->mv_dir&MV_DIRECT)
3561                                 s->dquant= 0;
3562                         }
3563                         if(s->mv_type==MV_TYPE_8X8)
3564                             s->dquant=0;
3565                     }
3566                 }
3567             }
3568         }
3569         ff_set_qscale(s, last_qp + s->dquant);
3570     }else if(s->flags&CODEC_FLAG_QP_RD)
3571         ff_set_qscale(s, s->qscale + s->dquant);
3572
3573     wrap_y = s->linesize;
3574     wrap_c = s->uvlinesize;
3575     ptr_y = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
3576     ptr_cb = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
3577     ptr_cr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
3578
         /* the macroblock reaches beyond the picture: read the source through
            s->edge_emu_buffer instead */
3579     if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
3580         uint8_t *ebuf= s->edge_emu_buffer + 32;
3581         ff_emulated_edge_mc(ebuf            , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width   , s->height);
3582         ptr_y= ebuf;
3583         ff_emulated_edge_mc(ebuf+18*wrap_y  , ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
3584         ptr_cb= ebuf+18*wrap_y;
3585         ff_emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
3586         ptr_cr= ebuf+18*wrap_y+8;
3587     }
3588
3589     if (s->mb_intra) {
3590         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
3591             int progressive_score, interlaced_score;
3592
3593             s->interlaced_dct=0;
                 /* the progressive score is lowered by 400, interlaced dct is only
                    chosen if it still scores better */
3594             progressive_score= s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y, 8) 
3595                               +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400;
3596
3597             if(progressive_score > 0){
3598                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y*2, 8) 
3599                                   +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y  , NULL, wrap_y*2, 8);
3600                 if(progressive_score > interlaced_score){
3601                     s->interlaced_dct=1;
3602             
                         /* the luma blocks now cover every second line: lower blocks
                            start 1 line down and the stride is doubled */
3603                     dct_offset= wrap_y;
3604                     wrap_y<<=1;
3605                 }
3606             }
3607         }
3608         
3609         s->dsp.get_pixels(s->block[0], ptr_y                 , wrap_y);
3610         s->dsp.get_pixels(s->block[1], ptr_y              + 8, wrap_y);
3611         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset    , wrap_y);
3612         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
3613
3614         if(s->flags&CODEC_FLAG_GRAY){
3615             skip_dct[4]= 1;
3616             skip_dct[5]= 1;
3617         }else{
3618             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
3619             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
3620         }
3621     }else{
3622         op_pixels_func (*op_pix)[4];
3623         qpel_mc_func (*op_qpix)[16];
3624         uint8_t *dest_y, *dest_cb, *dest_cr;
3625
3626         dest_y  = s->dest[0];
3627         dest_cb = s->dest[1];
3628         dest_cr = s->dest[2];
3629
3630         if ((!s->no_rounding) || s->pict_type==B_TYPE){
3631             op_pix = s->dsp.put_pixels_tab;
3632             op_qpix= s->dsp.put_qpel_pixels_tab;
3633         }else{
3634             op_pix = s->dsp.put_no_rnd_pixels_tab;
3635             op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
3636         }
3637
             /* build the prediction in s->dest[]; a backward prediction following a
                forward one is averaged with it */
3638         if (s->mv_dir & MV_DIR_FORWARD) {
3639             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
3640             op_pix = s->dsp.avg_pixels_tab;
3641             op_qpix= s->dsp.avg_qpel_pixels_tab;
3642         }
3643         if (s->mv_dir & MV_DIR_BACKWARD) {
3644             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
3645         }
3646
3647         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
3648             int progressive_score, interlaced_score;
3649
3650             s->interlaced_dct=0;
3651             progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8) 
3652                               +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;
3653             
3654             if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;
3655
3656             if(progressive_score>0){
3657                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8) 
3658                                   +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);
3659             
3660                 if(progressive_score > interlaced_score){
3661                     s->interlaced_dct=1;
3662             
3663                     dct_offset= wrap_y;
3664                     wrap_y<<=1;
3665                 }
3666             }
3667         }
3668         
             /* residue = source - prediction */
3669         s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
3670         s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
3671         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
3672         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
3673         
3674         if(s->flags&CODEC_FLAG_GRAY){
3675             skip_dct[4]= 1;
3676             skip_dct[5]= 1;
3677         }else{
3678             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
3679             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
3680         }
3681         /* pre quantization */         
             /* blocks whose SAD against the prediction is below 20*qscale are not coded at all */
3682         if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
3683             //FIXME optimize
3684             if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
3685             if(s->dsp.sad[1](NULL, ptr_y            + 8, dest_y            + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
3686             if(s->dsp.sad[1](NULL, ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
3687             if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
3688             if(s->dsp.sad[1](NULL, ptr_cb              , dest_cb              , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
3689             if(s->dsp.sad[1](NULL, ptr_cr              , dest_cr              , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
3690         }
3691     }
3692
         /* noise shaping needs per pixel weights and a copy of the unquantized blocks */
3693     if(s->avctx->quantizer_noise_shaping){
3694         if(!skip_dct[0]) get_vissual_weight(weight[0], ptr_y                 , wrap_y);
3695         if(!skip_dct[1]) get_vissual_weight(weight[1], ptr_y              + 8, wrap_y);
3696         if(!skip_dct[2]) get_vissual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
3697         if(!skip_dct[3]) get_vissual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
3698         if(!skip_dct[4]) get_vissual_weight(weight[4], ptr_cb                , wrap_c);
3699         if(!skip_dct[5]) get_vissual_weight(weight[5], ptr_cr                , wrap_c);
3700         memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*6);
3701     }
3702             
3703     /* DCT & quantize */
3704     assert(s->out_format!=FMT_MJPEG || s->qscale==8);
3705     {
3706         for(i=0;i<6;i++) {
3707             if(!skip_dct[i]){
3708                 int overflow;
3709                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
3710             // FIXME we could decide to change to quantizer instead of clipping
3711             // JS: I don't think that would be a good idea it could lower quality instead
3712             //     of improve it. Just INTRADC clipping deserves changes in quantizer
3713                 if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
3714             }else
3715                 s->block_last_index[i]= -1;
3716         }
3717         if(s->avctx->quantizer_noise_shaping){
3718             for(i=0;i<6;i++) {
3719                 if(!skip_dct[i]){
3720                     s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale);
3721                 }
3722             }
3723         }
3724         
             /* single coefficient elimination is only applied to inter macroblocks */
3725         if(s->luma_elim_threshold && !s->mb_intra)
3726             for(i=0; i<4; i++)
3727                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
3728         if(s->chroma_elim_threshold && !s->mb_intra)
3729             for(i=4; i<6; i++)
3730                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
3731
3732         if(s->flags & CODEC_FLAG_CBP_RD){
3733             for(i=0;i<6;i++) {
3734                 if(s->block_last_index[i] == -1)
3735                     s->coded_score[i]= INT_MAX/256;
3736             }
3737         }
3738     }
3739
         /* gray intra macroblocks: the chroma blocks were skipped above, give them
            a DC only block with the constant value below */
3740     if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
3741         s->block_last_index[4]=
3742         s->block_last_index[5]= 0;
3743         s->block[4][0]=
3744         s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
3745     }
3746
3747     //non c quantize code returns incorrect block_last_index FIXME
3748     if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
3749         for(i=0; i<6; i++){
3750             int j;
3751             if(s->block_last_index[i]>0){
                     /* search the last non zero coefficient in scan order */
3752                 for(j=63; j>0; j--){
3753                     if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
3754                 }
3755                 s->block_last_index[i]= j;
3756             }
3757         }
3758     }
3759
3760     /* huffman encode */
3761     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
3762     case CODEC_ID_MPEG1VIDEO:
3763     case CODEC_ID_MPEG2VIDEO:
3764         mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
3765 #ifdef CONFIG_RISKY
3766     case CODEC_ID_MPEG4:
3767         mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
3768     case CODEC_ID_MSMPEG4V2:
3769     case CODEC_ID_MSMPEG4V3:
3770     case CODEC_ID_WMV1:
3771         msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
3772     case CODEC_ID_WMV2:
3773          ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
3774     case CODEC_ID_H263:
3775     case CODEC_ID_H263P:
3776     case CODEC_ID_FLV1:
3777     case CODEC_ID_RV10:
3778         h263_encode_mb(s, s->block, motion_x, motion_y); break;
3779 #endif
3780     case CODEC_ID_MJPEG:
3781         mjpeg_encode_mb(s, s->block); break;
3782     default:
3783         assert(0);
3784     }
3785 }
3786
3787 #endif //CONFIG_ENCODERS
3788
3789 void ff_mpeg_flush(AVCodecContext *avctx){
3790     int i;
3791     MpegEncContext *s = avctx->priv_data;
3792     
3793     if(s==NULL || s->picture==NULL) 
3794         return;
3795     
3796     for(i=0; i<MAX_PICTURE_COUNT; i++){
3797        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
3798                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
3799         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
3800     }
3801     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
3802     
3803     s->parse_context.state= -1;
3804     s->parse_context.frame_start_found= 0;
3805     s->parse_context.overread= 0;
3806     s->parse_context.overread_index= 0;
3807     s->parse_context.index= 0;
3808     s->parse_context.last_index= 0;
3809     s->bitstream_buffer_size=0;
3810 }
3811
3812 #ifdef CONFIG_ENCODERS
3813 void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
3814 {
3815     const uint16_t *srcw= (uint16_t*)src;
3816     int words= length>>4;
3817     int bits= length&15;
3818     int i;
3819
3820     if(length==0) return;
3821     
3822     if(words < 16){
3823         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
3824     }else if(put_bits_count(pb)&7){
3825         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
3826     }else{
3827         for(i=0; put_bits_count(pb)&31; i++)
3828             put_bits(pb, 8, src[i]);
3829         flush_put_bits(pb);
3830         memcpy(pbBufPtr(pb), src+i, 2*words-i);
3831         skip_put_bytes(pb, 2*words-i);
3832     }
3833         
3834     put_bits(pb, bits, be2me_16(srcw[words])>>(16-bits));
3835 }
3836
3837 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
3838     int i;
3839
3840     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
3841
3842     /* mpeg1 */
3843     d->mb_skip_run= s->mb_skip_run;
3844     for(i=0; i<3; i++)
3845         d->last_dc[i]= s->last_dc[i];
3846     
3847     /* statistics */
3848     d->mv_bits= s->mv_bits;
3849     d->i_tex_bits= s->i_tex_bits;
3850     d->p_tex_bits= s->p_tex_bits;
3851     d->i_count= s->i_count;
3852     d->f_count= s->f_count;
3853     d->b_count= s->b_count;
3854     d->skip_count= s->skip_count;
3855     d->misc_bits= s->misc_bits;
3856     d->last_bits= 0;
3857
3858     d->mb_skiped= 0;
3859     d->qscale= s->qscale;
3860     d->dquant= s->dquant;
3861 }
3862
3863 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
3864     int i;
3865
3866     memcpy(d->mv, s->mv, 2*4*2*sizeof(int)); 
3867     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
3868     
3869     /* mpeg1 */
3870     d->mb_skip_run= s->mb_skip_run;
3871     for(i=0; i<3; i++)
3872         d->last_dc[i]= s->last_dc[i];
3873     
3874     /* statistics */
3875     d->mv_bits= s->mv_bits;
3876     d->i_tex_bits= s->i_tex_bits;
3877     d->p_tex_bits= s->p_tex_bits;
3878     d->i_count= s->i_count;
3879     d->f_count= s->f_count;
3880     d->b_count= s->b_count;
3881     d->skip_count= s->skip_count;
3882     d->misc_bits= s->misc_bits;
3883
3884     d->mb_intra= s->mb_intra;
3885     d->mb_skiped= s->mb_skiped;
3886     d->mv_type= s->mv_type;
3887     d->mv_dir= s->mv_dir;
3888     d->pb= s->pb;
3889     if(s->data_partitioning){
3890         d->pb2= s->pb2;
3891         d->tex_pb= s->tex_pb;
3892     }
3893     d->block= s->block;
3894     for(i=0; i<6; i++)
3895         d->block_last_index[i]= s->block_last_index[i];
3896     d->interlaced_dct= s->interlaced_dct;
3897     d->qscale= s->qscale;
3898 }
3899
3900 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type, 
3901                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
3902                            int *dmin, int *next_block, int motion_x, int motion_y)
3903 {
3904     int score;
3905     uint8_t *dest_backup[3];
3906     
3907     copy_context_before_encode(s, backup, type);
3908
3909     s->block= s->blocks[*next_block];
3910     s->pb= pb[*next_block];
3911     if(s->data_partitioning){
3912         s->pb2   = pb2   [*next_block];
3913         s->tex_pb= tex_pb[*next_block];
3914     }
3915     
3916     if(*next_block){
3917         memcpy(dest_backup, s->dest, sizeof(s->dest));
3918         s->dest[0] = s->rd_scratchpad;
3919         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
3920         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
3921         assert(s->linesize >= 32); //FIXME
3922     }
3923
3924     encode_mb(s, motion_x, motion_y);
3925     
3926     score= put_bits_count(&s->pb);
3927     if(s->data_partitioning){
3928         score+= put_bits_count(&s->pb2);
3929         score+= put_bits_count(&s->tex_pb);
3930     }
3931    
3932     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
3933         MPV_decode_mb(s, s->block);
3934
3935         score *= s->lambda2;
3936         score += sse_mb(s) << FF_LAMBDA_SHIFT;
3937     }
3938     
3939     if(*next_block){
3940         memcpy(s->dest, dest_backup, sizeof(s->dest));
3941     }
3942
3943     if(score<*dmin){
3944         *dmin= score;
3945         *next_block^=1;
3946
3947         copy_context_after_encode(best, s, type);
3948     }
3949 }
3950                 
3951 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
3952     uint32_t *sq = squareTbl + 256;
3953     int acc=0;
3954     int x,y;
3955     
3956     if(w==16 && h==16) 
3957         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
3958     else if(w==8 && h==8)
3959         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
3960     
3961     for(y=0; y<h; y++){
3962         for(x=0; x<w; x++){
3963             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
3964         } 
3965     }
3966     
3967     assert(acc>=0);
3968     
3969     return acc;
3970 }
3971
3972 static int sse_mb(MpegEncContext *s){
3973     int w= 16;
3974     int h= 16;
3975
3976     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3977     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3978
3979     if(w==16 && h==16)
3980       if(s->avctx->mb_cmp == FF_CMP_NSSE){
3981         return  s->dsp.nsse[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
3982                +s->dsp.nsse[1](s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
3983                +s->dsp.nsse[1](s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
3984       }else{
3985         return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
3986                +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
3987                +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
3988       }
3989     else
3990         return  sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
3991                +sse(s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
3992                +sse(s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
3993 }
3994
3995 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
3996     MpegEncContext *s= arg;
3997
3998     
3999     s->me.pre_pass=1;
4000     s->me.dia_size= s->avctx->pre_dia_size;
4001     s->first_slice_line=1;
4002     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
4003         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
4004             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4005         }
4006         s->first_slice_line=0;
4007     }
4008     
4009     s->me.pre_pass=0;
4010     
4011     return 0;
4012 }
4013
4014 static int estimate_motion_thread(AVCodecContext *c, void *arg){
4015     MpegEncContext *s= arg;
4016
4017     s->me.dia_size= s->avctx->dia_size;
4018     s->first_slice_line=1;
4019     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
4020         s->mb_x=0; //for block init below
4021         ff_init_block_index(s);
4022         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
4023             s->block_index[0]+=2;
4024             s->block_index[1]+=2;
4025             s->block_index[2]+=2;
4026             s->block_index[3]+=2;
4027             
4028             /* compute motion vector & mb_type and store in context */
4029             if(s->pict_type==B_TYPE)
4030                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
4031             else
4032                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4033         }
4034         s->first_slice_line=0;
4035     }
4036     return 0;
4037 }
4038
4039 static int mb_var_thread(AVCodecContext *c, void *arg){
4040     MpegEncContext *s= arg;
4041     int mb_x, mb_y;
4042
4043     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4044         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4045             int xx = mb_x * 16;
4046             int yy = mb_y * 16;
4047             uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
4048             int varc;
4049             int sum = s->dsp.pix_sum(pix, s->linesize);
4050     
4051             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
4052
4053             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
4054             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
4055             s->me.mb_var_sum_temp    += varc;
4056         }
4057     }
4058     return 0;
4059 }
4060
4061 static void write_slice_end(MpegEncContext *s){
4062     if(s->codec_id==CODEC_ID_MPEG4){
4063         if(s->partitioned_frame){
4064             ff_mpeg4_merge_partitions(s);
4065         }
4066     
4067         ff_mpeg4_stuffing(&s->pb);
4068     }else if(s->out_format == FMT_MJPEG){
4069         ff_mjpeg_stuffing(&s->pb);
4070     }
4071
4072     align_put_bits(&s->pb);
4073     flush_put_bits(&s->pb);
4074 }
4075
4076 static int encode_thread(AVCodecContext *c, void *arg){
4077     MpegEncContext *s= arg;
4078     int mb_x, mb_y, pdif = 0;
4079     int i, j;
4080     MpegEncContext best_s, backup_s;
4081     uint8_t bit_buf[2][3000];
4082     uint8_t bit_buf2[2][3000];
4083     uint8_t bit_buf_tex[2][3000];
4084     PutBitContext pb[2], pb2[2], tex_pb[2];
4085 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
4086
4087     for(i=0; i<2; i++){
4088         init_put_bits(&pb    [i], bit_buf    [i], 3000);
4089         init_put_bits(&pb2   [i], bit_buf2   [i], 3000);
4090         init_put_bits(&tex_pb[i], bit_buf_tex[i], 3000);
4091     }
4092
4093     s->last_bits= put_bits_count(&s->pb);
4094     s->mv_bits=0;
4095     s->misc_bits=0;
4096     s->i_tex_bits=0;
4097     s->p_tex_bits=0;
4098     s->i_count=0;
4099     s->f_count=0;
4100     s->b_count=0;
4101     s->skip_count=0;
4102
4103     for(i=0; i<3; i++){
4104         /* init last dc values */
4105         /* note: quant matrix value (8) is implied here */
4106         s->last_dc[i] = 128 << s->intra_dc_precision;
4107         
4108         s->current_picture_ptr->error[i] = 0;
4109     }
4110     s->mb_skip_run = 0;
4111     memset(s->last_mv, 0, sizeof(s->last_mv));
4112      
4113     s->last_mv_dir = 0;
4114
4115 #ifdef CONFIG_RISKY
4116     switch(s->codec_id){
4117     case CODEC_ID_H263:
4118     case CODEC_ID_H263P:
4119     case CODEC_ID_FLV1:
4120         s->gob_index = ff_h263_get_gob_height(s);
4121         break;
4122     case CODEC_ID_MPEG4:
4123         if(s->partitioned_frame)
4124             ff_mpeg4_init_partitions(s);
4125         break;
4126     }
4127 #endif
4128
4129     s->resync_mb_x=0;
4130     s->resync_mb_y=0; 
4131     s->first_slice_line = 1;
4132     s->ptr_lastgob = s->pb.buf;
4133     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4134 //    printf("row %d at %X\n", s->mb_y, (int)s);
4135         s->mb_x=0;
4136         s->mb_y= mb_y;
4137
4138         ff_set_qscale(s, s->qscale);
4139         ff_init_block_index(s);
4140         
4141         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4142             const int xy= mb_y*s->mb_stride + mb_x;
4143             int mb_type= s->mb_type[xy];
4144 //            int d;
4145             int dmin= INT_MAX;
4146             int dir;
4147
4148             s->mb_x = mb_x;
4149             ff_update_block_index(s);
4150
4151             /* write gob / video packet header  */
4152 #ifdef CONFIG_RISKY
4153             if(s->rtp_mode){
4154                 int current_packet_size, is_gob_start;
4155                 
4156                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
4157                 
4158                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0; 
4159                 
4160                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
4161                 
4162                 switch(s->codec_id){
4163                 case CODEC_ID_H263:
4164                 case CODEC_ID_H263P:
4165                     if(!s->h263_slice_structured)
4166                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
4167                     break;
4168                 case CODEC_ID_MPEG2VIDEO:
4169                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
4170                 case CODEC_ID_MPEG1VIDEO:
4171                     if(s->mb_skip_run) is_gob_start=0;
4172                     break;
4173                 }
4174
4175                 if(is_gob_start){
4176                     if(s->start_mb_y != mb_y || mb_x!=0){
4177                         write_slice_end(s);
4178
4179                         if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
4180                             ff_mpeg4_init_partitions(s);
4181                         }
4182                     }
4183                 
4184                     assert((put_bits_count(&s->pb)&7) == 0);
4185                     current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
4186                     
4187                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
4188                         int r= put_bits_count(&s->pb)/8 + s->picture_number + s->codec_id + s->mb_x + s->mb_y;
4189                         int d= 100 / s->avctx->error_rate;
4190                         if(r % d == 0){
4191                             current_packet_size=0;
4192 #ifndef ALT_BITSTREAM_WRITER
4193                             s->pb.buf_ptr= s->ptr_lastgob;
4194 #endif
4195                             assert(pbBufPtr(&s->pb) == s->ptr_lastgob);
4196                         }
4197                     }
4198         
4199                     if (s->avctx->rtp_callback)
4200                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, 0);
4201                     
4202                     switch(s->codec_id){
4203                     case CODEC_ID_MPEG4:
4204                         ff_mpeg4_encode_video_packet_header(s);
4205                         ff_mpeg4_clean_buffers(s);
4206                     break;
4207                     case CODEC_ID_MPEG1VIDEO:
4208                     case CODEC_ID_MPEG2VIDEO:
4209                         ff_mpeg1_encode_slice_header(s);
4210                         ff_mpeg1_clean_buffers(s);
4211                     break;
4212                     case CODEC_ID_H263:
4213                     case CODEC_ID_H263P:
4214                         h263_encode_gob_header(s, mb_y);                       
4215                     break;
4216                     }
4217
4218                     if(s->flags&CODEC_FLAG_PASS1){
4219                         int bits= put_bits_count(&s->pb);
4220                         s->misc_bits+= bits - s->last_bits;
4221                         s->last_bits= bits;
4222                     }
4223     
4224                     s->ptr_lastgob += current_packet_size;
4225                     s->first_slice_line=1;
4226                     s->resync_mb_x=mb_x;
4227                     s->resync_mb_y=mb_y;
4228                 }
4229             }
4230 #endif
4231
4232             if(  (s->resync_mb_x   == s->mb_x)
4233                && s->resync_mb_y+1 == s->mb_y){
4234                 s->first_slice_line=0; 
4235             }
4236
4237             s->mb_skiped=0;
4238             s->dquant=0; //only for QP_RD
4239
4240             if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
4241                 int next_block=0;
4242                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
4243
4244                 copy_context_before_encode(&backup_s, s, -1);
4245                 backup_s.pb= s->pb;
4246                 best_s.data_partitioning= s->data_partitioning;
4247                 best_s.partitioned_frame= s->partitioned_frame;
4248                 if(s->data_partitioning){
4249                     backup_s.pb2= s->pb2;
4250                     backup_s.tex_pb= s->tex_pb;
4251                 }
4252
4253                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
4254                     s->mv_dir = MV_DIR_FORWARD;
4255                     s->mv_type = MV_TYPE_16X16;
4256                     s->mb_intra= 0;
4257                     s->mv[0][0][0] = s->p_mv_table[xy][0];
4258                     s->mv[0][0][1] = s->p_mv_table[xy][1];
4259                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb, 
4260                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4261                 }
4262                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){ 
4263                     s->mv_dir = MV_DIR_FORWARD;
4264                     s->mv_type = MV_TYPE_FIELD;
4265                     s->mb_intra= 0;
4266                     for(i=0; i<2; i++){
4267                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
4268                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
4269                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
4270                     }
4271                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb, 
4272                                  &dmin, &next_block, 0, 0);
4273                 }
4274                 if(mb_type&CANDIDATE_MB_TYPE_SKIPED){
4275                     s->mv_dir = MV_DIR_FORWARD;
4276                     s->mv_type = MV_TYPE_16X16;
4277                     s->mb_intra= 0;
4278                     s->mv[0][0][0] = 0;
4279                     s->mv[0][0][1] = 0;
4280                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPED, pb, pb2, tex_pb, 
4281                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4282                 }
4283                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){                 
4284                     s->mv_dir = MV_DIR_FORWARD;
4285                     s->mv_type = MV_TYPE_8X8;
4286                     s->mb_intra= 0;
4287                     for(i=0; i<4; i++){
4288                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
4289                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
4290                     }
4291                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb, 
4292                                  &dmin, &next_block, 0, 0);
4293                 }
4294                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
4295                     s->mv_dir = MV_DIR_FORWARD;
4296                     s->mv_type = MV_TYPE_16X16;
4297                     s->mb_intra= 0;
4298                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
4299                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
4300                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb, 
4301                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4302                 }
4303                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
4304                     s->mv_dir = MV_DIR_BACKWARD;
4305                     s->mv_type = MV_TYPE_16X16;
4306                     s->mb_intra= 0;
4307                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
4308                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
4309                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb, 
4310                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
4311                 }
4312                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
4313                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
4314                     s->mv_type = MV_TYPE_16X16;
4315                     s->mb_intra= 0;
4316                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
4317                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
4318                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
4319                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
4320                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb, 
4321                                  &dmin, &next_block, 0, 0);
4322                 }
4323                 if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
4324                     int mx= s->b_direct_mv_table[xy][0];
4325                     int my= s->b_direct_mv_table[xy][1];
4326                     
4327                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
4328                     s->mb_intra= 0;
4329 #ifdef CONFIG_RISKY
4330                     ff_mpeg4_set_direct_mv(s, mx, my);
4331 #endif
4332                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb, 
4333                                  &dmin, &next_block, mx, my);
4334                 }
4335                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){ 
4336                     s->mv_dir = MV_DIR_FORWARD;
4337                     s->mv_type = MV_TYPE_FIELD;
4338                     s->mb_intra= 0;
4339                     for(i=0; i<2; i++){
4340                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
4341                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
4342                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
4343                     }
4344                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb, 
4345                                  &dmin, &next_block, 0, 0);
4346                 }
4347                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){ 
4348                     s->mv_dir = MV_DIR_BACKWARD;
4349                     s->mv_type = MV_TYPE_FIELD;
4350                     s->mb_intra= 0;
4351                     for(i=0; i<2; i++){
4352                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
4353                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
4354                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
4355                     }
4356                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb, 
4357                                  &dmin, &next_block, 0, 0);
4358                 }
4359                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){ 
4360                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
4361                     s->mv_type = MV_TYPE_FIELD;
4362                     s->mb_intra= 0;
4363                     for(dir=0; dir<2; dir++){
4364                         for(i=0; i<2; i++){
4365                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
4366                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
4367                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
4368                         }
4369                     }
4370                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb, 
4371                                  &dmin, &next_block, 0, 0);
4372                 }
4373                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
4374                     s->mv_dir = 0;
4375                     s->mv_type = MV_TYPE_16X16;
4376                     s->mb_intra= 1;
4377                     s->mv[0][0][0] = 0;
4378                     s->mv[0][0][1] = 0;
4379                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb, 
4380                                  &dmin, &next_block, 0, 0);
4381                     if(s->h263_pred || s->h263_aic){
4382                         if(best_s.mb_intra)
4383                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
4384                         else
4385                             ff_clean_intra_table_entries(s); //old mode?
4386                     }
4387                 }
4388
4389                 if(s->flags & CODEC_FLAG_QP_RD){
4390                     if(best_s.mv_type==MV_TYPE_16X16 && !(best_s.mv_dir&MV_DIRECT)){
4391                         const int last_qp= backup_s.qscale;
4392                         int dquant, dir, qp, dc[6];
4393                         DCTELEM ac[6][16];
4394                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
4395                         
4396                         assert(backup_s.dquant == 0);
4397
4398                         //FIXME intra
4399                         s->mv_dir= best_s.mv_dir;
4400                         s->mv_type = MV_TYPE_16X16;
4401                         s->mb_intra= best_s.mb_intra;
4402                         s->mv[0][0][0] = best_s.mv[0][0][0];
4403                         s->mv[0][0][1] = best_s.mv[0][0][1];
4404                         s->mv[1][0][0] = best_s.mv[1][0][0];
4405                         s->mv[1][0][1] = best_s.mv[1][0][1];
4406                         
4407                         dir= s->pict_type == B_TYPE ? 2 : 1;
4408                         if(last_qp + dir > s->avctx->qmax) dir= -dir;
4409                         for(dquant= dir; dquant<=2 && dquant>=-2; dquant += dir){
4410                             qp= last_qp + dquant;
4411                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
4412                                 break;
4413                             backup_s.dquant= dquant;
4414                             if(s->mb_intra && s->dc_val[0]){
4415                                 for(i=0; i<6; i++){
4416                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
4417                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
4418                                 }
4419                             }
4420
4421                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb, 
4422                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
4423                             if(best_s.qscale != qp){
4424                                 if(s->mb_intra && s->dc_val[0]){
4425                                     for(i=0; i<6; i++){
4426                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
4427                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
4428                                     }
4429                                 }
4430                                 if(dir > 0 && dquant==dir){
4431                                     dquant= 0;
4432                                     dir= -dir;
4433                                 }else
4434                                     break;
4435                             }
4436                         }
4437                         qp= best_s.qscale;
4438                         s->current_picture.qscale_table[xy]= qp;
4439                     }
4440                 }
4441
4442                 copy_context_after_encode(s, &best_s, -1);
4443                 
4444                 pb_bits_count= put_bits_count(&s->pb);
4445                 flush_put_bits(&s->pb);
4446                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
4447                 s->pb= backup_s.pb;
4448                 
4449                 if(s->data_partitioning){
4450                     pb2_bits_count= put_bits_count(&s->pb2);
4451                     flush_put_bits(&s->pb2);
4452                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
4453                     s->pb2= backup_s.pb2;
4454                     
4455                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
4456                     flush_put_bits(&s->tex_pb);
4457                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
4458                     s->tex_pb= backup_s.tex_pb;
4459                 }
4460                 s->last_bits= put_bits_count(&s->pb);
4461                
4462 #ifdef CONFIG_RISKY
4463                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
4464                     ff_h263_update_motion_val(s);
4465 #endif
4466         
4467                 if(next_block==0){ //FIXME 16 vs linesize16
4468                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
4469                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
4470                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
4471                 }
4472
4473                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
4474                     MPV_decode_mb(s, s->block);
4475             } else {
4476                 int motion_x, motion_y;
4477                 s->mv_type=MV_TYPE_16X16;
4478                 // only one MB-Type possible
4479                 
4480                 switch(mb_type){
4481                 case CANDIDATE_MB_TYPE_INTRA:
4482                     s->mv_dir = 0;
4483                     s->mb_intra= 1;
4484                     motion_x= s->mv[0][0][0] = 0;
4485                     motion_y= s->mv[0][0][1] = 0;
4486                     break;
4487                 case CANDIDATE_MB_TYPE_INTER:
4488                     s->mv_dir = MV_DIR_FORWARD;
4489                     s->mb_intra= 0;
4490                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
4491                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
4492                     break;
4493                 case CANDIDATE_MB_TYPE_INTER_I:
4494                     s->mv_dir = MV_DIR_FORWARD;
4495                     s->mv_type = MV_TYPE_FIELD;
4496                     s->mb_intra= 0;
4497                     for(i=0; i<2; i++){
4498                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
4499                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
4500                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
4501                     }
4502                     motion_x = motion_y = 0;
4503                     break;
4504                 case CANDIDATE_MB_TYPE_INTER4V:
4505                     s->mv_dir = MV_DIR_FORWARD;
4506                     s->mv_type = MV_TYPE_8X8;
4507                     s->mb_intra= 0;
4508                     for(i=0; i<4; i++){
4509                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
4510                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
4511                     }
4512                     motion_x= motion_y= 0;
4513                     break;
4514                 case CANDIDATE_MB_TYPE_DIRECT:
4515                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
4516                     s->mb_intra= 0;
4517                     motion_x=s->b_direct_mv_table[xy][0];
4518                     motion_y=s->b_direct_mv_table[xy][1];
4519 #ifdef CONFIG_RISKY
4520                     ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
4521 #endif
4522                     break;
4523                 case CANDIDATE_MB_TYPE_BIDIR:
4524                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
4525                     s->mb_intra= 0;
4526                     motion_x=0;
4527                     motion_y=0;
4528                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
4529                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
4530                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
4531                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
4532                     break;
4533                 case CANDIDATE_MB_TYPE_BACKWARD:
4534                     s->mv_dir = MV_DIR_BACKWARD;
4535                     s->mb_intra= 0;
4536                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
4537                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
4538                     break;
4539                 case CANDIDATE_MB_TYPE_FORWARD:
4540                     s->mv_dir = MV_DIR_FORWARD;
4541                     s->mb_intra= 0;
4542                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
4543                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
4544 //                    printf(" %d %d ", motion_x, motion_y);
4545                     break;
4546                 case CANDIDATE_MB_TYPE_FORWARD_I:
4547                     s->mv_dir = MV_DIR_FORWARD;
4548                     s->mv_type = MV_TYPE_FIELD;
4549                     s->mb_intra= 0;
4550                     for(i=0; i<2; i++){
4551                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
4552                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
4553                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
4554                     }
4555                     motion_x=motion_y=0;
4556                     break;
4557                 case CANDIDATE_MB_TYPE_BACKWARD_I:
4558                     s->mv_dir = MV_DIR_BACKWARD;
4559                     s->mv_type = MV_TYPE_FIELD;
4560                     s->mb_intra= 0;
4561                     for(i=0; i<2; i++){
4562                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
4563                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
4564                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
4565                     }
4566                     motion_x=motion_y=0;
4567                     break;
4568                 case CANDIDATE_MB_TYPE_BIDIR_I:
4569                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
4570                     s->mv_type = MV_TYPE_FIELD;
4571                     s->mb_intra= 0;
4572                     for(dir=0; dir<2; dir++){
4573                         for(i=0; i<2; i++){
4574                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
4575                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
4576                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
4577                         }
4578                     }
4579                     motion_x=motion_y=0;
4580                     break;
4581                 default:
4582                     motion_x=motion_y=0; //gcc warning fix
4583                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
4584                 }
4585
4586                 encode_mb(s, motion_x, motion_y);
4587
4588                 // RAL: Update last macrobloc type
4589                 s->last_mv_dir = s->mv_dir;
4590             
4591 #ifdef CONFIG_RISKY
4592                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
4593                     ff_h263_update_motion_val(s);
4594 #endif
4595                 
4596                 MPV_decode_mb(s, s->block);
4597             }
4598
4599             /* clean the MV table in IPS frames for direct mode in B frames */
4600             if(s->mb_intra /* && I,P,S_TYPE */){
4601                 s->p_mv_table[xy][0]=0;
4602                 s->p_mv_table[xy][1]=0;
4603             }
4604             
4605             if(s->flags&CODEC_FLAG_PSNR){
4606                 int w= 16;
4607                 int h= 16;
4608
4609                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
4610                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
4611
4612                 s->current_picture_ptr->error[0] += sse(
4613                     s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
4614                     s->dest[0], w, h, s->linesize);
4615                 s->current_picture_ptr->error[1] += sse(
4616                     s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
4617                     s->dest[1], w>>1, h>>1, s->uvlinesize);
4618                 s->current_picture_ptr->error[2] += sse(
4619                     s, s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
4620                     s->dest[2], w>>1, h>>1, s->uvlinesize);
4621             }
4622             if(s->loop_filter)
4623                 ff_h263_loop_filter(s);
4624 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
4625         }
4626     }
4627
4628 #ifdef CONFIG_RISKY
4629     //not beautifull here but we must write it before flushing so it has to be here
4630     if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
4631         msmpeg4_encode_ext_header(s);
4632 #endif
4633
4634     write_slice_end(s);
4635
4636     /* Send the last GOB if RTP */    
4637     if (s->avctx->rtp_callback) {
4638         pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
4639         /* Call the RTP callback to send the last GOB */
4640         emms_c();
4641         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, 0);
4642     }
4643
4644     return 0;
4645 }
4646
4647 #define MERGE(field) dst->field += src->field; src->field=0
4648 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
4649     MERGE(me.scene_change_score);
4650     MERGE(me.mc_mb_var_sum_temp);
4651     MERGE(me.mb_var_sum_temp);
4652 }
4653
4654 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
4655     int i;
4656
4657     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
4658     MERGE(dct_count[1]);
4659     MERGE(mv_bits);
4660     MERGE(i_tex_bits);
4661     MERGE(p_tex_bits);
4662     MERGE(i_count);
4663     MERGE(f_count);
4664     MERGE(b_count);
4665     MERGE(skip_count);
4666     MERGE(misc_bits);
4667     MERGE(error_count);
4668     MERGE(padding_bug_score);
4669
4670     if(dst->avctx->noise_reduction){
4671         for(i=0; i<64; i++){
4672             MERGE(dct_error_sum[0][i]);
4673             MERGE(dct_error_sum[1][i]);
4674         }
4675     }
4676     
4677     assert(put_bits_count(&src->pb) % 8 ==0);
4678     assert(put_bits_count(&dst->pb) % 8 ==0);
4679     ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
4680     flush_put_bits(&dst->pb);
4681 }
4682
/**
 * Encodes the current picture into s->pb.
 *
 * In order: resets the per-picture motion-estimation statistics, updates the
 * rounding mode, runs motion estimation through avctx->execute() (for I
 * pictures every macroblock is marked intra instead and, unless the qscale is
 * fixed, the variance pass is run), may re-type a P picture as I on a scene
 * change, selects f_code/b_code and fixes over-long motion vectors, chooses
 * the picture quality and lambda, writes the format-specific picture header
 * and finally runs encode_thread over the thread contexts and merges their
 * results back into s.
 *
 * @param s              encoder context; s->thread_context[] supplies the
 *                       contexts handed to avctx->execute()
 * @param picture_number stored in s->picture_number and passed to the
 *                       picture-header writers
 */
static void encode_picture(MpegEncContext *s, int picture_number)
{
    int i;
    int bits;

    s->picture_number = picture_number;

    /* Reset the average MB variance */
    s->me.mb_var_sum_temp    =
    s->me.mc_mb_var_sum_temp = 0;

#ifdef CONFIG_RISKY
    /* we need to initialize some time vars before we can encode b-frames */
    // RAL: Condition added for MPEG1VIDEO
    if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
        ff_set_mpeg4_time(s, s->picture_number);  //FIXME rename and use has_b_frames or similar
#endif

    s->me.scene_change_score=0;

//    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion

    /* rounding mode: I pictures reset no_rounding (1 for msmpeg4_version >= 3,
       0 otherwise); other non-B pictures toggle it when flipflop rounding is
       enabled or the codec is H.263+ / MPEG-4; B pictures leave it untouched */
    if(s->pict_type==I_TYPE){
        if(s->msmpeg4_version >= 3) s->no_rounding=1;
        else                        s->no_rounding=0;
    }else if(s->pict_type!=B_TYPE){
        if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
            s->no_rounding ^= 1;
    }

    s->mb_intra=0; //for the rate distortion & bit compare functions
    /* bring the additional thread contexts (index 1 and up) in sync with s */
    for(i=1; i<s->avctx->thread_count; i++){
        ff_update_duplicate_context(s->thread_context[i], s);
    }

    ff_init_me(s);

    /* Estimate motion for every MB */
    if(s->pict_type != I_TYPE){
        /* optional pre-pass: only for non-B pictures with me_threshold==0, and
           only if pre_me==2 or pre_me is set and the last non-B picture was an I picture */
        if(s->pict_type != B_TYPE && s->avctx->me_threshold==0){
            if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
                s->avctx->execute(s->avctx, pre_estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
            }
        }

        s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
    }else /* if(s->pict_type == I_TYPE) */{
        /* I-Frame */
        for(i=0; i<s->mb_stride*s->mb_height; i++)
            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;

        if(!s->fixed_qscale){
            /* finding spatial complexity for I-frame rate control */
            s->avctx->execute(s->avctx, mb_var_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
        }
    }
    /* add the statistics gathered by the additional contexts to s */
    for(i=1; i<s->avctx->thread_count; i++){
        merge_context_after_me(s, s->thread_context[i]);
    }
    s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
    s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
    emms_c();

    /* scene change: a P picture whose score exceeds the threshold is coded
       as an I picture with every macroblock forced to intra */
    if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){
        s->pict_type= I_TYPE;
        for(i=0; i<s->mb_stride*s->mb_height; i++)
            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
//printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
    }

    /* f_code/b_code selection and long-MV fixing; skipped entirely with umvplus */
    if(!s->umvplus){
        if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
            s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);

            /* with interlaced ME the f_code must also cover the field MV tables */
            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int a,b;
                a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
                b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
                s->f_code= FFMAX(s->f_code, FFMAX(a,b));
            }

            ff_fix_long_p_mvs(s);
            ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int j;
                for(i=0; i<2; i++){
                    for(j=0; j<2; j++)
                        ff_fix_long_mvs(s, s->p_field_select_table[i], j,
                                        s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
                }
            }
        }

        if(s->pict_type==B_TYPE){
            int a, b;

            /* f_code covers the forward and bidir-forward tables */
            a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
            b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
            s->f_code = FFMAX(a, b);

            /* b_code covers the backward and bidir-backward tables */
            a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
            b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
            s->b_code = FFMAX(a, b);

            ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int dir, j;
                /* field MV tables: dir 0 uses f_code, dir 1 uses b_code */
                for(dir=0; dir<2; dir++){
                    for(i=0; i<2; i++){
                        for(j=0; j<2; j++){
                            int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
                                          : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
                            ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
                                            s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
                        }
                    }
                }
            }
        }
    }

    /* rate control picks the picture quality unless the qscale is fixed */
    if (!s->fixed_qscale)
        s->current_picture.quality = ff_rate_estimate_qscale(s); //FIXME pic_ptr

    if(s->adaptive_quant){
#ifdef CONFIG_RISKY
        /* codec-specific cleanup of the per-MB qscales; other codecs do nothing here */
        switch(s->codec_id){
        case CODEC_ID_MPEG4:
            ff_clean_mpeg4_qscales(s);
            break;
        case CODEC_ID_H263:
        case CODEC_ID_H263P:
        case CODEC_ID_FLV1:
            ff_clean_h263_qscales(s);
            break;
        }
#endif

        s->lambda= s->lambda_table[0];
        //FIXME broken
    }else
        s->lambda= s->current_picture.quality;
//printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
    update_qscale(s);

    if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE))
        s->qscale= 3; //reduce clipping problems

    if (s->out_format == FMT_MJPEG) {
        /* for mjpeg, we do include qscale in the matrix */
        /* entry 0 is copied unscaled; entries 1..63 are scaled by qscale/8,
           clamped to 8 bit and stored at their IDCT-permuted position */
        s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
        for(i=1;i<64;i++){
            int j= s->dsp.idct_permutation[i];

            s->intra_matrix[j] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
        }
        convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
                       s->intra_matrix, s->intra_quant_bias, 8, 8);
        /* qscale is already part of the matrix, so it is pinned to 8 from here on */
        s->qscale= 8;
    }

    //FIXME var duplication
    s->current_picture.key_frame= s->pict_type == I_TYPE; //FIXME pic_ptr
    s->current_picture.pict_type= s->pict_type;

    if(s->current_picture.key_frame)
        s->picture_in_gop_number=0;

    /* write the picture header; header_bits is the bit count it added to s->pb */
    s->last_bits= put_bits_count(&s->pb);
    switch(s->out_format) {
    case FMT_MJPEG:
        mjpeg_picture_header(s);
        break;
#ifdef CONFIG_RISKY
    case FMT_H263:
        if (s->codec_id == CODEC_ID_WMV2)
            ff_wmv2_encode_picture_header(s, picture_number);
        else if (s->h263_msmpeg4)
            msmpeg4_encode_picture_header(s, picture_number);
        else if (s->h263_pred)
            mpeg4_encode_picture_header(s, picture_number);
        else if (s->codec_id == CODEC_ID_RV10)
            rv10_encode_picture_header(s, picture_number);
        else if (s->codec_id == CODEC_ID_FLV1)
            ff_flv_encode_picture_header(s, picture_number);
        else
            h263_encode_picture_header(s, picture_number);
        break;
#endif
    case FMT_MPEG1:
        mpeg1_encode_picture_header(s, picture_number);
        break;
    case FMT_H264:
        /* no picture header is written here for this format */
        break;
    default:
        assert(0);
    }
    bits= put_bits_count(&s->pb);
    s->header_bits= bits - s->last_bits;

    /* pass the post-ME state to the additional contexts, encode, then merge
       the statistics and bitstreams of the additional contexts into s */
    for(i=1; i<s->avctx->thread_count; i++){
        update_duplicate_context_after_me(s->thread_context[i], s);
    }
    s->avctx->execute(s->avctx, encode_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
    for(i=1; i<s->avctx->thread_count; i++){
        merge_context_after_encode(s, s->thread_context[i]);
    }
    emms_c();
}
4895
4896 #endif //CONFIG_ENCODERS
4897
4898 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block){
4899     const int intra= s->mb_intra;
4900     int i;
4901
4902     s->dct_count[intra]++;
4903
4904     for(i=0; i<64; i++){
4905         int level= block[i];
4906
4907         if(level){
4908             if(level>0){
4909                 s->dct_error_sum[intra][i] += level;
4910                 level -= s->dct_offset[intra][i];
4911                 if(level<0) level=0;
4912             }else{
4913                 s->dct_error_sum[intra][i] -= level;
4914                 level += s->dct_offset[intra][i];
4915                 if(level>0) level=0;
4916             }
4917             block[i]= level;
4918         }
4919     }
4920 }
4921
4922 #ifdef CONFIG_ENCODERS
4923
/**
 * Forward DCT followed by rate-distortion optimized quantization of one block.
 *
 * block is transformed with s->dsp.fdct() (so it presumably holds pixel-domain
 * data on entry) and denoised when s->dct_error_sum is set. Every scan
 * position up to the last one that quantizes to non-zero gets one or two
 * candidate levels. A dynamic-programming pass then chooses, per position, the
 * (run, level) pair with the lowest "distortion + lambda * VLC length" score
 * and the cheapest position at which to end the block. The chosen levels are
 * written back into block; all other AC positions are zeroed.
 *
 * @param s        encoder context
 * @param block    64 coefficients, transformed and quantized in place
 * @param n        block number; for intra blocks n < 4 selects y_dc_scale,
 *                 otherwise c_dc_scale; also indexes s->coded_score[]
 * @param qscale   quantizer; selects the row of q_intra_matrix/q_inter_matrix
 * @param overflow set to non-zero if the OR of the quantized magnitudes
 *                 exceeds s->max_qcoeff, zero otherwise
 * @return scan index of the last non-zero coefficient; a value below the
 *         first coded index (0 for intra, -1 for inter) means nothing is coded
 */
static int dct_quantize_trellis_c(MpegEncContext *s,
                        DCTELEM *block, int n,
                        int qscale, int *overflow){
    const int *qmat;
    const uint8_t *scantable= s->intra_scantable.scantable;
    const uint8_t *perm_scantable= s->intra_scantable.permutated;
    int max=0;
    unsigned int threshold1, threshold2;
    int bias=0;
    int run_tab[65];   // run_tab[i+1]:   run of the best candidate at scan position i
    int level_tab[65]; // level_tab[i+1]: level of the best candidate at scan position i
    int score_tab[65]; // score_tab[i+1]: best score with a coefficient coded at scan position i
    int survivor[65];  // positions still worth considering as the start of the next run
    int survivor_count;
    int last_run=0;
    int last_level=0;
    int last_score= 0;
    int last_i;
    int coeff[2][64];    // candidate levels per scan position
    int coeff_count[64]; // number of valid candidates per scan position
    int qmul, qadd, start_i, last_non_zero, i, dc;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;
    const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);

    s->dsp.fdct (block);

    if(s->dct_error_sum)
        s->denoise_dct(s, block);
    /* reconstruction constants used for the FMT_H263 dequantization below */
    qmul= qscale*16;
    qadd= ((qscale-1)|1)*8;

    if (s->mb_intra) {
        int q;
        if (!s->h263_aic) {
            if (n < 4)
                q = s->y_dc_scale;
            else
                q = s->c_dc_scale;
            q = q << 3;
        } else{
            /* For AIC we skip quant/dequant of INTRADC */
            q = 1 << 3;
            qadd=0;
        }

        /* note: block[0] is assumed to be positive */
        /* the DC coefficient is quantized here with rounding; the trellis only covers index 1 and up */
        block[0] = (block[0] + (q >> 1)) / q;
        start_i = 1;
        last_non_zero = 0;
        qmat = s->q_intra_matrix[qscale];
        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
            bias= 1<<(QMAT_SHIFT-1);
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
    } else {
        start_i = 0;
        last_non_zero = -1;
        qmat = s->q_inter_matrix[qscale];
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }
    last_i= start_i;

    /* (unsigned)(level+threshold1) > threshold2 holds exactly when
       level > threshold1 or level < -threshold1, i.e. when the coefficient
       quantizes to a non-zero value with the given bias */
    threshold1= (1<<QMAT_SHIFT) - bias - 1;
    threshold2= (threshold1<<1);

    /* find the last scan position that quantizes to non-zero */
    for(i=63; i>=start_i; i--) {
        const int j = scantable[i];
        int level = block[j] * qmat[j];

        if(((unsigned)(level+threshold1))>threshold2){
            last_non_zero = i;
            break;
        }
    }

    /* build the candidate levels for every position up to last_non_zero */
    for(i=start_i; i<=last_non_zero; i++) {
        const int j = scantable[i];
        int level = block[j] * qmat[j];

//        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
//           || bias-level >= (1<<(QMAT_SHIFT - 3))){
        if(((unsigned)(level+threshold1))>threshold2){
            /* candidates: the quantized level and the level one step closer
               to zero; coeff_count drops the second one when it would be 0 */
            if(level>0){
                level= (bias + level)>>QMAT_SHIFT;
                coeff[0][i]= level;
                coeff[1][i]= level-1;
//                coeff[2][k]= level-2;
            }else{
                level= (bias - level)>>QMAT_SHIFT;
                coeff[0][i]= -level;
                coeff[1][i]= -level+1;
//                coeff[2][k]= -level+2;
            }
            coeff_count[i]= FFMIN(level, 2);
            assert(coeff_count[i]);
            max |=level;
        }else{
            /* would quantize to zero: single candidate of magnitude 1 with
               the sign taken from level's sign bit (assumes 32 bit int and an
               arithmetic right shift) */
            coeff[0][i]= (level>>31)|1;
            coeff_count[i]= 1;
        }
    }

    *overflow= s->max_qcoeff < max; //overflow might have happened

    /* nothing quantizes to non-zero: clear the coefficients and leave */
    if(last_non_zero < start_i){
        memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
        return last_non_zero;
    }

    score_tab[start_i]= 0;
    survivor[0]= start_i;
    survivor_count= 1;

    /* dynamic programming over the scan positions */
    for(i=start_i; i<=last_non_zero; i++){
        int level_index, j;
        const int dct_coeff= ABS(block[ scantable[i] ]);
        const int zero_distoration= dct_coeff*dct_coeff;
        int best_score=256*256*256*120;
        for(level_index=0; level_index < coeff_count[i]; level_index++){
            int distoration;
            int level= coeff[level_index][i];
            const int alevel= ABS(level);
            int unquant_coeff;

            assert(level);

            /* reconstruct the value a decoder would see for this candidate */
            if(s->out_format == FMT_H263){
                unquant_coeff= alevel*qmul + qadd;
            }else{ //MPEG1
                j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
                if(s->mb_intra){
                        unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
                        unquant_coeff =   (unquant_coeff - 1) | 1;
                }else{
                        unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
                        unquant_coeff =   (unquant_coeff - 1) | 1;
                }
                unquant_coeff<<= 3;
            }

            /* distortion relative to coding this coefficient as zero */
            distoration= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distoration;
            level+=64;
            /* biased level in 0..127 (level in -64..63): cost comes from the VLC length tables */
            if((level&(~127)) == 0){
                for(j=survivor_count-1; j>=0; j--){
                    int run= i - survivor[j];
                    int score= distoration + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
                    score += score_tab[i-run];

                    if(score < best_score){
                        best_score= score;
                        run_tab[i+1]= run;
                        level_tab[i+1]= level-64;
                    }
                }

                /* FMT_H263: also score this candidate as the final coefficient, using the "last" length table */
                if(s->out_format == FMT_H263){
                    for(j=survivor_count-1; j>=0; j--){
                        int run= i - survivor[j];
                        int score= distoration + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
                        score += score_tab[i-run];
                        if(score < last_score){
                            last_score= score;
                            last_run= run;
                            last_level= level-64;
                            last_i= i+1;
                        }
                    }
                }
            }else{
                /* outside the table range: charge the escape length instead */
                distoration += esc_length*lambda;
                for(j=survivor_count-1; j>=0; j--){
                    int run= i - survivor[j];
                    int score= distoration + score_tab[i-run];

                    if(score < best_score){
                        best_score= score;
                        run_tab[i+1]= run;
                        level_tab[i+1]= level-64;
                    }
                }

                if(s->out_format == FMT_H263){
                  for(j=survivor_count-1; j>=0; j--){
                        int run= i - survivor[j];
                        int score= distoration + score_tab[i-run];
                        if(score < last_score){
                            last_score= score;
                            last_run= run;
                            last_level= level-64;
                            last_i= i+1;
                        }
                    }
                }
            }
        }

        score_tab[i+1]= best_score;

        /* drop trailing survivors whose score is worse than the new best
           (with a tolerance of lambda when last_non_zero > 27) */
        //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
        if(last_non_zero <= 27){
            for(; survivor_count; survivor_count--){
                if(score_tab[ survivor[survivor_count-1] ] <= best_score)
                    break;
            }
        }else{
            for(; survivor_count; survivor_count--){
                if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
                    break;
            }
        }

        survivor[ survivor_count++ ]= i+1;
    }

    /* formats other than FMT_H263: pick the end position after the fact;
       any end position other than 0 is charged an extra 2*lambda
       (presumably the cost of the end-of-block code - TODO confirm) */
    if(s->out_format != FMT_H263){
        last_score= 256*256*256*120;
        for(i= survivor[0]; i<=last_non_zero + 1; i++){
            int score= score_tab[i];
            if(i) score += lambda*2; //FIXME exacter?

            if(score < last_score){
                last_score= score;
                last_i= i;
                last_level= level_tab[i];
                last_run= run_tab[i];
            }
        }
    }

    s->coded_score[n] = last_score;

    /* remember |block[0]| before the coefficients are cleared */
    dc= ABS(block[0]);
    last_non_zero= last_i - 1;
    memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));

    if(last_non_zero < start_i)
        return last_non_zero;

    /* special case: only the first coefficient of a block starting at index 0
       is left; re-evaluate its candidates against coding nothing at all */
    if(last_non_zero == 0 && start_i == 0){
        int best_level= 0;
        int best_score= dc * dc;

        for(i=0; i<coeff_count[0]; i++){
            int level= coeff[i][0];
            int alevel= ABS(level);
            int unquant_coeff, score, distortion;

            if(s->out_format == FMT_H263){
                    unquant_coeff= (alevel*qmul + qadd)>>3;
            }else{ //MPEG1
                    unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
                    unquant_coeff =   (unquant_coeff - 1) | 1;
            }
            unquant_coeff = (unquant_coeff + 4) >> 3;
            unquant_coeff<<= 3 + 3;

            distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
            level+=64;
            if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
            else                    score= distortion + esc_length*lambda;

            if(score < best_score){
                best_score= score;
                best_level= level - 64;
            }
        }
        block[0]= best_level;
        s->coded_score[n] = best_score - dc*dc;
        if(best_level == 0) return -1;
        else                return last_non_zero;
    }

    /* backtrack: store the final coefficient, then follow run_tab/level_tab
       towards the start of the block, writing each chosen level */
    i= last_i;
    assert(last_level);

    block[ perm_scantable[last_non_zero] ]= last_level;
    i -= last_run + 1;

    for(; i>start_i; i -= run_tab[i] + 1){
        block[ perm_scantable[i-1] ]= level_tab[i];
    }

    return last_non_zero;
}
5211
5212 //#define REFINE_STATS 1
5213 static int16_t basis[64][64];
5214
5215 static void build_basis(uint8_t *perm){
5216     int i, j, x, y;
5217     emms_c();
5218     for(i=0; i<8; i++){
5219         for(j=0; j<8; j++){
5220             for(y=0; y<8; y++){
5221                 for(x=0; x<8; x++){
5222                     double s= 0.25*(1<<BASIS_SHIFT);
5223                     int index= 8*i + j;
5224                     int perm_index= perm[index];
5225                     if(i==0) s*= sqrt(0.5);
5226                     if(j==0) s*= sqrt(0.5);
5227                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
5228                 }
5229             }
5230         }
5231     }
5232 }
5233
/**
 * Greedy refinement of an already quantized block (quantizer noise shaping).
 *
 * rem[] holds the difference between the reconstruction of the current levels
 * (qmul*level +- qadd, expanded through the basis[] table) and orig[], in
 * RECON_SHIFT fixed point. Each pass of the outer for(;;) loop tries to change
 * candidate coefficients by -1 and +1, scores every candidate as
 * try_8x8basis() distortion plus lambda times the VLC length difference,
 * applies the single best change, and stops once no change beats the score
 * of leaving the block untouched.
 *
 * @param s       encoder context; supplies scan tables, DC scales, VLC length
 *                tables, lambda2 and the dsp helpers used below
 * @param block   quantized levels, addressed through intra_scantable.permutated;
 *                modified in place
 * @param weight  per-sample weights; overwritten here with values remapped
 *                into the range 16..63
 * @param orig    reference values the reconstruction is compared against
 *                (presumably the unquantized spatial block -- confirm at caller)
 * @param n       block index; n < 4 selects y_dc_scale, otherwise c_dc_scale
 * @param qscale  quantizer scale
 * @return scan position of the last non-zero level after refinement
 */
5234 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
5235                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
5236                         int n, int qscale){
5237     int16_t rem[64];
5238     DCTELEM d1[64];
5239     const int *qmat;
5240     const uint8_t *scantable= s->intra_scantable.scantable;
5241     const uint8_t *perm_scantable= s->intra_scantable.permutated;
5242 //    unsigned int threshold1, threshold2;
5243 //    int bias=0;
5244     int run_tab[65];
5245     int prev_run=0;
5246     int prev_level=0;
5247     int qmul, qadd, start_i, last_non_zero, i, dc;
5248     uint8_t * length;
5249     uint8_t * last_length;
5250     int lambda;
5251     int rle_index, run, q, sum;
5252 #ifdef REFINE_STATS
5253 static int count=0;
5254 static int after_last=0;
5255 static int to_zero=0;
5256 static int from_zero=0;
5257 static int raise=0;
5258 static int lower=0;
5259 static int messed_sign=0;
5260 #endif
5261
/* basis[0][0] == 0 is used as the "table not built yet" marker */
5262     if(basis[0][0] == 0)
5263         build_basis(s->dsp.idct_permutation);
5264     
/* reconstruction used throughout: coeff = qmul*level +- qadd */
5265     qmul= qscale*2;
5266     qadd= (qscale-1)|1;
5267     if (s->mb_intra) {
5268         if (!s->h263_aic) {
5269             if (n < 4)
5270                 q = s->y_dc_scale;
5271             else
5272                 q = s->c_dc_scale;
5273         } else{
5274             /* For AIC we skip quant/dequant of INTRADC */
5275             q = 1;
5276             qadd=0;
5277         }
5278         q <<= RECON_SHIFT-3;
5279         /* note: block[0] is assumed to be positive */
5280         dc= block[0]*q;
5281 //        block[0] = (block[0] + (q >> 1)) / q;
/* intra: position 0 (DC) is handled separately, the AC search starts at 1 */
5282         start_i = 1;
5283         qmat = s->q_intra_matrix[qscale];
5284 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
5285 //            bias= 1<<(QMAT_SHIFT-1);
5286         length     = s->intra_ac_vlc_length;
5287         last_length= s->intra_ac_vlc_last_length;
5288     } else {
5289         dc= 0;
5290         start_i = 0;
5291         qmat = s->q_inter_matrix[qscale];
5292         length     = s->inter_ac_vlc_length;
5293         last_length= s->inter_ac_vlc_last_length;
5294     }
/* NOTE(review): qmat is assigned above but never read in this function */
5295     last_non_zero = s->block_last_index[n];
5296
5297 #ifdef REFINE_STATS
5298 {START_TIMER
5299 #endif
/* rem[] starts as (DC contribution + rounding offset) - orig; the AC
 * contributions of the current levels are added further below */
5300     dc += (1<<(RECON_SHIFT-1));
5301     for(i=0; i<64; i++){
5302         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly insteadof copying to rem[]
5303     }
5304 #ifdef REFINE_STATS
5305 STOP_TIMER("memset rem[]")}
5306 #endif
/* remap the caller's weights into 16..63 (written back into weight[]) and
 * accumulate the sum of squares that scales lambda */
5307     sum=0;
5308     for(i=0; i<64; i++){
5309         int one= 36;
5310         int qns=4;
5311         int w;
5312
5313         w= ABS(weight[i]) + qns*one;
5314         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
5315
5316         weight[i] = w;
5317 //        w=weight[i] = (63*qns + (w/2)) / w;
5318          
5319         assert(w>0);
5320         assert(w<(1<<6));
5321         sum += w*w;
5322     }
5323     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
5324 #ifdef REFINE_STATS
5325 {START_TIMER
5326 #endif
/* record the zero run preceding every non-zero level in run_tab[] and add
 * each level's reconstruction to rem[] */
5327     run=0;
5328     rle_index=0;
5329     for(i=start_i; i<=last_non_zero; i++){
5330         int j= perm_scantable[i];
5331         const int level= block[j];
5332         int coeff;
5333         
5334         if(level){
5335             if(level<0) coeff= qmul*level - qadd;
5336             else        coeff= qmul*level + qadd;
5337             run_tab[rle_index++]=run;
5338             run=0;
5339
5340             s->dsp.add_8x8basis(rem, basis[j], coeff);
5341         }else{
5342             run++;
5343         }
5344     }
5345 #ifdef REFINE_STATS
5346 if(last_non_zero>0){
5347 STOP_TIMER("init rem[]")
5348 }
5349 }
5350
5351 {START_TIMER
5352 #endif
/* one iteration = find and apply the single best +-1 change; exits via the
 * break at the bottom when nothing improves on best_score */
5353     for(;;){
/* baseline: score of the block with no change applied */
5354         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
5355         int best_coeff=0;
5356         int best_change=0;
5357         int run2, best_unquant_change=0, analyze_gradient;
5358 #ifdef REFINE_STATS
5359 {START_TIMER
5360 #endif
5361         analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;
5362
/* d1[] = forward DCT of the weighted residual; used below to reject
 * zero -> +-1 changes whose sign matches the sign of d1 at that position */
5363         if(analyze_gradient){
5364 #ifdef REFINE_STATS
5365 {START_TIMER
5366 #endif
5367             for(i=0; i<64; i++){
5368                 int w= weight[i];
5369             
5370                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
5371             }
5372 #ifdef REFINE_STATS
5373 STOP_TIMER("rem*w*w")}
5374 {START_TIMER
5375 #endif
5376             s->dsp.fdct(d1);
5377 #ifdef REFINE_STATS
5378 STOP_TIMER("dct")}
5379 #endif
5380         }
5381
/* intra DC candidate: scored on distortion only, no bit-cost term */
5382         if(start_i){
5383             const int level= block[0];
5384             int change, old_coeff;
5385
5386             assert(s->mb_intra);
5387             
5388             old_coeff= q*level;
5389             
5390             for(change=-1; change<=1; change+=2){
5391                 int new_level= level + change;
5392                 int score, new_coeff;
5393                 
5394                 new_coeff= q*new_level;
5395                 if(new_coeff >= 2048 || new_coeff < 0)
5396                     continue;
5397
5398                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
5399                 if(score<best_score){
5400                     best_score= score;
5401                     best_coeff= 0;
5402                     best_change= change;
5403                     best_unquant_change= new_coeff - old_coeff;
5404                 }
5405             }
5406         }
5407         
/* run  = zeros seen since the previous non-zero level,
 * run2 = zeros remaining until the next non-zero level (from run_tab[]) */
5408         run=0;
5409         rle_index=0;
5410         run2= run_tab[rle_index++];
5411         prev_level=0;
5412         prev_run=0;
5413
5414         for(i=start_i; i<64; i++){
5415             int j= perm_scantable[i];
5416             const int level= block[j];
5417             int change, old_coeff;
5418
/* below noise-shaping level 3 only one position past the last
 * non-zero coefficient is considered */
5419             if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
5420                 break;
5421
5422             if(level){
5423                 if(level<0) old_coeff= qmul*level - qadd;
5424                 else        old_coeff= qmul*level + qadd;
5425                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
5426             }else{
5427                 old_coeff=0;
5428                 run2--;
5429                 assert(run2>=0 || i >= last_non_zero );
5430             }
5431             
5432             for(change=-1; change<=1; change+=2){
5433                 int new_level= level + change;
5434                 int score, new_coeff, unquant_change;
5435                 
5436                 score=0;
/* below noise-shaping level 2 magnitudes may only shrink */
5437                 if(s->avctx->quantizer_noise_shaping < 2 && ABS(new_level) > ABS(level))
5438                    continue;
5439
5440                 if(new_level){
5441                     if(new_level<0) new_coeff= qmul*new_level - qadd;
5442                     else            new_coeff= qmul*new_level + qadd;
5443                     if(new_coeff >= 2048 || new_coeff <= -2048)
5444                         continue;
5445                     //FIXME check for overflow
5446                     
/* case 1: non-zero -> non-zero, same run, only the level
 * code length changes (skipped when |level| >= 63) */
5447                     if(level){
5448                         if(level < 63 && level > -63){
5449                             if(i < last_non_zero)
5450                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
5451                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
5452                             else
5453                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
5454                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
5455                         }
5456                     }else{
/* case 2: zero -> +-1, a new code is inserted and the run of
 * the following (or the "last" flag of the previous) code changes */
5457                         assert(ABS(new_level)==1);
5458                         
5459                         if(analyze_gradient){
5460                             int g= d1[ scantable[i] ];
5461                             if(g && (g^new_level) >= 0)
5462                                 continue;
5463                         }
5464
5465                         if(i < last_non_zero){
5466                             int next_i= i + run2 + 1;
5467                             int next_level= block[ perm_scantable[next_i] ] + 64;
5468                             
5469                             if(next_level&(~127))
5470                                 next_level= 0;
5471
5472                             if(next_i < last_non_zero)
5473                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
5474                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
5475                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
5476                             else
5477                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
5478                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
5479                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
5480                         }else{
5481                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
5482                             if(prev_level){
5483                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
5484                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
5485                             }
5486                         }
5487                     }
5488                 }else{
/* case 3: +-1 -> zero, the code is removed and its run is merged
 * into the following code (or the previous code becomes "last") */
5489                     new_coeff=0;
5490                     assert(ABS(level)==1);
5491
5492                     if(i < last_non_zero){
5493                         int next_i= i + run2 + 1;
5494                         int next_level= block[ perm_scantable[next_i] ] + 64;
5495                             
5496                         if(next_level&(~127))
5497                             next_level= 0;
5498
5499                         if(next_i < last_non_zero)
5500                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
5501                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
5502                                      - length[UNI_AC_ENC_INDEX(run, 65)];
5503                         else
5504                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
5505                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
5506                                      - length[UNI_AC_ENC_INDEX(run, 65)];
5507                     }else{
5508                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
5509                         if(prev_level){
5510                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
5511                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
5512                         }
5513                     }
5514                 }
5515                 
/* total score = lambda * bit-length delta + weighted distortion */
5516                 score *= lambda;
5517
5518                 unquant_change= new_coeff - old_coeff;
5519                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
5520                 
5521                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
5522                 if(score<best_score){
5523                     best_score= score;
5524                     best_coeff= i;
5525                     best_change= change;
5526                     best_unquant_change= unquant_change;
5527                 }
5528             }
/* remember the previous non-zero level (offset by 64, 0 when out of the
 * 0..127 table range) and its run for the "last" bookkeeping above */
5529             if(level){
5530                 prev_level= level + 64;
5531                 if(prev_level&(~127))
5532                     prev_level= 0;
5533                 prev_run= run;
5534                 run=0;
5535             }else{
5536                 run++;
5537             }
5538         }
5539 #ifdef REFINE_STATS
5540 STOP_TIMER("iterative step")}
5541 #endif
5542
/* apply the winning change, or stop when nothing improved */
5543         if(best_change){
5544             int j= perm_scantable[ best_coeff ];
5545             
5546             block[j] += best_change;
5547             
5548             if(best_coeff > last_non_zero){
5549                 last_non_zero= best_coeff;
5550                 assert(block[j]);
5551 #ifdef REFINE_STATS
5552 after_last++;
5553 #endif
5554             }else{
5555 #ifdef REFINE_STATS
5556 if(block[j]){
5557     if(block[j] - best_change){
5558         if(ABS(block[j]) > ABS(block[j] - best_change)){
5559             raise++;
5560         }else{
5561             lower++;
5562         }
5563     }else{
5564         from_zero++;
5565     }
5566 }else{
5567     to_zero++;
5568 }
5569 #endif
/* a level may have become zero: walk last_non_zero back to the
 * last position that still holds a non-zero level */
5570                 for(; last_non_zero>=start_i; last_non_zero--){
5571                     if(block[perm_scantable[last_non_zero]])
5572                         break;
5573                 }
5574             }
5575 #ifdef REFINE_STATS
5576 count++;
5577 if(256*256*256*64 % count == 0){
5578     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
5579 }
5580 #endif
/* rebuild run_tab[] for the modified block; the j declared inside this
 * loop shadows the outer j, which add_8x8basis() below still uses */
5581             run=0;
5582             rle_index=0;
5583             for(i=start_i; i<=last_non_zero; i++){
5584                 int j= perm_scantable[i];
5585                 const int level= block[j];
5586         
5587                  if(level){
5588                      run_tab[rle_index++]=run;
5589                      run=0;
5590                  }else{
5591                      run++;
5592                  }
5593             }
5594             
5595             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
5596         }else{
5597             break;
5598         }
5599     }
5600 #ifdef REFINE_STATS
5601 if(last_non_zero>0){
5602 STOP_TIMER("iterative search")
5603 }
5604 }
5605 #endif
5606
5607     return last_non_zero;
5608 }
5609
5610 static int dct_quantize_c(MpegEncContext *s, 
5611                         DCTELEM *block, int n,
5612                         int qscale, int *overflow)
5613 {
5614     int i, j, level, last_non_zero, q, start_i;
5615     const int *qmat;
5616     const uint8_t *scantable= s->intra_scantable.scantable;
5617     int bias;
5618     int max=0;
5619     unsigned int threshold1, threshold2;
5620
5621     s->dsp.fdct (block);
5622
5623     if(s->dct_error_sum)
5624         s->denoise_dct(s, block);
5625
5626     if (s->mb_intra) {
5627         if (!s->h263_aic) {
5628             if (n < 4)
5629                 q = s->y_dc_scale;
5630             else
5631                 q = s->c_dc_scale;
5632             q = q << 3;
5633         } else
5634             /* For AIC we skip quant/dequant of INTRADC */
5635             q = 1 << 3;
5636             
5637         /* note: block[0] is assumed to be positive */
5638         block[0] = (block[0] + (q >> 1)) / q;
5639         start_i = 1;
5640         last_non_zero = 0;
5641         qmat = s->q_intra_matrix[qscale];
5642         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
5643     } else {
5644         start_i = 0;
5645         last_non_zero = -1;
5646         qmat = s->q_inter_matrix[qscale];
5647         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
5648     }
5649     threshold1= (1<<QMAT_SHIFT) - bias - 1;
5650     threshold2= (threshold1<<1);
5651     for(i=63;i>=start_i;i--) {
5652         j = scantable[i];
5653         level = block[j] * qmat[j];
5654
5655         if(((unsigned)(level+threshold1))>threshold2){
5656             last_non_zero = i;
5657             break;
5658         }else{
5659             block[j]=0;
5660         }
5661     }
5662     for(i=start_i; i<=last_non_zero; i++) {
5663         j = scantable[i];
5664         level = block[j] * qmat[j];
5665
5666 //        if(   bias+level >= (1<<QMAT_SHIFT)
5667 //           || bias-level >= (1<<QMAT_SHIFT)){
5668         if(((unsigned)(level+threshold1))>threshold2){
5669             if(level>0){
5670                 level= (bias + level)>>QMAT_SHIFT;
5671                 block[j]= level;
5672             }else{
5673                 level= (bias - level)>>QMAT_SHIFT;
5674                 block[j]= -level;
5675             }
5676             max |=level;
5677         }else{
5678             block[j]=0;
5679         }
5680     }
5681     *overflow= s->max_qcoeff < max; //overflow might have happend
5682     
5683     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
5684     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
5685         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
5686
5687     return last_non_zero;
5688 }
5689
5690 #endif //CONFIG_ENCODERS
5691
5692 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s, 
5693                                    DCTELEM *block, int n, int qscale)
5694 {
5695     int i, level, nCoeffs;
5696     const uint16_t *quant_matrix;
5697
5698     nCoeffs= s->block_last_index[n];
5699     
5700     if (n < 4) 
5701         block[0] = block[0] * s->y_dc_scale;
5702     else
5703         block[0] = block[0] * s->c_dc_scale;
5704     /* XXX: only mpeg1 */
5705     quant_matrix = s->intra_matrix;
5706     for(i=1;i<=nCoeffs;i++) {
5707         int j= s->intra_scantable.permutated[i];
5708         level = block[j];
5709         if (level) {
5710             if (level < 0) {
5711                 level = -level;
5712                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
5713                 level = (level - 1) | 1;
5714                 level = -level;
5715             } else {
5716                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
5717                 level = (level - 1) | 1;
5718             }
5719             block[j] = level;
5720         }
5721     }
5722 }
5723
5724 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s, 
5725                                    DCTELEM *block, int n, int qscale)
5726 {
5727     int i, level, nCoeffs;
5728     const uint16_t *quant_matrix;
5729
5730     nCoeffs= s->block_last_index[n];
5731     
5732     quant_matrix = s->inter_matrix;
5733     for(i=0; i<=nCoeffs; i++) {
5734         int j= s->intra_scantable.permutated[i];
5735         level = block[j];
5736         if (level) {
5737             if (level < 0) {
5738                 level = -level;
5739                 level = (((level << 1) + 1) * qscale *
5740                          ((int) (quant_matrix[j]))) >> 4;
5741                 level = (level - 1) | 1;
5742                 level = -level;
5743             } else {
5744                 level = (((level << 1) + 1) * qscale *
5745                          ((int) (quant_matrix[j]))) >> 4;
5746                 level = (level - 1) | 1;
5747             }
5748             block[j] = level;
5749         }
5750     }
5751 }
5752
5753 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s, 
5754                                    DCTELEM *block, int n, int qscale)
5755 {
5756     int i, level, nCoeffs;
5757     const uint16_t *quant_matrix;
5758
5759     if(s->alternate_scan) nCoeffs= 63;
5760     else nCoeffs= s->block_last_index[n];
5761     
5762     if (n < 4) 
5763         block[0] = block[0] * s->y_dc_scale;
5764     else
5765         block[0] = block[0] * s->c_dc_scale;
5766     quant_matrix = s->intra_matrix;
5767     for(i=1;i<=nCoeffs;i++) {
5768         int j= s->intra_scantable.permutated[i];
5769         level = block[j];
5770         if (level) {
5771             if (level < 0) {
5772                 level = -level;
5773                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
5774                 level = -level;
5775             } else {
5776                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
5777             }
5778             block[j] = level;
5779         }
5780     }
5781 }
5782
5783 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s, 
5784                                    DCTELEM *block, int n, int qscale)
5785 {
5786     int i, level, nCoeffs;
5787     const uint16_t *quant_matrix;
5788     int sum=-1;
5789
5790     if(s->alternate_scan) nCoeffs= 63;
5791     else nCoeffs= s->block_last_index[n];
5792     
5793     quant_matrix = s->inter_matrix;
5794     for(i=0; i<=nCoeffs; i++) {
5795         int j= s->intra_scantable.permutated[i];
5796         level = block[j];
5797         if (level) {
5798             if (level < 0) {
5799                 level = -level;
5800                 level = (((level << 1) + 1) * qscale *
5801                          ((int) (quant_matrix[j]))) >> 4;
5802                 level = -level;
5803             } else {
5804                 level = (((level << 1) + 1) * qscale *
5805                          ((int) (quant_matrix[j]))) >> 4;
5806             }
5807             block[j] = level;
5808             sum+=level;
5809         }
5810     }
5811     block[63]^=sum&1;
5812 }
5813
5814 static void dct_unquantize_h263_intra_c(MpegEncContext *s, 
5815                                   DCTELEM *block, int n, int qscale)
5816 {
5817     int i, level, qmul, qadd;
5818     int nCoeffs;
5819     
5820     assert(s->block_last_index[n]>=0);
5821     
5822     qmul = qscale << 1;
5823     
5824     if (!s->h263_aic) {
5825         if (n < 4) 
5826             block[0] = block[0] * s->y_dc_scale;
5827         else
5828             block[0] = block[0] * s->c_dc_scale;
5829         qadd = (qscale - 1) | 1;
5830     }else{
5831         qadd = 0;
5832     }
5833     if(s->ac_pred)
5834         nCoeffs=63;
5835     else
5836         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
5837
5838     for(i=1; i<=nCoeffs; i++) {
5839         level = block[i];
5840         if (level) {
5841             if (level < 0) {
5842                 level = level * qmul - qadd;
5843             } else {
5844                 level = level * qmul + qadd;
5845             }
5846             block[i] = level;
5847         }
5848     }
5849 }
5850
5851 static void dct_unquantize_h263_inter_c(MpegEncContext *s, 
5852                                   DCTELEM *block, int n, int qscale)
5853 {
5854     int i, level, qmul, qadd;
5855     int nCoeffs;
5856     
5857     assert(s->block_last_index[n]>=0);
5858     
5859     qadd = (qscale - 1) | 1;
5860     qmul = qscale << 1;
5861     
5862     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
5863
5864     for(i=0; i<=nCoeffs; i++) {
5865         level = block[i];
5866         if (level) {
5867             if (level < 0) {
5868                 level = level * qmul - qadd;
5869             } else {
5870                 level = level * qmul + qadd;
5871             }
5872             block[i] = level;
5873         }
5874     }
5875 }
5876
5877 static void dct_unquantize_h261_intra_c(MpegEncContext *s, 
5878                                   DCTELEM *block, int n, int qscale)
5879 {
5880     int i, level, even;
5881     int nCoeffs;
5882     
5883     assert(s->block_last_index[n]>=0);
5884     
5885     if (n < 4) 
5886         block[0] = block[0] * s->y_dc_scale;
5887     else
5888         block[0] = block[0] * s->c_dc_scale;
5889     even = (qscale & 1)^1;
5890     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
5891
5892     for(i=1; i<=nCoeffs; i++){
5893         level = block[i];
5894         if (level){
5895             if (level < 0){
5896                 level = qscale * ((level << 1) - 1) + even;
5897             }else{
5898                 level = qscale * ((level << 1) + 1) - even;
5899             }
5900         }
5901         block[i] = level;
5902     }
5903 }
5904
5905 static void dct_unquantize_h261_inter_c(MpegEncContext *s, 
5906                                   DCTELEM *block, int n, int qscale)
5907 {
5908     int i, level, even;
5909     int nCoeffs;
5910     
5911     assert(s->block_last_index[n]>=0);
5912
5913     even = (qscale & 1)^1;
5914     
5915     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
5916
5917     for(i=0; i<=nCoeffs; i++){
5918         level = block[i];
5919         if (level){
5920             if (level < 0){
5921                 level = qscale * ((level << 1) - 1) + even;
5922             }else{
5923                 level = qscale * ((level << 1) + 1) - even;
5924             }
5925         }
5926         block[i] = level;
5927     }
5928 }
5929
5930 static const AVOption mpeg4_options[] =
5931 {
5932     AVOPTION_CODEC_INT("bitrate", "desired video bitrate", bit_rate, 4, 240000000, 800000),
5933     AVOPTION_CODEC_INT("ratetol", "number of bits the bitstream is allowed to diverge from the reference"
5934                        "the reference can be CBR (for CBR pass1) or VBR (for pass2)",
5935                        bit_rate_tolerance, 4, 240000000, 8000),
5936     AVOPTION_CODEC_INT("qmin", "minimum quantizer", qmin, 1, 31, 2),
5937     AVOPTION_CODEC_INT("qmax", "maximum quantizer", qmax, 1, 31, 31),
5938     AVOPTION_CODEC_STRING("rc_eq", "rate control equation",
5939                           rc_eq, "tex^qComp,option1,options2", 0),
5940     AVOPTION_CODEC_INT("rc_minrate", "rate control minimum bitrate",
5941                        rc_min_rate, 4, 24000000, 0),
5942     AVOPTION_CODEC_INT("rc_maxrate", "rate control maximum bitrate",
5943                        rc_max_rate, 4, 24000000, 0),
5944     AVOPTION_CODEC_DOUBLE("rc_buf_aggresivity", "rate control buffer aggresivity",
5945                           rc_buffer_aggressivity, 4, 24000000, 0),
5946     AVOPTION_CODEC_DOUBLE("rc_initial_cplx", "initial complexity for pass1 ratecontrol",
5947                           rc_initial_cplx, 0., 9999999., 0),
5948     AVOPTION_CODEC_DOUBLE("i_quant_factor", "qscale factor between p and i frames",
5949                           i_quant_factor, 0., 0., 0),
5950     AVOPTION_CODEC_DOUBLE("i_quant_offset", "qscale offset between p and i frames",
5951                           i_quant_factor, -999999., 999999., 0),
5952     AVOPTION_CODEC_INT("dct_algo", "dct alghorithm",
5953                        dct_algo, 0, 5, 0), // fixme - "Auto,FastInt,Int,MMX,MLib,Altivec"
5954     AVOPTION_CODEC_DOUBLE("lumi_masking", "luminance masking",
5955                           lumi_masking, 0., 999999., 0),
5956     AVOPTION_CODEC_DOUBLE("temporal_cplx_masking", "temporary complexity masking",
5957                           temporal_cplx_masking, 0., 999999., 0),
5958     AVOPTION_CODEC_DOUBLE("spatial_cplx_masking", "spatial complexity masking",
5959                           spatial_cplx_masking, 0., 999999., 0),
5960     AVOPTION_CODEC_DOUBLE("p_masking", "p block masking",
5961                           p_masking, 0., 999999., 0),
5962     AVOPTION_CODEC_DOUBLE("dark_masking", "darkness masking",
5963                           dark_masking, 0., 999999., 0),
5964     AVOPTION_CODEC_INT("idct_algo", "idct alghorithm",
5965                        idct_algo, 0, 8, 0), // fixme - "Auto,Int,Simple,SimpleMMX,LibMPEG2MMX,PS2,MLib,ARM,Altivec"
5966
5967     AVOPTION_CODEC_INT("mb_qmin", "minimum MB quantizer",
5968                        mb_qmin, 0, 8, 0),
5969     AVOPTION_CODEC_INT("mb_qmax", "maximum MB quantizer",
5970                        mb_qmin, 0, 8, 0),
5971
5972     AVOPTION_CODEC_INT("me_cmp", "ME compare function",
5973                        me_cmp, 0, 24000000, 0),
5974     AVOPTION_CODEC_INT("me_sub_cmp", "subpixel ME compare function",
5975                        me_sub_cmp, 0, 24000000, 0),
5976
5977
5978     AVOPTION_CODEC_INT("dia_size", "ME diamond size & shape",
5979                        dia_size, 0, 24000000, 0),
5980     AVOPTION_CODEC_INT("last_predictor_count", "amount of previous MV predictors",
5981                        last_predictor_count, 0, 24000000, 0),
5982
5983     AVOPTION_CODEC_INT("pre_me", "pre pass for ME",
5984                        pre_me, 0, 24000000, 0),
5985     AVOPTION_CODEC_INT("me_pre_cmp", "ME pre pass compare function",
5986                        me_pre_cmp, 0, 24000000, 0),
5987
5988     AVOPTION_CODEC_INT("me_range", "maximum ME search range",
5989                        me_range, 0, 24000000, 0),
5990     AVOPTION_CODEC_INT("pre_dia_size", "ME pre pass diamod size & shape",
5991                        pre_dia_size, 0, 24000000, 0),
5992     AVOPTION_CODEC_INT("me_subpel_quality", "subpel ME quality",
5993                        me_subpel_quality, 0, 24000000, 0),
5994     AVOPTION_CODEC_INT("me_range", "maximum ME search range",
5995                        me_range, 0, 24000000, 0),
5996     AVOPTION_CODEC_FLAG("psnr", "calculate PSNR of compressed frames",
5997                         flags, CODEC_FLAG_PSNR, 0),
5998     AVOPTION_CODEC_RCOVERRIDE("rc_override", "ratecontrol override (=startframe,endframe,qscale,quality_factor)",
5999                               rc_override),
6000     AVOPTION_SUB(avoptions_common),
6001     AVOPTION_END()
6002 };
6003
6004 #ifdef CONFIG_ENCODERS
6005 #ifdef CONFIG_RISKY
/**
 * Encoder descriptor "h263" (CODEC_ID_H263). Uses MpegEncContext as private
 * context and the shared MPV_encode_init / MPV_encode_picture /
 * MPV_encode_end entry points. No option table is attached.
 */
6006 AVCodec h263_encoder = {
6007     "h263",
6008     CODEC_TYPE_VIDEO,
6009     CODEC_ID_H263,
6010     sizeof(MpegEncContext),
6011     MPV_encode_init,
6012     MPV_encode_picture,
6013     MPV_encode_end,
6014 };
6015
/**
 * Encoder descriptor "h263p" (CODEC_ID_H263P). Uses MpegEncContext as private
 * context and the shared MPV_encode_* entry points. No option table is
 * attached.
 */
6016 AVCodec h263p_encoder = {
6017     "h263p",
6018     CODEC_TYPE_VIDEO,
6019     CODEC_ID_H263P,
6020     sizeof(MpegEncContext),
6021     MPV_encode_init,
6022     MPV_encode_picture,
6023     MPV_encode_end,
6024 };
6025
/**
 * Encoder descriptor "flv" (CODEC_ID_FLV1). Uses MpegEncContext as private
 * context and the shared MPV_encode_* entry points. No option table is
 * attached.
 */
6026 AVCodec flv_encoder = {
6027     "flv",
6028     CODEC_TYPE_VIDEO,
6029     CODEC_ID_FLV1,
6030     sizeof(MpegEncContext),
6031     MPV_encode_init,
6032     MPV_encode_picture,
6033     MPV_encode_end,
6034 };
6035
/**
 * Encoder descriptor "rv10" (CODEC_ID_RV10). Uses MpegEncContext as private
 * context and the shared MPV_encode_* entry points. No option table is
 * attached.
 */
6036 AVCodec rv10_encoder = {
6037     "rv10",
6038     CODEC_TYPE_VIDEO,
6039     CODEC_ID_RV10,
6040     sizeof(MpegEncContext),
6041     MPV_encode_init,
6042     MPV_encode_picture,
6043     MPV_encode_end,
6044 };
6045
/**
 * Encoder descriptor "mpeg4" (CODEC_ID_MPEG4). Uses MpegEncContext as private
 * context and the shared MPV_encode_* entry points, exposes mpeg4_options,
 * and is the only encoder in this list that sets CODEC_CAP_DELAY
 * (presumably because output can lag behind input -- confirm).
 */
6046 AVCodec mpeg4_encoder = {
6047     "mpeg4",
6048     CODEC_TYPE_VIDEO,
6049     CODEC_ID_MPEG4,
6050     sizeof(MpegEncContext),
6051     MPV_encode_init,
6052     MPV_encode_picture,
6053     MPV_encode_end,
6054     .options = mpeg4_options,
6055     .capabilities= CODEC_CAP_DELAY,
6056 };
6057
/**
 * Encoder descriptor "msmpeg4v1" (CODEC_ID_MSMPEG4V1). Uses MpegEncContext as
 * private context and the shared MPV_encode_* entry points; exposes
 * mpeg4_options.
 */
6058 AVCodec msmpeg4v1_encoder = {
6059     "msmpeg4v1",
6060     CODEC_TYPE_VIDEO,
6061     CODEC_ID_MSMPEG4V1,
6062     sizeof(MpegEncContext),
6063     MPV_encode_init,
6064     MPV_encode_picture,
6065     MPV_encode_end,
6066     .options = mpeg4_options,
6067 };
6068
/**
 * Encoder descriptor "msmpeg4v2" (CODEC_ID_MSMPEG4V2). Uses MpegEncContext as
 * private context and the shared MPV_encode_* entry points; exposes
 * mpeg4_options.
 */
6069 AVCodec msmpeg4v2_encoder = {
6070     "msmpeg4v2",
6071     CODEC_TYPE_VIDEO,
6072     CODEC_ID_MSMPEG4V2,
6073     sizeof(MpegEncContext),
6074     MPV_encode_init,
6075     MPV_encode_picture,
6076     MPV_encode_end,
6077     .options = mpeg4_options,
6078 };
6079
/**
 * Encoder descriptor for CODEC_ID_MSMPEG4V3. Note that the registered name
 * string is "msmpeg4", without a version suffix. Uses MpegEncContext as
 * private context and the shared MPV_encode_* entry points; exposes
 * mpeg4_options.
 */
6080 AVCodec msmpeg4v3_encoder = {
6081     "msmpeg4",
6082     CODEC_TYPE_VIDEO,
6083     CODEC_ID_MSMPEG4V3,
6084     sizeof(MpegEncContext),
6085     MPV_encode_init,
6086     MPV_encode_picture,
6087     MPV_encode_end,
6088     .options = mpeg4_options,
6089 };
6090
/**
 * Encoder descriptor "wmv1" (CODEC_ID_WMV1). Uses MpegEncContext as private
 * context and the shared MPV_encode_* entry points; exposes mpeg4_options.
 */
6091 AVCodec wmv1_encoder = {
6092     "wmv1",
6093     CODEC_TYPE_VIDEO,
6094     CODEC_ID_WMV1,
6095     sizeof(MpegEncContext),
6096     MPV_encode_init,
6097     MPV_encode_picture,
6098     MPV_encode_end,
6099     .options = mpeg4_options,
6100 };
6101
6102 #endif
6103
/**
 * Encoder descriptor "mjpeg" (CODEC_ID_MJPEG). Unlike the encoders above it
 * sits outside the CONFIG_RISKY guard and only depends on CONFIG_ENCODERS.
 * Uses MpegEncContext as private context and the shared MPV_encode_* entry
 * points. No option table is attached.
 */
6104 AVCodec mjpeg_encoder = {
6105     "mjpeg",
6106     CODEC_TYPE_VIDEO,
6107     CODEC_ID_MJPEG,
6108     sizeof(MpegEncContext),
6109     MPV_encode_init,
6110     MPV_encode_picture,
6111     MPV_encode_end,
6112 };
6113
6114 #endif //CONFIG_ENCODERS