]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo.c
ASV1 codec
[ffmpeg] / libavcodec / mpegvideo.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  *
19  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
20  */
21  
22 /**
23  * @file mpegvideo.c
24  * The simplest mpeg encoder (well, it was the simplest!).
25  */ 
26  
27 #include <ctype.h>
28 #include <limits.h>
29 #include "avcodec.h"
30 #include "dsputil.h"
31 #include "mpegvideo.h"
32
33 #ifdef USE_FASTMEMCPY
34 #include "fastmemcpy.h"
35 #endif
36
37 //#undef NDEBUG
38 //#include <assert.h>
39
40 #ifdef CONFIG_ENCODERS
41 static void encode_picture(MpegEncContext *s, int picture_number);
42 #endif //CONFIG_ENCODERS
43 static void dct_unquantize_mpeg1_c(MpegEncContext *s, 
44                                    DCTELEM *block, int n, int qscale);
45 static void dct_unquantize_mpeg2_c(MpegEncContext *s,
46                                    DCTELEM *block, int n, int qscale);
47 static void dct_unquantize_h263_c(MpegEncContext *s, 
48                                   DCTELEM *block, int n, int qscale);
49 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
50 #ifdef CONFIG_ENCODERS
51 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
52 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
53 #endif //CONFIG_ENCODERS
54
55 void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
56
57
58 /* enable all paranoid tests for rounding, overflows, etc... */
59 //#define PARANOID
60
61 //#define DEBUG
62
63
64 /* for jpeg fast DCT */
65 #define CONST_BITS 14
66
/*
 * Precomputed scale factors, scaled up by 14 bits, laid out as an 8x8 table.
 * convert_matrix() folds them into the quantizer when the forward DCT is
 * fdct_ifast.
 */
static const uint16_t aanscales[64] = {
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
     8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
     4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
};
78
/*
 * 16-entry lookup table, indexed 0..15.
 * NOTE(review): by its name this is the H.263 chroma motion vector rounding
 * table; its users are outside this part of the file.
 */
static const uint8_t h263_chroma_roundtab[16] = {
    /*  0 ..  7 */ 0, 0, 0, 1, 1, 1, 1, 1,
    /*  8 .. 15 */ 1, 1, 1, 1, 1, 1, 2, 2,
};
83
84 #ifdef CONFIG_ENCODERS
85 static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
86 static uint8_t default_fcode_tab[MAX_MV*2+1];
87
88 enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
89
90 static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16)[64], uint16_t (*qmat16_bias)[64],
91                            const uint16_t *quant_matrix, int bias, int qmin, int qmax)
92 {
93     int qscale;
94
95     for(qscale=qmin; qscale<=qmax; qscale++){
96         int i;
97         if (s->dsp.fdct == ff_jpeg_fdct_islow) {
98             for(i=0;i<64;i++) {
99                 const int j= s->dsp.idct_permutation[i];
100                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
101                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
102                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
103                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
104                 
105                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / 
106                                 (qscale * quant_matrix[j]));
107             }
108         } else if (s->dsp.fdct == fdct_ifast) {
109             for(i=0;i<64;i++) {
110                 const int j= s->dsp.idct_permutation[i];
111                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
112                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
113                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
114                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
115                 
116                 qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) / 
117                                 (aanscales[i] * qscale * quant_matrix[j]));
118             }
119         } else {
120             for(i=0;i<64;i++) {
121                 const int j= s->dsp.idct_permutation[i];
122                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
123                    So 16           <= qscale * quant_matrix[i]             <= 7905
124                    so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
125                    so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
126                 */
127                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
128 //                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
129                 qmat16[qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
130
131                 if(qmat16[qscale][i]==0 || qmat16[qscale][i]==128*256) qmat16[qscale][i]=128*256-1;
132                 qmat16_bias[qscale][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][i]);
133             }
134         }
135     }
136 }
137 #endif //CONFIG_ENCODERS
138
139 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
140     int i;
141     int end;
142     
143     st->scantable= src_scantable;
144
145     for(i=0; i<64; i++){
146         int j;
147         j = src_scantable[i];
148         st->permutated[i] = permutation[j];
149 #ifdef ARCH_POWERPC
150         st->inverse[j] = i;
151 #endif
152     }
153     
154     end=-1;
155     for(i=0; i<64; i++){
156         int j;
157         j = st->permutated[i];
158         if(j>end) end=j;
159         st->raster_end[i]= end;
160     }
161 }
162
163 /* init common dct for both encoder and decoder */
164 int DCT_common_init(MpegEncContext *s)
165 {
166     s->dct_unquantize_h263 = dct_unquantize_h263_c;
167     s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_c;
168     s->dct_unquantize_mpeg2 = dct_unquantize_mpeg2_c;
169
170 #ifdef CONFIG_ENCODERS
171     s->dct_quantize= dct_quantize_c;
172 #endif
173         
174 #ifdef HAVE_MMX
175     MPV_common_init_mmx(s);
176 #endif
177 #ifdef ARCH_ALPHA
178     MPV_common_init_axp(s);
179 #endif
180 #ifdef HAVE_MLIB
181     MPV_common_init_mlib(s);
182 #endif
183 #ifdef HAVE_MMI
184     MPV_common_init_mmi(s);
185 #endif
186 #ifdef ARCH_ARMV4L
187     MPV_common_init_armv4l(s);
188 #endif
189 #ifdef ARCH_POWERPC
190     MPV_common_init_ppc(s);
191 #endif
192
193 #ifdef CONFIG_ENCODERS
194     s->fast_dct_quantize= s->dct_quantize;
195
196     if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
197         s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
198     }
199
200 #endif //CONFIG_ENCODERS
201
202     /* load & permutate scantables
203        note: only wmv uses differnt ones 
204     */
205     ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
206     ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
207     ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
208     ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
209
210     s->picture_structure= PICT_FRAME;
211     
212     return 0;
213 }
214
215 /**
216  * allocates a Picture
217  * The pixels are allocated/set by calling get_buffer() if shared=0
218  */
219 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
220     const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesnt sig11
221     const int mb_array_size= s->mb_stride*s->mb_height;
222     int i;
223     
224     if(shared){
225         assert(pic->data[0]);
226         assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
227         pic->type= FF_BUFFER_TYPE_SHARED;
228     }else{
229         int r;
230         
231         assert(!pic->data[0]);
232         
233         r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);
234         
235         if(r<0 || !pic->age || !pic->type || !pic->data[0]){
236             fprintf(stderr, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
237             return -1;
238         }
239
240         if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
241             fprintf(stderr, "get_buffer() failed (stride changed)\n");
242             return -1;
243         }
244
245         if(pic->linesize[1] != pic->linesize[2]){
246             fprintf(stderr, "get_buffer() failed (uv stride missmatch)\n");
247             return -1;
248         }
249
250         s->linesize  = pic->linesize[0];
251         s->uvlinesize= pic->linesize[1];
252     }
253     
254     if(pic->qscale_table==NULL){
255         if (s->encoding) {        
256             CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
257             CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
258             CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
259             CHECKED_ALLOCZ(pic->mb_cmp_score, mb_array_size * sizeof(int32_t))
260         }
261
262         CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
263         CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
264         CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(int))
265         pic->mb_type= pic->mb_type_base + s->mb_stride+1;
266         if(s->out_format == FMT_H264){
267             for(i=0; i<2; i++){
268                 CHECKED_ALLOCZ(pic->motion_val[i], 2 * 16 * s->mb_num * sizeof(uint16_t))
269                 CHECKED_ALLOCZ(pic->ref_index[i] , 4 * s->mb_num * sizeof(uint8_t))
270             }
271         }
272         pic->qstride= s->mb_stride;
273     }
274
275     //it might be nicer if the application would keep track of these but it would require a API change
276     memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
277     s->prev_pict_types[0]= s->pict_type;
278     if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
279         pic->age= INT_MAX; // skiped MBs in b frames are quite rare in mpeg1/2 and its a bit tricky to skip them anyway
280     
281     return 0;
282 fail: //for the CHECKED_ALLOCZ macro
283     return -1;
284 }
285
286 /**
287  * deallocates a picture
288  */
289 static void free_picture(MpegEncContext *s, Picture *pic){
290     int i;
291
292     if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
293         s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
294     }
295
296     av_freep(&pic->mb_var);
297     av_freep(&pic->mc_mb_var);
298     av_freep(&pic->mb_mean);
299     av_freep(&pic->mb_cmp_score);
300     av_freep(&pic->mbskip_table);
301     av_freep(&pic->qscale_table);
302     av_freep(&pic->mb_type_base);
303     pic->mb_type= NULL;
304     for(i=0; i<2; i++){
305         av_freep(&pic->motion_val[i]);
306         av_freep(&pic->ref_index[i]);
307     }
308     
309     if(pic->type == FF_BUFFER_TYPE_SHARED){
310         for(i=0; i<4; i++){
311             pic->base[i]=
312             pic->data[i]= NULL;
313         }
314         pic->type= 0;        
315     }
316 }
317
318 /* init common structure for both encoder and decoder */
319 int MPV_common_init(MpegEncContext *s)
320 {
321     int y_size, c_size, yc_size, i, mb_array_size, x, y;
322
323     dsputil_init(&s->dsp, s->avctx);
324     DCT_common_init(s);
325
326     s->flags= s->avctx->flags;
327
328     s->mb_width  = (s->width  + 15) / 16;
329     s->mb_height = (s->height + 15) / 16;
330     s->mb_stride = s->mb_width + 1;
331     mb_array_size= s->mb_height * s->mb_stride;
332
333     /* set default edge pos, will be overriden in decode_header if needed */
334     s->h_edge_pos= s->mb_width*16;
335     s->v_edge_pos= s->mb_height*16;
336
337     s->mb_num = s->mb_width * s->mb_height;
338     
339     s->block_wrap[0]=
340     s->block_wrap[1]=
341     s->block_wrap[2]=
342     s->block_wrap[3]= s->mb_width*2 + 2;
343     s->block_wrap[4]=
344     s->block_wrap[5]= s->mb_width + 2;
345
346     y_size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
347     c_size = (s->mb_width + 2) * (s->mb_height + 2);
348     yc_size = y_size + 2 * c_size;
349
350     /* convert fourcc to upper case */
351     s->avctx->codec_tag=   toupper( s->avctx->codec_tag     &0xFF)          
352                         + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
353                         + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16) 
354                         + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);
355
356     CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*17*2); //(width + edge + align)*interlaced*MBsize*tolerance
357     s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*17;
358
359     s->avctx->coded_frame= (AVFrame*)&s->current_picture;
360
361     CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error ressilience code looks cleaner with this
362     for(y=0; y<s->mb_height; y++){
363         for(x=0; x<s->mb_width; x++){
364             s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
365         }
366     }
367     s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
368     
369     if (s->encoding) {
370         int mv_table_size= s->mb_stride * (s->mb_height+2) + 1;
371
372         /* Allocate MV tables */
373         CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
374         CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
375         CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
376         CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
377         CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
378         CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
379         s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
380         s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
381         s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
382         s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
383         s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
384         s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;
385
386         //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer()
387         CHECKED_ALLOCZ(s->me.scratchpad,  s->width*2*16*3*sizeof(uint8_t)) 
388         
389         CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
390         CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
391
392         if(s->codec_id==CODEC_ID_MPEG4){
393             CHECKED_ALLOCZ(s->tex_pb_buffer, PB_BUFFER_SIZE);
394             CHECKED_ALLOCZ(   s->pb2_buffer, PB_BUFFER_SIZE);
395         }
396         
397         if(s->msmpeg4_version){
398             CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
399         }
400         CHECKED_ALLOCZ(s->avctx->stats_out, 256);
401
402         /* Allocate MB type table */
403         CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint8_t)) //needed for encoding
404     }
405         
406     CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
407     
408     if (s->out_format == FMT_H263 || s->encoding) {
409         int size;
410
411         /* MV prediction */
412         size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
413         CHECKED_ALLOCZ(s->motion_val, size * 2 * sizeof(int16_t));
414     }
415
416     if(s->codec_id==CODEC_ID_MPEG4){
417         /* interlaced direct mode decoding tables */
418         CHECKED_ALLOCZ(s->field_mv_table, mb_array_size*2*2 * sizeof(int16_t))
419         CHECKED_ALLOCZ(s->field_select_table, mb_array_size*2* sizeof(int8_t))
420     }
421     if (s->out_format == FMT_H263) {
422         /* ac values */
423         CHECKED_ALLOCZ(s->ac_val[0], yc_size * sizeof(int16_t) * 16);
424         s->ac_val[1] = s->ac_val[0] + y_size;
425         s->ac_val[2] = s->ac_val[1] + c_size;
426         
427         /* cbp values */
428         CHECKED_ALLOCZ(s->coded_block, y_size);
429         
430         /* divx501 bitstream reorder buffer */
431         CHECKED_ALLOCZ(s->bitstream_buffer, BITSTREAM_BUFFER_SIZE);
432
433         /* cbp, ac_pred, pred_dir */
434         CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
435         CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
436     }
437     
438     if (s->h263_pred || s->h263_plus || !s->encoding) {
439         /* dc values */
440         //MN: we need these for error resilience of intra-frames
441         CHECKED_ALLOCZ(s->dc_val[0], yc_size * sizeof(int16_t));
442         s->dc_val[1] = s->dc_val[0] + y_size;
443         s->dc_val[2] = s->dc_val[1] + c_size;
444         for(i=0;i<yc_size;i++)
445             s->dc_val[0][i] = 1024;
446     }
447
448     /* which mb is a intra block */
449     CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
450     memset(s->mbintra_table, 1, mb_array_size);
451     
452     /* default structure is frame */
453     s->picture_structure = PICT_FRAME;
454     
455     /* init macroblock skip table */
456     CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
457     //Note the +1 is for a quicker mpeg4 slice_end detection
458     CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
459     
460     s->block= s->blocks[0];
461
462     s->parse_context.state= -1;
463
464     s->context_initialized = 1;
465     return 0;
466  fail:
467     MPV_common_end(s);
468     return -1;
469 }
470
471
472 //extern int sads;
473
474 /* init common structure for both encoder and decoder */
475 void MPV_common_end(MpegEncContext *s)
476 {
477     int i;
478
479     av_freep(&s->mb_type);
480     av_freep(&s->p_mv_table_base);
481     av_freep(&s->b_forw_mv_table_base);
482     av_freep(&s->b_back_mv_table_base);
483     av_freep(&s->b_bidir_forw_mv_table_base);
484     av_freep(&s->b_bidir_back_mv_table_base);
485     av_freep(&s->b_direct_mv_table_base);
486     s->p_mv_table= NULL;
487     s->b_forw_mv_table= NULL;
488     s->b_back_mv_table= NULL;
489     s->b_bidir_forw_mv_table= NULL;
490     s->b_bidir_back_mv_table= NULL;
491     s->b_direct_mv_table= NULL;
492     
493     av_freep(&s->motion_val);
494     av_freep(&s->dc_val[0]);
495     av_freep(&s->ac_val[0]);
496     av_freep(&s->coded_block);
497     av_freep(&s->mbintra_table);
498     av_freep(&s->cbp_table);
499     av_freep(&s->pred_dir_table);
500     av_freep(&s->me.scratchpad);
501     av_freep(&s->me.map);
502     av_freep(&s->me.score_map);
503     
504     av_freep(&s->mbskip_table);
505     av_freep(&s->prev_pict_types);
506     av_freep(&s->bitstream_buffer);
507     av_freep(&s->tex_pb_buffer);
508     av_freep(&s->pb2_buffer);
509     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
510     av_freep(&s->field_mv_table);
511     av_freep(&s->field_select_table);
512     av_freep(&s->avctx->stats_out);
513     av_freep(&s->ac_stats);
514     av_freep(&s->error_status_table);
515     av_freep(&s->mb_index2xy);
516
517     for(i=0; i<MAX_PICTURE_COUNT; i++){
518         free_picture(s, &s->picture[i]);
519     }
520     avcodec_default_free_buffers(s->avctx);
521     s->context_initialized = 0;
522 }
523
524 #ifdef CONFIG_ENCODERS
525
526 /* init video encoder */
527 int MPV_encode_init(AVCodecContext *avctx)
528 {
529     MpegEncContext *s = avctx->priv_data;
530     int i;
531
532     avctx->pix_fmt = PIX_FMT_YUV420P;
533
534     s->bit_rate = avctx->bit_rate;
535     s->bit_rate_tolerance = avctx->bit_rate_tolerance;
536     s->width = avctx->width;
537     s->height = avctx->height;
538     if(avctx->gop_size > 600){
539         fprintf(stderr, "Warning keyframe interval too large! reducing it ...\n");
540         avctx->gop_size=600;
541     }
542     s->gop_size = avctx->gop_size;
543     s->rtp_mode = avctx->rtp_mode;
544     s->rtp_payload_size = avctx->rtp_payload_size;
545     if (avctx->rtp_callback)
546         s->rtp_callback = avctx->rtp_callback;
547     s->max_qdiff= avctx->max_qdiff;
548     s->qcompress= avctx->qcompress;
549     s->qblur= avctx->qblur;
550     s->avctx = avctx;
551     s->flags= avctx->flags;
552     s->max_b_frames= avctx->max_b_frames;
553     s->b_frame_strategy= avctx->b_frame_strategy;
554     s->codec_id= avctx->codec->id;
555     s->luma_elim_threshold  = avctx->luma_elim_threshold;
556     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
557     s->strict_std_compliance= avctx->strict_std_compliance;
558     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
559     s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
560     s->mpeg_quant= avctx->mpeg_quant;
561
562     if (s->gop_size <= 1) {
563         s->intra_only = 1;
564         s->gop_size = 12;
565     } else {
566         s->intra_only = 0;
567     }
568
569     s->me_method = avctx->me_method;
570
571     /* Fixed QSCALE */
572     s->fixed_qscale = (avctx->flags & CODEC_FLAG_QSCALE);
573     
574     s->adaptive_quant= (   s->avctx->lumi_masking
575                         || s->avctx->dark_masking
576                         || s->avctx->temporal_cplx_masking 
577                         || s->avctx->spatial_cplx_masking
578                         || s->avctx->p_masking)
579                        && !s->fixed_qscale;
580     
581     s->progressive_sequence= !(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
582
583     if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4){
584         fprintf(stderr, "4MV not supporetd by codec\n");
585         return -1;
586     }
587     
588     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
589         fprintf(stderr, "qpel not supporetd by codec\n");
590         return -1;
591     }
592
593     if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
594         fprintf(stderr, "data partitioning not supporetd by codec\n");
595         return -1;
596     }
597     
598     if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO){
599         fprintf(stderr, "b frames not supporetd by codec\n");
600         return -1;
601     }
602     
603     if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
604         fprintf(stderr, "mpeg2 style quantization not supporetd by codec\n");
605         return -1;
606     }
607         
608     if(s->codec_id==CODEC_ID_MJPEG){
609         s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
610         s->inter_quant_bias= 0;
611     }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO){
612         s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
613         s->inter_quant_bias= 0;
614     }else{
615         s->intra_quant_bias=0;
616         s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
617     }
618     
619     if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
620         s->intra_quant_bias= avctx->intra_quant_bias;
621     if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
622         s->inter_quant_bias= avctx->inter_quant_bias;
623     
624     switch(avctx->codec->id) {
625     case CODEC_ID_MPEG1VIDEO:
626         s->out_format = FMT_MPEG1;
627         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
628         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
629         break;
630     case CODEC_ID_MJPEG:
631         s->out_format = FMT_MJPEG;
632         s->intra_only = 1; /* force intra only for jpeg */
633         s->mjpeg_write_tables = 1; /* write all tables */
634         s->mjpeg_data_only_frames = 0; /* write all the needed headers */
635         s->mjpeg_vsample[0] = 2; /* set up default sampling factors */
636         s->mjpeg_vsample[1] = 1; /* the only currently supported values */
637         s->mjpeg_vsample[2] = 1; 
638         s->mjpeg_hsample[0] = 2;
639         s->mjpeg_hsample[1] = 1; 
640         s->mjpeg_hsample[2] = 1; 
641         if (mjpeg_init(s) < 0)
642             return -1;
643         avctx->delay=0;
644         s->low_delay=1;
645         break;
646 #ifdef CONFIG_RISKY
647     case CODEC_ID_H263:
648         if (h263_get_picture_format(s->width, s->height) == 7) {
649             printf("Input picture size isn't suitable for h263 codec! try h263+\n");
650             return -1;
651         }
652         s->out_format = FMT_H263;
653         avctx->delay=0;
654         s->low_delay=1;
655         break;
656     case CODEC_ID_H263P:
657         s->out_format = FMT_H263;
658         s->h263_plus = 1;
659         /* Fx */
660         s->unrestricted_mv=(avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
661         s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
662         /* /Fx */
663         /* These are just to be sure */
664         s->umvplus = 1;
665         avctx->delay=0;
666         s->low_delay=1;
667         break;
668     case CODEC_ID_RV10:
669         s->out_format = FMT_H263;
670         s->h263_rv10 = 1;
671         avctx->delay=0;
672         s->low_delay=1;
673         break;
674     case CODEC_ID_MPEG4:
675         s->out_format = FMT_H263;
676         s->h263_pred = 1;
677         s->unrestricted_mv = 1;
678         s->low_delay= s->max_b_frames ? 0 : 1;
679         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
680         break;
681     case CODEC_ID_MSMPEG4V1:
682         s->out_format = FMT_H263;
683         s->h263_msmpeg4 = 1;
684         s->h263_pred = 1;
685         s->unrestricted_mv = 1;
686         s->msmpeg4_version= 1;
687         avctx->delay=0;
688         s->low_delay=1;
689         break;
690     case CODEC_ID_MSMPEG4V2:
691         s->out_format = FMT_H263;
692         s->h263_msmpeg4 = 1;
693         s->h263_pred = 1;
694         s->unrestricted_mv = 1;
695         s->msmpeg4_version= 2;
696         avctx->delay=0;
697         s->low_delay=1;
698         break;
699     case CODEC_ID_MSMPEG4V3:
700         s->out_format = FMT_H263;
701         s->h263_msmpeg4 = 1;
702         s->h263_pred = 1;
703         s->unrestricted_mv = 1;
704         s->msmpeg4_version= 3;
705         s->flipflop_rounding=1;
706         avctx->delay=0;
707         s->low_delay=1;
708         break;
709     case CODEC_ID_WMV1:
710         s->out_format = FMT_H263;
711         s->h263_msmpeg4 = 1;
712         s->h263_pred = 1;
713         s->unrestricted_mv = 1;
714         s->msmpeg4_version= 4;
715         s->flipflop_rounding=1;
716         avctx->delay=0;
717         s->low_delay=1;
718         break;
719     case CODEC_ID_WMV2:
720         s->out_format = FMT_H263;
721         s->h263_msmpeg4 = 1;
722         s->h263_pred = 1;
723         s->unrestricted_mv = 1;
724         s->msmpeg4_version= 5;
725         s->flipflop_rounding=1;
726         avctx->delay=0;
727         s->low_delay=1;
728         break;
729 #endif
730     default:
731         return -1;
732     }
733     
734     { /* set up some save defaults, some codecs might override them later */
735         static int done=0;
736         if(!done){
737             int i;
738             done=1;
739
740             default_mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
741             memset(default_mv_penalty, 0, sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1));
742             memset(default_fcode_tab , 0, sizeof(uint8_t)*(2*MAX_MV+1));
743
744             for(i=-16; i<16; i++){
745                 default_fcode_tab[i + MAX_MV]= 1;
746             }
747         }
748     }
749     s->me.mv_penalty= default_mv_penalty;
750     s->fcode_tab= default_fcode_tab;
751     s->y_dc_scale_table=
752     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
753  
754     /* dont use mv_penalty table for crap MV as it would be confused */
755     //FIXME remove after fixing / removing old ME
756     if (s->me_method < ME_EPZS) s->me.mv_penalty = default_mv_penalty;
757
758     s->encoding = 1;
759
760     /* init */
761     if (MPV_common_init(s) < 0)
762         return -1;
763     
764     ff_init_me(s);
765
766 #ifdef CONFIG_ENCODERS
767 #ifdef CONFIG_RISKY
768     if (s->out_format == FMT_H263)
769         h263_encode_init(s);
770     if(s->msmpeg4_version)
771         ff_msmpeg4_encode_init(s);
772 #endif
773     if (s->out_format == FMT_MPEG1)
774         ff_mpeg1_encode_init(s);
775 #endif
776
777     /* init default q matrix */
778     for(i=0;i<64;i++) {
779         int j= s->dsp.idct_permutation[i];
780 #ifdef CONFIG_RISKY
781         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
782             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
783             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
784         }else if(s->out_format == FMT_H263){
785             s->intra_matrix[j] =
786             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
787         }else
788 #endif
789         { /* mpeg1 */
790             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
791             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
792         }
793     }
794
795     /* precompute matrix */
796     /* for mjpeg, we do include qscale in the matrix */
797     if (s->out_format != FMT_MJPEG) {
798         convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16, s->q_intra_matrix16_bias, 
799                        s->intra_matrix, s->intra_quant_bias, 1, 31);
800         convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16, s->q_inter_matrix16_bias, 
801                        s->inter_matrix, s->inter_quant_bias, 1, 31);
802     }
803
804     if(ff_rate_control_init(s) < 0)
805         return -1;
806
807     s->picture_number = 0;
808     s->picture_in_gop_number = 0;
809     s->fake_picture_number = 0;
810     /* motion detector init */
811     s->f_code = 1;
812     s->b_code = 1;
813
814     return 0;
815 }
816
817 int MPV_encode_end(AVCodecContext *avctx)
818 {
819     MpegEncContext *s = avctx->priv_data;
820
821 #ifdef STATS
822     print_stats();
823 #endif
824
825     ff_rate_control_uninit(s);
826
827     MPV_common_end(s);
828     if (s->out_format == FMT_MJPEG)
829         mjpeg_close(s);
830       
831     return 0;
832 }
833
834 #endif //CONFIG_ENCODERS
835
836 void init_rl(RLTable *rl)
837 {
838     int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
839     uint8_t index_run[MAX_RUN+1];
840     int last, run, level, start, end, i;
841
842     /* compute max_level[], max_run[] and index_run[] */
843     for(last=0;last<2;last++) {
844         if (last == 0) {
845             start = 0;
846             end = rl->last;
847         } else {
848             start = rl->last;
849             end = rl->n;
850         }
851
852         memset(max_level, 0, MAX_RUN + 1);
853         memset(max_run, 0, MAX_LEVEL + 1);
854         memset(index_run, rl->n, MAX_RUN + 1);
855         for(i=start;i<end;i++) {
856             run = rl->table_run[i];
857             level = rl->table_level[i];
858             if (index_run[run] == rl->n)
859                 index_run[run] = i;
860             if (level > max_level[run])
861                 max_level[run] = level;
862             if (run > max_run[level])
863                 max_run[level] = run;
864         }
865         rl->max_level[last] = av_malloc(MAX_RUN + 1);
866         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
867         rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
868         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
869         rl->index_run[last] = av_malloc(MAX_RUN + 1);
870         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
871     }
872 }
873
874 /* draw the edges of width 'w' of an image of size width, height */
875 //FIXME check that this is ok for mpeg4 interlaced
876 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
877 {
878     uint8_t *ptr, *last_line;
879     int i;
880
881     last_line = buf + (height - 1) * wrap;
882     for(i=0;i<w;i++) {
883         /* top and bottom */
884         memcpy(buf - (i + 1) * wrap, buf, width);
885         memcpy(last_line + (i + 1) * wrap, last_line, width);
886     }
887     /* left and right */
888     ptr = buf;
889     for(i=0;i<height;i++) {
890         memset(ptr - w, ptr[0], w);
891         memset(ptr + width, ptr[width-1], w);
892         ptr += wrap;
893     }
894     /* corners */
895     for(i=0;i<w;i++) {
896         memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
897         memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
898         memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* top left */
899         memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* top right */
900     }
901 }
902
903 static int find_unused_picture(MpegEncContext *s, int shared){
904     int i;
905     
906     if(shared){
907         for(i=0; i<MAX_PICTURE_COUNT; i++){
908             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) break;
909         }
910     }else{
911         for(i=0; i<MAX_PICTURE_COUNT; i++){
912             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) break; //FIXME
913         }
914         for(i=0; i<MAX_PICTURE_COUNT; i++){
915             if(s->picture[i].data[0]==NULL) break;
916         }
917     }
918
919     assert(i<MAX_PICTURE_COUNT);
920     return i;
921 }
922
923 /* generic function for encode/decode called before a frame is coded/decoded */
924 int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
925 {
926     int i;
927     AVFrame *pic;
928
929     s->mb_skiped = 0;
930
931     assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
932
933     /* mark&release old frames */
934     if (s->pict_type != B_TYPE && s->last_picture_ptr) {
935         avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);
936
937         /* release forgotten pictures */
938         /* if(mpeg124/h263) */
939         if(!s->encoding){
940             for(i=0; i<MAX_PICTURE_COUNT; i++){
941                 if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
942                     fprintf(stderr, "releasing zombie picture\n");
943                     avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);                
944                 }
945             }
946         }
947     }
948 alloc:
949     if(!s->encoding){
950         /* release non refernce frames */
951         for(i=0; i<MAX_PICTURE_COUNT; i++){
952             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
953                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
954             }
955         }
956
957         i= find_unused_picture(s, 0);
958     
959         pic= (AVFrame*)&s->picture[i];
960         pic->reference= s->pict_type != B_TYPE ? 3 : 0;
961
962         if(s->current_picture_ptr)
963             pic->coded_picture_number= s->current_picture_ptr->coded_picture_number+1;
964         
965         alloc_picture(s, (Picture*)pic, 0);
966
967         s->current_picture_ptr= &s->picture[i];
968     }
969
970     s->current_picture_ptr->pict_type= s->pict_type;
971     s->current_picture_ptr->quality= s->qscale;
972     s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;
973
974     s->current_picture= *s->current_picture_ptr;
975   
976   if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
977     if (s->pict_type != B_TYPE) {
978         s->last_picture_ptr= s->next_picture_ptr;
979         s->next_picture_ptr= s->current_picture_ptr;
980     }
981     
982     if(s->last_picture_ptr) s->last_picture= *s->last_picture_ptr;
983     if(s->next_picture_ptr) s->next_picture= *s->next_picture_ptr;
984     if(s->new_picture_ptr ) s->new_picture = *s->new_picture_ptr;
985     
986     if(s->picture_structure!=PICT_FRAME){
987         int i;
988         for(i=0; i<4; i++){
989             if(s->picture_structure == PICT_BOTTOM_FIELD){
990                  s->current_picture.data[i] += s->current_picture.linesize[i];
991             } 
992             s->current_picture.linesize[i] *= 2;
993             s->last_picture.linesize[i] *=2;
994             s->next_picture.linesize[i] *=2;
995         }
996     }
997     
998     if(s->pict_type != I_TYPE && s->last_picture_ptr==NULL){
999         fprintf(stderr, "warning: first frame is no keyframe\n");
1000         assert(s->pict_type != B_TYPE); //these should have been dropped if we dont have a reference
1001         goto alloc;
1002     }
1003   }
1004    
1005     s->hurry_up= s->avctx->hurry_up;
1006     s->error_resilience= avctx->error_resilience;
1007
1008     /* set dequantizer, we cant do it during init as it might change for mpeg4
1009        and we cant do it in the header decode as init isnt called for mpeg4 there yet */
1010     if(s->out_format == FMT_H263){
1011         if(s->mpeg_quant)
1012             s->dct_unquantize = s->dct_unquantize_mpeg2;
1013         else
1014             s->dct_unquantize = s->dct_unquantize_h263;
1015     }else 
1016         s->dct_unquantize = s->dct_unquantize_mpeg1;
1017
1018     return 0;
1019 }
1020
1021 /* generic function for encode/decode called after a frame has been coded/decoded */
1022 void MPV_frame_end(MpegEncContext *s)
1023 {
1024     int i;
1025     /* draw edge for correct motion prediction if outside */
1026     if(s->codec_id!=CODEC_ID_SVQ1){
1027         if (s->pict_type != B_TYPE && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
1028             draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
1029             draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1030             draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1031         }
1032     }
1033     emms_c();
1034     
1035     s->last_pict_type    = s->pict_type;
1036     if(s->pict_type!=B_TYPE){
1037         s->last_non_b_pict_type= s->pict_type;
1038     }
1039 #if 0
1040         /* copy back current_picture variables */
1041     for(i=0; i<MAX_PICTURE_COUNT; i++){
1042         if(s->picture[i].data[0] == s->current_picture.data[0]){
1043             s->picture[i]= s->current_picture;
1044             break;
1045         }    
1046     }
1047     assert(i<MAX_PICTURE_COUNT);
1048 #endif    
1049
1050     if(s->encoding){
1051         /* release non refernce frames */
1052         for(i=0; i<MAX_PICTURE_COUNT; i++){
1053             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1054                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1055             }
1056         }
1057     }
1058     // clear copies, to avoid confusion
1059 #if 0
1060     memset(&s->last_picture, 0, sizeof(Picture));
1061     memset(&s->next_picture, 0, sizeof(Picture));
1062     memset(&s->current_picture, 0, sizeof(Picture));
1063 #endif
1064 }
1065
1066 /**
1067  * prints debuging info for the given picture.
1068  */
1069 void ff_print_debug_info(MpegEncContext *s, Picture *pict){
1070
1071     if(!pict || !pict->mb_type) return;
1072
1073     if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
1074         int x,y;
1075
1076         for(y=0; y<s->mb_height; y++){
1077             for(x=0; x<s->mb_width; x++){
1078                 if(s->avctx->debug&FF_DEBUG_SKIP){
1079                     int count= s->mbskip_table[x + y*s->mb_stride];
1080                     if(count>9) count=9;
1081                     printf("%1d", count);
1082                 }
1083                 if(s->avctx->debug&FF_DEBUG_QP){
1084                     printf("%2d", pict->qscale_table[x + y*s->mb_stride]);
1085                 }
1086                 if(s->avctx->debug&FF_DEBUG_MB_TYPE){
1087                     int mb_type= pict->mb_type[x + y*s->mb_stride];
1088                     
1089                     //Type & MV direction
1090                     if(IS_PCM(mb_type))
1091                         printf("P");
1092                     else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
1093                         printf("A");
1094                     else if(IS_INTRA4x4(mb_type))
1095                         printf("i");
1096                     else if(IS_INTRA16x16(mb_type))
1097                         printf("I");
1098                     else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
1099                         printf("d");
1100                     else if(IS_DIRECT(mb_type))
1101                         printf("D");
1102                     else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
1103                         printf("g");
1104                     else if(IS_GMC(mb_type))
1105                         printf("G");
1106                     else if(IS_SKIP(mb_type))
1107                         printf("S");
1108                     else if(!USES_LIST(mb_type, 1))
1109                         printf(">");
1110                     else if(!USES_LIST(mb_type, 0))
1111                         printf("<");
1112                     else{
1113                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1114                         printf("X");
1115                     }
1116                     
1117                     //segmentation
1118                     if(IS_8X8(mb_type))
1119                         printf("+");
1120                     else if(IS_16X8(mb_type))
1121                         printf("-");
1122                     else if(IS_8X16(mb_type))
1123                         printf("¦");
1124                     else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
1125                         printf(" ");
1126                     else
1127                         printf("?");
1128                     
1129                         
1130                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
1131                         printf("=");
1132                     else
1133                         printf(" ");
1134                 }
1135 //                printf(" ");
1136             }
1137             printf("\n");
1138         }
1139     }
1140 }
1141
1142 #ifdef CONFIG_ENCODERS
1143
1144 static int get_sae(uint8_t *src, int ref, int stride){
1145     int x,y;
1146     int acc=0;
1147     
1148     for(y=0; y<16; y++){
1149         for(x=0; x<16; x++){
1150             acc+= ABS(src[x+y*stride] - ref);
1151         }
1152     }
1153     
1154     return acc;
1155 }
1156
1157 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
1158     int x, y, w, h;
1159     int acc=0;
1160     
1161     w= s->width &~15;
1162     h= s->height&~15;
1163     
1164     for(y=0; y<h; y+=16){
1165         for(x=0; x<w; x+=16){
1166             int offset= x + y*stride;
1167             int sad = s->dsp.pix_abs16x16(src + offset, ref + offset, stride);
1168             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
1169             int sae = get_sae(src + offset, mean, stride);
1170             
1171             acc+= sae + 500 < sad;
1172         }
1173     }
1174     return acc;
1175 }
1176
1177
1178 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
1179     AVFrame *pic;
1180     int i;
1181     const int encoding_delay= s->max_b_frames;
1182     int direct=1;
1183
1184     if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
1185     if(pic_arg->linesize[0] != s->linesize) direct=0;
1186     if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
1187     if(pic_arg->linesize[2] != s->uvlinesize) direct=0;
1188   
1189 //    printf("%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);
1190     
1191     if(direct){
1192         i= find_unused_picture(s, 1);
1193
1194         pic= (AVFrame*)&s->picture[i];
1195         pic->reference= 3;
1196     
1197         for(i=0; i<4; i++){
1198             pic->data[i]= pic_arg->data[i];
1199             pic->linesize[i]= pic_arg->linesize[i];
1200         }
1201         alloc_picture(s, (Picture*)pic, 1);
1202     }else{
1203         i= find_unused_picture(s, 0);
1204
1205         pic= (AVFrame*)&s->picture[i];
1206         pic->reference= 3;
1207
1208         alloc_picture(s, (Picture*)pic, 0);
1209         for(i=0; i<4; i++){
1210             /* the input will be 16 pixels to the right relative to the actual buffer start
1211              * and the current_pic, so the buffer can be reused, yes its not beatifull 
1212              */
1213             pic->data[i]+= 16; 
1214         }
1215
1216         if(   pic->data[0] == pic_arg->data[0] 
1217            && pic->data[1] == pic_arg->data[1]
1218            && pic->data[2] == pic_arg->data[2]){
1219        // empty
1220         }else{
1221             int h_chroma_shift, v_chroma_shift;
1222         
1223             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
1224         
1225             for(i=0; i<3; i++){
1226                 int src_stride= pic_arg->linesize[i];
1227                 int dst_stride= i ? s->uvlinesize : s->linesize;
1228                 int h_shift= i ? h_chroma_shift : 0;
1229                 int v_shift= i ? v_chroma_shift : 0;
1230                 int w= s->width >>h_shift;
1231                 int h= s->height>>v_shift;
1232                 uint8_t *src= pic_arg->data[i];
1233                 uint8_t *dst= pic->data[i];
1234             
1235                 if(src_stride==dst_stride)
1236                     memcpy(dst, src, src_stride*h);
1237                 else{
1238                     while(h--){
1239                         memcpy(dst, src, w);
1240                         dst += dst_stride;
1241                         src += src_stride;
1242                     }
1243                 }
1244             }
1245         }
1246     }
1247     pic->quality= pic_arg->quality;
1248     pic->pict_type= pic_arg->pict_type;
1249     pic->pts = pic_arg->pts;
1250     
1251     if(s->input_picture[encoding_delay])
1252         pic->display_picture_number= s->input_picture[encoding_delay]->display_picture_number + 1;
1253
1254     /* shift buffer entries */
1255     for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
1256         s->input_picture[i-1]= s->input_picture[i];
1257         
1258     s->input_picture[encoding_delay]= (Picture*)pic;
1259
1260     return 0;
1261 }
1262
1263 static void select_input_picture(MpegEncContext *s){
1264     int i;
1265     const int encoding_delay= s->max_b_frames;
1266     int coded_pic_num=0;    
1267
1268     if(s->reordered_input_picture[0])
1269         coded_pic_num= s->reordered_input_picture[0]->coded_picture_number + 1;
1270
1271     for(i=1; i<MAX_PICTURE_COUNT; i++)
1272         s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
1273     s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;
1274
1275     /* set next picture types & ordering */
1276     if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
1277         if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
1278             s->reordered_input_picture[0]= s->input_picture[0];
1279             s->reordered_input_picture[0]->pict_type= I_TYPE;
1280             s->reordered_input_picture[0]->coded_picture_number= coded_pic_num;
1281         }else{
1282             int b_frames;
1283             
1284             if(s->flags&CODEC_FLAG_PASS2){
1285                 for(i=0; i<s->max_b_frames+1; i++){
1286                     int pict_num= s->input_picture[0]->display_picture_number + i;
1287                     int pict_type= s->rc_context.entry[pict_num].new_pict_type;
1288                     s->input_picture[i]->pict_type= pict_type;
1289                     
1290                     if(i + 1 >= s->rc_context.num_entries) break;
1291                 }
1292             }
1293
1294             if(s->input_picture[0]->pict_type){
1295                 /* user selected pict_type */
1296                 for(b_frames=0; b_frames<s->max_b_frames+1; b_frames++){
1297                     if(s->input_picture[b_frames]->pict_type!=B_TYPE) break;
1298                 }
1299             
1300                 if(b_frames > s->max_b_frames){
1301                     fprintf(stderr, "warning, too many bframes in a row\n");
1302                     b_frames = s->max_b_frames;
1303                 }
1304             }else if(s->b_frame_strategy==0){
1305                 b_frames= s->max_b_frames;
1306             }else if(s->b_frame_strategy==1){
1307                 for(i=1; i<s->max_b_frames+1; i++){
1308                     if(s->input_picture[i]->b_frame_score==0){
1309                         s->input_picture[i]->b_frame_score= 
1310                             get_intra_count(s, s->input_picture[i  ]->data[0], 
1311                                                s->input_picture[i-1]->data[0], s->linesize) + 1;
1312                     }
1313                 }
1314                 for(i=0; i<s->max_b_frames; i++){
1315                     if(s->input_picture[i]->b_frame_score - 1 > s->mb_num/40) break;
1316                 }
1317                                 
1318                 b_frames= FFMAX(0, i-1);
1319                 
1320                 /* reset scores */
1321                 for(i=0; i<b_frames+1; i++){
1322                     s->input_picture[i]->b_frame_score=0;
1323                 }
1324             }else{
1325                 fprintf(stderr, "illegal b frame strategy\n");
1326                 b_frames=0;
1327             }
1328
1329             emms_c();
1330 //static int b_count=0;
1331 //b_count+= b_frames;
1332 //printf("b_frames: %d\n", b_count);
1333                         
1334             s->reordered_input_picture[0]= s->input_picture[b_frames];
1335             if(   s->picture_in_gop_number + b_frames >= s->gop_size 
1336                || s->reordered_input_picture[0]->pict_type== I_TYPE)
1337                 s->reordered_input_picture[0]->pict_type= I_TYPE;
1338             else
1339                 s->reordered_input_picture[0]->pict_type= P_TYPE;
1340             s->reordered_input_picture[0]->coded_picture_number= coded_pic_num;
1341             for(i=0; i<b_frames; i++){
1342                 coded_pic_num++;
1343                 s->reordered_input_picture[i+1]= s->input_picture[i];
1344                 s->reordered_input_picture[i+1]->pict_type= B_TYPE;
1345                 s->reordered_input_picture[i+1]->coded_picture_number= coded_pic_num;
1346             }
1347         }
1348     }
1349     
1350     if(s->reordered_input_picture[0]){
1351         s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;
1352
1353         s->new_picture= *s->reordered_input_picture[0];
1354
1355         if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
1356             // input is a shared pix, so we cant modifiy it -> alloc a new one & ensure that the shared one is reuseable
1357         
1358             int i= find_unused_picture(s, 0);
1359             Picture *pic= &s->picture[i];
1360
1361             /* mark us unused / free shared pic */
1362             for(i=0; i<4; i++)
1363                 s->reordered_input_picture[0]->data[i]= NULL;
1364             s->reordered_input_picture[0]->type= 0;
1365             
1366             //FIXME bad, copy * except
1367             pic->pict_type = s->reordered_input_picture[0]->pict_type;
1368             pic->quality   = s->reordered_input_picture[0]->quality;
1369             pic->coded_picture_number = s->reordered_input_picture[0]->coded_picture_number;
1370             pic->reference = s->reordered_input_picture[0]->reference;
1371             
1372             alloc_picture(s, pic, 0);
1373
1374             s->current_picture_ptr= pic;
1375         }else{
1376             // input is not a shared pix -> reuse buffer for current_pix
1377
1378             assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER 
1379                    || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
1380             
1381             s->current_picture_ptr= s->reordered_input_picture[0];
1382             for(i=0; i<4; i++){
1383                 //reverse the +16 we did before storing the input
1384                 s->current_picture_ptr->data[i]-=16;
1385             }
1386         }
1387         s->current_picture= *s->current_picture_ptr;
1388     
1389         s->picture_number= s->new_picture.display_picture_number;
1390 //printf("dpn:%d\n", s->picture_number);
1391     }else{
1392        memset(&s->new_picture, 0, sizeof(Picture));
1393     }
1394 }
1395
1396 int MPV_encode_picture(AVCodecContext *avctx,
1397                        unsigned char *buf, int buf_size, void *data)
1398 {
1399     MpegEncContext *s = avctx->priv_data;
1400     AVFrame *pic_arg = data;
1401     int i;
1402
1403     init_put_bits(&s->pb, buf, buf_size, NULL, NULL);
1404
1405     s->picture_in_gop_number++;
1406
1407     load_input_picture(s, pic_arg);
1408     
1409     select_input_picture(s);
1410     
1411     /* output? */
1412     if(s->new_picture.data[0]){
1413
1414         s->pict_type= s->new_picture.pict_type;
1415         if (s->fixed_qscale){ /* the ratecontrol needs the last qscale so we dont touch it for CBR */
1416             s->qscale= (int)(s->new_picture.quality+0.5);
1417             assert(s->qscale);
1418         }
1419 //emms_c();
1420 //printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
1421         MPV_frame_start(s, avctx);
1422
1423         encode_picture(s, s->picture_number);
1424         
1425         avctx->real_pict_num  = s->picture_number;
1426         avctx->header_bits = s->header_bits;
1427         avctx->mv_bits     = s->mv_bits;
1428         avctx->misc_bits   = s->misc_bits;
1429         avctx->i_tex_bits  = s->i_tex_bits;
1430         avctx->p_tex_bits  = s->p_tex_bits;
1431         avctx->i_count     = s->i_count;
1432         avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
1433         avctx->skip_count  = s->skip_count;
1434
1435         MPV_frame_end(s);
1436
1437         if (s->out_format == FMT_MJPEG)
1438             mjpeg_picture_trailer(s);
1439         
1440         if(s->flags&CODEC_FLAG_PASS1)
1441             ff_write_pass1_stats(s);
1442
1443         for(i=0; i<4; i++){
1444             avctx->error[i] += s->current_picture_ptr->error[i];
1445         }
1446     }
1447
1448     s->input_picture_number++;
1449
1450     flush_put_bits(&s->pb);
1451     s->frame_bits  = (pbBufPtr(&s->pb) - s->pb.buf) * 8;
1452     
1453     s->total_bits += s->frame_bits;
1454     avctx->frame_bits  = s->frame_bits;
1455     
1456     return pbBufPtr(&s->pb) - s->pb.buf;
1457 }
1458
1459 #endif //CONFIG_ENCODERS
1460
1461 static inline void gmc1_motion(MpegEncContext *s,
1462                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1463                                int dest_offset,
1464                                uint8_t **ref_picture, int src_offset)
1465 {
1466     uint8_t *ptr;
1467     int offset, src_x, src_y, linesize, uvlinesize;
1468     int motion_x, motion_y;
1469     int emu=0;
1470
1471     motion_x= s->sprite_offset[0][0];
1472     motion_y= s->sprite_offset[0][1];
1473     src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
1474     src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
1475     motion_x<<=(3-s->sprite_warping_accuracy);
1476     motion_y<<=(3-s->sprite_warping_accuracy);
1477     src_x = clip(src_x, -16, s->width);
1478     if (src_x == s->width)
1479         motion_x =0;
1480     src_y = clip(src_y, -16, s->height);
1481     if (src_y == s->height)
1482         motion_y =0;
1483
1484     linesize = s->linesize;
1485     uvlinesize = s->uvlinesize;
1486     
1487     ptr = ref_picture[0] + (src_y * linesize) + src_x + src_offset;
1488
1489     dest_y+=dest_offset;
1490     if(s->flags&CODEC_FLAG_EMU_EDGE){
1491         if(src_x<0 || src_y<0 || src_x + 17 >= s->h_edge_pos
1492                               || src_y + 17 >= s->v_edge_pos){
1493             ff_emulated_edge_mc(s, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
1494             ptr= s->edge_emu_buffer;
1495         }
1496     }
1497     
1498     if((motion_x|motion_y)&7){
1499         s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
1500         s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
1501     }else{
1502         int dxy;
1503         
1504         dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
1505         if (s->no_rounding){
1506             s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
1507         }else{
1508             s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
1509         }
1510     }
1511     
1512     if(s->flags&CODEC_FLAG_GRAY) return;
1513
1514     motion_x= s->sprite_offset[1][0];
1515     motion_y= s->sprite_offset[1][1];
1516     src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
1517     src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
1518     motion_x<<=(3-s->sprite_warping_accuracy);
1519     motion_y<<=(3-s->sprite_warping_accuracy);
1520     src_x = clip(src_x, -8, s->width>>1);
1521     if (src_x == s->width>>1)
1522         motion_x =0;
1523     src_y = clip(src_y, -8, s->height>>1);
1524     if (src_y == s->height>>1)
1525         motion_y =0;
1526
1527     offset = (src_y * uvlinesize) + src_x + (src_offset>>1);
1528     ptr = ref_picture[1] + offset;
1529     if(s->flags&CODEC_FLAG_EMU_EDGE){
1530         if(src_x<0 || src_y<0 || src_x + 9 >= s->h_edge_pos>>1
1531                               || src_y + 9 >= s->v_edge_pos>>1){
1532             ff_emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
1533             ptr= s->edge_emu_buffer;
1534             emu=1;
1535         }
1536     }
1537     s->dsp.gmc1(dest_cb + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
1538     
1539     ptr = ref_picture[2] + offset;
1540     if(emu){
1541         ff_emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
1542         ptr= s->edge_emu_buffer;
1543     }
1544     s->dsp.gmc1(dest_cr + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
1545     
1546     return;
1547 }
1548
1549 static inline void gmc_motion(MpegEncContext *s,
1550                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1551                                int dest_offset,
1552                                uint8_t **ref_picture, int src_offset)
1553 {
1554     uint8_t *ptr;
1555     int linesize, uvlinesize;
1556     const int a= s->sprite_warping_accuracy;
1557     int ox, oy;
1558
1559     linesize = s->linesize;
1560     uvlinesize = s->uvlinesize;
1561
1562     ptr = ref_picture[0] + src_offset;
1563
1564     dest_y+=dest_offset;
1565     
1566     ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
1567     oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
1568
1569     s->dsp.gmc(dest_y, ptr, linesize, 16,
1570            ox, 
1571            oy, 
1572            s->sprite_delta[0][0], s->sprite_delta[0][1],
1573            s->sprite_delta[1][0], s->sprite_delta[1][1], 
1574            a+1, (1<<(2*a+1)) - s->no_rounding,
1575            s->h_edge_pos, s->v_edge_pos);
1576     s->dsp.gmc(dest_y+8, ptr, linesize, 16,
1577            ox + s->sprite_delta[0][0]*8, 
1578            oy + s->sprite_delta[1][0]*8, 
1579            s->sprite_delta[0][0], s->sprite_delta[0][1],
1580            s->sprite_delta[1][0], s->sprite_delta[1][1], 
1581            a+1, (1<<(2*a+1)) - s->no_rounding,
1582            s->h_edge_pos, s->v_edge_pos);
1583
1584     if(s->flags&CODEC_FLAG_GRAY) return;
1585
1586
1587     dest_cb+=dest_offset>>1;
1588     dest_cr+=dest_offset>>1;
1589     
1590     ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
1591     oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
1592
1593     ptr = ref_picture[1] + (src_offset>>1);
1594     s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
1595            ox, 
1596            oy, 
1597            s->sprite_delta[0][0], s->sprite_delta[0][1],
1598            s->sprite_delta[1][0], s->sprite_delta[1][1], 
1599            a+1, (1<<(2*a+1)) - s->no_rounding,
1600            s->h_edge_pos>>1, s->v_edge_pos>>1);
1601     
1602     ptr = ref_picture[2] + (src_offset>>1);
1603     s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
1604            ox, 
1605            oy, 
1606            s->sprite_delta[0][0], s->sprite_delta[0][1],
1607            s->sprite_delta[1][0], s->sprite_delta[1][1], 
1608            a+1, (1<<(2*a+1)) - s->no_rounding,
1609            s->h_edge_pos>>1, s->v_edge_pos>>1);
1610 }
1611
1612
1613 void ff_emulated_edge_mc(MpegEncContext *s, uint8_t *src, int linesize, int block_w, int block_h, 
1614                                     int src_x, int src_y, int w, int h){
1615     int x, y;
1616     int start_y, start_x, end_y, end_x;
1617     uint8_t *buf= s->edge_emu_buffer;
1618
1619     if(src_y>= h){
1620         src+= (h-1-src_y)*linesize;
1621         src_y=h-1;
1622     }else if(src_y<=-block_h){
1623         src+= (1-block_h-src_y)*linesize;
1624         src_y=1-block_h;
1625     }
1626     if(src_x>= w){
1627         src+= (w-1-src_x);
1628         src_x=w-1;
1629     }else if(src_x<=-block_w){
1630         src+= (1-block_w-src_x);
1631         src_x=1-block_w;
1632     }
1633
1634     start_y= FFMAX(0, -src_y);
1635     start_x= FFMAX(0, -src_x);
1636     end_y= FFMIN(block_h, h-src_y);
1637     end_x= FFMIN(block_w, w-src_x);
1638
1639     // copy existing part
1640     for(y=start_y; y<end_y; y++){
1641         for(x=start_x; x<end_x; x++){
1642             buf[x + y*linesize]= src[x + y*linesize];
1643         }
1644     }
1645
1646     //top
1647     for(y=0; y<start_y; y++){
1648         for(x=start_x; x<end_x; x++){
1649             buf[x + y*linesize]= buf[x + start_y*linesize];
1650         }
1651     }
1652
1653     //bottom
1654     for(y=end_y; y<block_h; y++){
1655         for(x=start_x; x<end_x; x++){
1656             buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
1657         }
1658     }
1659                                     
1660     for(y=0; y<block_h; y++){
1661        //left
1662         for(x=0; x<start_x; x++){
1663             buf[x + y*linesize]= buf[start_x + y*linesize];
1664         }
1665        
1666        //right
1667         for(x=end_x; x<block_w; x++){
1668             buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
1669         }
1670     }
1671 }
1672
1673
1674 /* apply one mpeg motion vector to the three components */
1675 static inline void mpeg_motion(MpegEncContext *s,
1676                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1677                                int dest_offset,
1678                                uint8_t **ref_picture, int src_offset,
1679                                int field_based, op_pixels_func (*pix_op)[4],
1680                                int motion_x, int motion_y, int h)
1681 {
1682     uint8_t *ptr;
1683     int dxy, offset, mx, my, src_x, src_y, height, v_edge_pos, linesize, uvlinesize;
1684     int emu=0;
1685 #if 0    
1686 if(s->quarter_sample)
1687 {
1688     motion_x>>=1;
1689     motion_y>>=1;
1690 }
1691 #endif
1692     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
1693     src_x = s->mb_x * 16 + (motion_x >> 1);
1694     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 1);
1695                 
1696     /* WARNING: do no forget half pels */
1697     height = s->height >> field_based;
1698     v_edge_pos = s->v_edge_pos >> field_based;
1699     src_x = clip(src_x, -16, s->width);
1700     if (src_x == s->width)
1701         dxy &= ~1;
1702     src_y = clip(src_y, -16, height);
1703     if (src_y == height)
1704         dxy &= ~2;
1705     linesize   = s->current_picture.linesize[0] << field_based;
1706     uvlinesize = s->current_picture.linesize[1] << field_based;
1707     ptr = ref_picture[0] + (src_y * linesize) + (src_x) + src_offset;
1708     dest_y += dest_offset;
1709
1710     if(s->flags&CODEC_FLAG_EMU_EDGE){
1711         if(src_x<0 || src_y<0 || src_x + (motion_x&1) + 16 > s->h_edge_pos
1712                               || src_y + (motion_y&1) + h  > v_edge_pos){
1713             ff_emulated_edge_mc(s, ptr - src_offset, s->linesize, 17, 17+field_based,  //FIXME linesize? and uv below
1714                              src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
1715             ptr= s->edge_emu_buffer + src_offset;
1716             emu=1;
1717         }
1718     }
1719     pix_op[0][dxy](dest_y, ptr, linesize, h);
1720
1721     if(s->flags&CODEC_FLAG_GRAY) return;
1722
1723     if (s->out_format == FMT_H263) {
1724         dxy = 0;
1725         if ((motion_x & 3) != 0)
1726             dxy |= 1;
1727         if ((motion_y & 3) != 0)
1728             dxy |= 2;
1729         mx = motion_x >> 2;
1730         my = motion_y >> 2;
1731     } else {
1732         mx = motion_x / 2;
1733         my = motion_y / 2;
1734         dxy = ((my & 1) << 1) | (mx & 1);
1735         mx >>= 1;
1736         my >>= 1;
1737     }
1738     
1739     src_x = s->mb_x * 8 + mx;
1740     src_y = s->mb_y * (8 >> field_based) + my;
1741     src_x = clip(src_x, -8, s->width >> 1);
1742     if (src_x == (s->width >> 1))
1743         dxy &= ~1;
1744     src_y = clip(src_y, -8, height >> 1);
1745     if (src_y == (height >> 1))
1746         dxy &= ~2;
1747     offset = (src_y * uvlinesize) + src_x + (src_offset >> 1);
1748     ptr = ref_picture[1] + offset;
1749     if(emu){
1750         ff_emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based, 
1751                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
1752         ptr= s->edge_emu_buffer + (src_offset >> 1);
1753     }
1754     pix_op[1][dxy](dest_cb + (dest_offset >> 1), ptr, uvlinesize, h >> 1);
1755
1756     ptr = ref_picture[2] + offset;
1757     if(emu){
1758         ff_emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based, 
1759                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
1760         ptr= s->edge_emu_buffer + (src_offset >> 1);
1761     }
1762     pix_op[1][dxy](dest_cr + (dest_offset >> 1), ptr, uvlinesize, h >> 1);
1763 }
1764
1765 static inline void qpel_motion(MpegEncContext *s,
1766                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1767                                int dest_offset,
1768                                uint8_t **ref_picture, int src_offset,
1769                                int field_based, op_pixels_func (*pix_op)[4],
1770                                qpel_mc_func (*qpix_op)[16],
1771                                int motion_x, int motion_y, int h)
1772 {
1773     uint8_t *ptr;
1774     int dxy, offset, mx, my, src_x, src_y, height, v_edge_pos, linesize, uvlinesize;
1775     int emu=0;
1776
1777     dxy = ((motion_y & 3) << 2) | (motion_x & 3);
1778     src_x = s->mb_x * 16 + (motion_x >> 2);
1779     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);
1780
1781     height = s->height >> field_based;
1782     v_edge_pos = s->v_edge_pos >> field_based;
1783     src_x = clip(src_x, -16, s->width);
1784     if (src_x == s->width)
1785         dxy &= ~3;
1786     src_y = clip(src_y, -16, height);
1787     if (src_y == height)
1788         dxy &= ~12;
1789     linesize = s->linesize << field_based;
1790     uvlinesize = s->uvlinesize << field_based;
1791     ptr = ref_picture[0] + (src_y * linesize) + src_x + src_offset;
1792     dest_y += dest_offset;
1793 //printf("%d %d %d\n", src_x, src_y, dxy);
1794     
1795     if(s->flags&CODEC_FLAG_EMU_EDGE){
1796         if(src_x<0 || src_y<0 || src_x + (motion_x&3) + 16 > s->h_edge_pos
1797                               || src_y + (motion_y&3) + h  > v_edge_pos){
1798             ff_emulated_edge_mc(s, ptr - src_offset, s->linesize, 17, 17+field_based, 
1799                              src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
1800             ptr= s->edge_emu_buffer + src_offset;
1801             emu=1;
1802         }
1803     }
1804     if(!field_based)
1805         qpix_op[0][dxy](dest_y, ptr, linesize);
1806     else{
1807         //damn interlaced mode
1808         //FIXME boundary mirroring is not exactly correct here
1809         qpix_op[1][dxy](dest_y  , ptr  , linesize);
1810         qpix_op[1][dxy](dest_y+8, ptr+8, linesize);
1811     }
1812
1813     if(s->flags&CODEC_FLAG_GRAY) return;
1814
1815     if(field_based){
1816         mx= motion_x/2;
1817         my= motion_y>>1;
1818     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
1819         static const int rtab[8]= {0,0,1,1,0,0,0,1};
1820         mx= (motion_x>>1) + rtab[motion_x&7];
1821         my= (motion_y>>1) + rtab[motion_y&7];
1822     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
1823         mx= (motion_x>>1)|(motion_x&1);
1824         my= (motion_y>>1)|(motion_y&1);
1825     }else{
1826         mx= motion_x/2;
1827         my= motion_y/2;
1828     }
1829     mx= (mx>>1)|(mx&1);
1830     my= (my>>1)|(my&1);
1831
1832     dxy= (mx&1) | ((my&1)<<1);
1833     mx>>=1;
1834     my>>=1;
1835
1836     src_x = s->mb_x * 8 + mx;
1837     src_y = s->mb_y * (8 >> field_based) + my;
1838     src_x = clip(src_x, -8, s->width >> 1);
1839     if (src_x == (s->width >> 1))
1840         dxy &= ~1;
1841     src_y = clip(src_y, -8, height >> 1);
1842     if (src_y == (height >> 1))
1843         dxy &= ~2;
1844
1845     offset = (src_y * uvlinesize) + src_x + (src_offset >> 1);
1846     ptr = ref_picture[1] + offset;
1847     if(emu){
1848         ff_emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based, 
1849                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
1850         ptr= s->edge_emu_buffer + (src_offset >> 1);
1851     }
1852     pix_op[1][dxy](dest_cb + (dest_offset >> 1), ptr,  uvlinesize, h >> 1);
1853     
1854     ptr = ref_picture[2] + offset;
1855     if(emu){
1856         ff_emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based, 
1857                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
1858         ptr= s->edge_emu_buffer + (src_offset >> 1);
1859     }
1860     pix_op[1][dxy](dest_cr + (dest_offset >> 1), ptr,  uvlinesize, h >> 1);
1861 }
1862
1863 inline int ff_h263_round_chroma(int x){
1864     if (x >= 0)
1865         return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
1866     else {
1867         x = -x;
1868         return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
1869     }
1870 }
1871
1872 /**
1873  * motion compesation of a single macroblock
1874  * @param s context
1875  * @param dest_y luma destination pointer
1876  * @param dest_cb chroma cb/u destination pointer
1877  * @param dest_cr chroma cr/v destination pointer
1878  * @param dir direction (0->forward, 1->backward)
1879  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
1880  * @param pic_op halfpel motion compensation function (average or put normally)
1881  * @param pic_op qpel motion compensation function (average or put normally)
1882  * the motion vectors are taken from s->mv and the MV type from s->mv_type
1883  */
1884 static inline void MPV_motion(MpegEncContext *s, 
1885                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1886                               int dir, uint8_t **ref_picture, 
1887                               op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
1888 {
1889     int dxy, offset, mx, my, src_x, src_y, motion_x, motion_y;
1890     int mb_x, mb_y, i;
1891     uint8_t *ptr, *dest;
1892     int emu=0;
1893
1894     mb_x = s->mb_x;
1895     mb_y = s->mb_y;
1896
1897     switch(s->mv_type) {
1898     case MV_TYPE_16X16:
1899 #ifdef CONFIG_RISKY
1900         if(s->mcsel){
1901             if(s->real_sprite_warping_points==1){
1902                 gmc1_motion(s, dest_y, dest_cb, dest_cr, 0,
1903                             ref_picture, 0);
1904             }else{
1905                 gmc_motion(s, dest_y, dest_cb, dest_cr, 0,
1906                             ref_picture, 0);
1907             }
1908         }else if(s->quarter_sample){
1909             qpel_motion(s, dest_y, dest_cb, dest_cr, 0,
1910                         ref_picture, 0,
1911                         0, pix_op, qpix_op,
1912                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
1913         }else if(s->mspel){
1914             ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
1915                         ref_picture, pix_op,
1916                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
1917         }else
1918 #endif
1919         {
1920             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
1921                         ref_picture, 0,
1922                         0, pix_op,
1923                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
1924         }           
1925         break;
1926     case MV_TYPE_8X8:
1927         mx = 0;
1928         my = 0;
1929         if(s->quarter_sample){
1930             for(i=0;i<4;i++) {
1931                 motion_x = s->mv[dir][i][0];
1932                 motion_y = s->mv[dir][i][1];
1933
1934                 dxy = ((motion_y & 3) << 2) | (motion_x & 3);
1935                 src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
1936                 src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;
1937                     
1938                 /* WARNING: do no forget half pels */
1939                 src_x = clip(src_x, -16, s->width);
1940                 if (src_x == s->width)
1941                     dxy &= ~3;
1942                 src_y = clip(src_y, -16, s->height);
1943                 if (src_y == s->height)
1944                     dxy &= ~12;
1945                     
1946                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
1947                 if(s->flags&CODEC_FLAG_EMU_EDGE){
1948                     if(src_x<0 || src_y<0 || src_x + (motion_x&3) + 8 > s->h_edge_pos
1949                                           || src_y + (motion_y&3) + 8 > s->v_edge_pos){
1950                         ff_emulated_edge_mc(s, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
1951                         ptr= s->edge_emu_buffer;
1952                     }
1953                 }
1954                 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
1955                 qpix_op[1][dxy](dest, ptr, s->linesize);
1956
1957                 mx += s->mv[dir][i][0]/2;
1958                 my += s->mv[dir][i][1]/2;
1959             }
1960         }else{
1961             for(i=0;i<4;i++) {
1962                 motion_x = s->mv[dir][i][0];
1963                 motion_y = s->mv[dir][i][1];
1964
1965                 dxy = ((motion_y & 1) << 1) | (motion_x & 1);
1966                 src_x = mb_x * 16 + (motion_x >> 1) + (i & 1) * 8;
1967                 src_y = mb_y * 16 + (motion_y >> 1) + (i >>1) * 8;
1968                     
1969                 /* WARNING: do no forget half pels */
1970                 src_x = clip(src_x, -16, s->width);
1971                 if (src_x == s->width)
1972                     dxy &= ~1;
1973                 src_y = clip(src_y, -16, s->height);
1974                 if (src_y == s->height)
1975                     dxy &= ~2;
1976                     
1977                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
1978                 if(s->flags&CODEC_FLAG_EMU_EDGE){
1979                     if(src_x<0 || src_y<0 || src_x + (motion_x&1) + 8 > s->h_edge_pos
1980                                           || src_y + (motion_y&1) + 8 > s->v_edge_pos){
1981                         ff_emulated_edge_mc(s, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
1982                         ptr= s->edge_emu_buffer;
1983                     }
1984                 }
1985                 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
1986                 pix_op[1][dxy](dest, ptr, s->linesize, 8);
1987
1988                 mx += s->mv[dir][i][0];
1989                 my += s->mv[dir][i][1];
1990             }
1991         }
1992
1993         if(s->flags&CODEC_FLAG_GRAY) break;
1994         /* In case of 8X8, we construct a single chroma motion vector
1995            with a special rounding */
1996         mx= ff_h263_round_chroma(mx);
1997         my= ff_h263_round_chroma(my);
1998         dxy = ((my & 1) << 1) | (mx & 1);
1999         mx >>= 1;
2000         my >>= 1;
2001
2002         src_x = mb_x * 8 + mx;
2003         src_y = mb_y * 8 + my;
2004         src_x = clip(src_x, -8, s->width/2);
2005         if (src_x == s->width/2)
2006             dxy &= ~1;
2007         src_y = clip(src_y, -8, s->height/2);
2008         if (src_y == s->height/2)
2009             dxy &= ~2;
2010         
2011         offset = (src_y * (s->uvlinesize)) + src_x;
2012         ptr = ref_picture[1] + offset;
2013         if(s->flags&CODEC_FLAG_EMU_EDGE){
2014                 if(src_x<0 || src_y<0 || src_x + (dxy &1) + 8 > s->h_edge_pos>>1
2015                                       || src_y + (dxy>>1) + 8 > s->v_edge_pos>>1){
2016                     ff_emulated_edge_mc(s, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2017                     ptr= s->edge_emu_buffer;
2018                     emu=1;
2019                 }
2020             }
2021         pix_op[1][dxy](dest_cb, ptr, s->uvlinesize, 8);
2022
2023         ptr = ref_picture[2] + offset;
2024         if(emu){
2025             ff_emulated_edge_mc(s, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2026             ptr= s->edge_emu_buffer;
2027         }
2028         pix_op[1][dxy](dest_cr, ptr, s->uvlinesize, 8);
2029         break;
2030     case MV_TYPE_FIELD:
2031         if (s->picture_structure == PICT_FRAME) {
2032             if(s->quarter_sample){
2033                 /* top field */
2034                 qpel_motion(s, dest_y, dest_cb, dest_cr, 0,
2035                             ref_picture, s->field_select[dir][0] ? s->linesize : 0,
2036                             1, pix_op, qpix_op,
2037                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
2038                 /* bottom field */
2039                 qpel_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
2040                             ref_picture, s->field_select[dir][1] ? s->linesize : 0,
2041                             1, pix_op, qpix_op,
2042                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
2043             }else{
2044                 /* top field */       
2045                 mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2046                             ref_picture, s->field_select[dir][0] ? s->linesize : 0,
2047                             1, pix_op,
2048                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
2049                 /* bottom field */
2050                 mpeg_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
2051                             ref_picture, s->field_select[dir][1] ? s->linesize : 0,
2052                             1, pix_op,
2053                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
2054             }
2055         } else {
2056             int offset;
2057             if(s->picture_structure == s->field_select[dir][0] + 1 || s->pict_type == B_TYPE || s->first_field){
2058                 offset= s->field_select[dir][0] ? s->linesize : 0;
2059             }else{
2060                 ref_picture= s->current_picture.data;
2061                 offset= s->field_select[dir][0] ? s->linesize : -s->linesize; 
2062             } 
2063
2064             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2065                         ref_picture, offset,
2066                         0, pix_op,
2067                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
2068         }
2069         break;
2070     }
2071 }
2072
2073
2074 /* put block[] to dest[] */
2075 static inline void put_dct(MpegEncContext *s, 
2076                            DCTELEM *block, int i, uint8_t *dest, int line_size)
2077 {
2078     s->dct_unquantize(s, block, i, s->qscale);
2079     s->dsp.idct_put (dest, line_size, block);
2080 }
2081
2082 /* add block[] to dest[] */
2083 static inline void add_dct(MpegEncContext *s, 
2084                            DCTELEM *block, int i, uint8_t *dest, int line_size)
2085 {
2086     if (s->block_last_index[i] >= 0) {
2087         s->dsp.idct_add (dest, line_size, block);
2088     }
2089 }
2090
2091 static inline void add_dequant_dct(MpegEncContext *s, 
2092                            DCTELEM *block, int i, uint8_t *dest, int line_size)
2093 {
2094     if (s->block_last_index[i] >= 0) {
2095         s->dct_unquantize(s, block, i, s->qscale);
2096
2097         s->dsp.idct_add (dest, line_size, block);
2098     }
2099 }
2100
2101 /**
2102  * cleans dc, ac, coded_block for the current non intra MB
2103  */
2104 void ff_clean_intra_table_entries(MpegEncContext *s)
2105 {
2106     int wrap = s->block_wrap[0];
2107     int xy = s->block_index[0];
2108     
2109     s->dc_val[0][xy           ] = 
2110     s->dc_val[0][xy + 1       ] = 
2111     s->dc_val[0][xy     + wrap] =
2112     s->dc_val[0][xy + 1 + wrap] = 1024;
2113     /* ac pred */
2114     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
2115     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
2116     if (s->msmpeg4_version>=3) {
2117         s->coded_block[xy           ] =
2118         s->coded_block[xy + 1       ] =
2119         s->coded_block[xy     + wrap] =
2120         s->coded_block[xy + 1 + wrap] = 0;
2121     }
2122     /* chroma */
2123     wrap = s->block_wrap[4];
2124     xy = s->mb_x + 1 + (s->mb_y + 1) * wrap;
2125     s->dc_val[1][xy] =
2126     s->dc_val[2][xy] = 1024;
2127     /* ac pred */
2128     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
2129     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
2130     
2131     s->mbintra_table[s->mb_x + s->mb_y*s->mb_stride]= 0;
2132 }
2133
2134 /* generic function called after a macroblock has been parsed by the
2135    decoder or after it has been encoded by the encoder.
2136
2137    Important variables used:
2138    s->mb_intra : true if intra macroblock
2139    s->mv_dir   : motion vector direction
2140    s->mv_type  : motion vector type
2141    s->mv       : motion vector
2142    s->interlaced_dct : true if interlaced dct used (mpeg2)
2143  */
2144 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
2145 {
2146     int mb_x, mb_y;
2147     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
2148
2149     mb_x = s->mb_x;
2150     mb_y = s->mb_y;
2151
2152     s->current_picture.qscale_table[mb_xy]= s->qscale;
2153
2154     /* update DC predictors for P macroblocks */
2155     if (!s->mb_intra) {
2156         if (s->h263_pred || s->h263_aic) {
2157             if(s->mbintra_table[mb_xy])
2158                 ff_clean_intra_table_entries(s);
2159         } else {
2160             s->last_dc[0] =
2161             s->last_dc[1] =
2162             s->last_dc[2] = 128 << s->intra_dc_precision;
2163         }
2164     }
2165     else if (s->h263_pred || s->h263_aic)
2166         s->mbintra_table[mb_xy]=1;
2167
2168     /* update motion predictor, not for B-frames as they need the motion_val from the last P/S-Frame */
2169     if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE) { //FIXME move into h263.c if possible, format specific stuff shouldnt be here
2170         //FIXME a lot of thet is only needed for !low_delay
2171         const int wrap = s->block_wrap[0];
2172         const int xy = s->block_index[0];
2173         if(s->mv_type != MV_TYPE_8X8){
2174             int motion_x, motion_y;
2175             if (s->mb_intra) {
2176                 motion_x = 0;
2177                 motion_y = 0;
2178             } else if (s->mv_type == MV_TYPE_16X16) {
2179                 motion_x = s->mv[0][0][0];
2180                 motion_y = s->mv[0][0][1];
2181             } else /*if (s->mv_type == MV_TYPE_FIELD)*/ {
2182                 int i;
2183                 motion_x = s->mv[0][0][0] + s->mv[0][1][0];
2184                 motion_y = s->mv[0][0][1] + s->mv[0][1][1];
2185                 motion_x = (motion_x>>1) | (motion_x&1);
2186                 for(i=0; i<2; i++){
2187                     s->field_mv_table[mb_xy][i][0]= s->mv[0][i][0];
2188                     s->field_mv_table[mb_xy][i][1]= s->mv[0][i][1];
2189                     s->field_select_table[mb_xy][i]= s->field_select[0][i];
2190                 }
2191             }
2192             
2193             /* no update if 8X8 because it has been done during parsing */
2194             s->motion_val[xy][0] = motion_x;
2195             s->motion_val[xy][1] = motion_y;
2196             s->motion_val[xy + 1][0] = motion_x;
2197             s->motion_val[xy + 1][1] = motion_y;
2198             s->motion_val[xy + wrap][0] = motion_x;
2199             s->motion_val[xy + wrap][1] = motion_y;
2200             s->motion_val[xy + 1 + wrap][0] = motion_x;
2201             s->motion_val[xy + 1 + wrap][1] = motion_y;
2202         }
2203
2204         if(s->encoding){ //FIXME encoding MUST be cleaned up
2205             if (s->mv_type == MV_TYPE_8X8) 
2206                 s->current_picture.mb_type[mb_xy]= MB_TYPE_L0 | MB_TYPE_8x8;
2207             else
2208                 s->current_picture.mb_type[mb_xy]= MB_TYPE_L0 | MB_TYPE_16x16;
2209         }
2210     }
2211     
2212     if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { //FIXME precalc
2213         uint8_t *dest_y, *dest_cb, *dest_cr;
2214         int dct_linesize, dct_offset;
2215         op_pixels_func (*op_pix)[4];
2216         qpel_mc_func (*op_qpix)[16];
2217         const int linesize= s->current_picture.linesize[0]; //not s->linesize as this woulnd be wrong for field pics
2218         const int uvlinesize= s->current_picture.linesize[1];
2219
2220         /* avoid copy if macroblock skipped in last frame too */
2221         if (s->pict_type != B_TYPE) {
2222             s->current_picture.mbskip_table[mb_xy]= s->mb_skiped;
2223         }
2224
2225         /* skip only during decoding as we might trash the buffers during encoding a bit */
2226         if(!s->encoding){
2227             uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
2228             const int age= s->current_picture.age;
2229
2230             assert(age);
2231
2232             if (s->mb_skiped) {
2233                 s->mb_skiped= 0;
2234                 assert(s->pict_type!=I_TYPE);
2235  
2236                 (*mbskip_ptr) ++; /* indicate that this time we skiped it */
2237                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
2238
2239                 /* if previous was skipped too, then nothing to do !  */
2240                 if (*mbskip_ptr >= age && s->current_picture.reference){
2241                     return;
2242                 }
2243             } else if(!s->current_picture.reference){
2244                 (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
2245                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
2246             } else{
2247                 *mbskip_ptr = 0; /* not skipped */
2248             }
2249         }else
2250             s->mb_skiped= 0;
2251
2252         if(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME){ //FIXME precalc
2253             dest_y  = s->current_picture.data[0] + mb_x * 16;
2254             dest_cb = s->current_picture.data[1] + mb_x * 8;
2255             dest_cr = s->current_picture.data[2] + mb_x * 8;
2256         }else{
2257             dest_y  = s->current_picture.data[0] + (mb_y * 16* linesize  ) + mb_x * 16;
2258             dest_cb = s->current_picture.data[1] + (mb_y * 8 * uvlinesize) + mb_x * 8;
2259             dest_cr = s->current_picture.data[2] + (mb_y * 8 * uvlinesize) + mb_x * 8;
2260         }
2261
2262         if (s->interlaced_dct) {
2263             dct_linesize = linesize * 2;
2264             dct_offset = linesize;
2265         } else {
2266             dct_linesize = linesize;
2267             dct_offset = linesize * 8;
2268         }
2269
2270         if (!s->mb_intra) {
2271             /* motion handling */
2272             /* decoding or more than one mb_type (MC was allready done otherwise) */
2273             if((!s->encoding) || (s->mb_type[mb_xy]&(s->mb_type[mb_xy]-1))){
2274                 if ((!s->no_rounding) || s->pict_type==B_TYPE){                
2275                     op_pix = s->dsp.put_pixels_tab;
2276                     op_qpix= s->dsp.put_qpel_pixels_tab;
2277                 }else{
2278                     op_pix = s->dsp.put_no_rnd_pixels_tab;
2279                     op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
2280                 }
2281
2282                 if (s->mv_dir & MV_DIR_FORWARD) {
2283                     MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
2284                     op_pix = s->dsp.avg_pixels_tab;
2285                     op_qpix= s->dsp.avg_qpel_pixels_tab;
2286                 }
2287                 if (s->mv_dir & MV_DIR_BACKWARD) {
2288                     MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
2289                 }
2290             }
2291
2292             /* skip dequant / idct if we are really late ;) */
2293             if(s->hurry_up>1) return;
2294
2295             /* add dct residue */
2296             if(s->encoding || !(   s->mpeg2 || s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO 
2297                                 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
2298                 add_dequant_dct(s, block[0], 0, dest_y, dct_linesize);
2299                 add_dequant_dct(s, block[1], 1, dest_y + 8, dct_linesize);
2300                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
2301                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
2302
2303                 if(!(s->flags&CODEC_FLAG_GRAY)){
2304                     add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize);
2305                     add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize);
2306                 }
2307             } else if(s->codec_id != CODEC_ID_WMV2){
2308                 add_dct(s, block[0], 0, dest_y, dct_linesize);
2309                 add_dct(s, block[1], 1, dest_y + 8, dct_linesize);
2310                 add_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
2311                 add_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
2312
2313                 if(!(s->flags&CODEC_FLAG_GRAY)){
2314                     add_dct(s, block[4], 4, dest_cb, uvlinesize);
2315                     add_dct(s, block[5], 5, dest_cr, uvlinesize);
2316                 }
2317             } 
2318 #ifdef CONFIG_RISKY
2319             else{
2320                 ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
2321             }
2322 #endif
2323         } else {
2324             /* dct only in intra block */
2325             if(s->encoding || !(s->mpeg2 || s->codec_id==CODEC_ID_MPEG1VIDEO)){
2326                 put_dct(s, block[0], 0, dest_y, dct_linesize);
2327                 put_dct(s, block[1], 1, dest_y + 8, dct_linesize);
2328                 put_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
2329                 put_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
2330
2331                 if(!(s->flags&CODEC_FLAG_GRAY)){
2332                     put_dct(s, block[4], 4, dest_cb, uvlinesize);
2333                     put_dct(s, block[5], 5, dest_cr, uvlinesize);
2334                 }
2335             }else{
2336                 s->dsp.idct_put(dest_y                 , dct_linesize, block[0]);
2337                 s->dsp.idct_put(dest_y              + 8, dct_linesize, block[1]);
2338                 s->dsp.idct_put(dest_y + dct_offset    , dct_linesize, block[2]);
2339                 s->dsp.idct_put(dest_y + dct_offset + 8, dct_linesize, block[3]);
2340
2341                 if(!(s->flags&CODEC_FLAG_GRAY)){
2342                     s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
2343                     s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
2344                 }
2345             }
2346         }
2347     }
2348 }
2349
2350 #ifdef CONFIG_ENCODERS
2351
2352 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
2353 {
2354     static const char tab[64]=
2355         {3,2,2,1,1,1,1,1,
2356          1,1,1,1,1,1,1,1,
2357          1,1,1,1,1,1,1,1,
2358          0,0,0,0,0,0,0,0,
2359          0,0,0,0,0,0,0,0,
2360          0,0,0,0,0,0,0,0,
2361          0,0,0,0,0,0,0,0,
2362          0,0,0,0,0,0,0,0};
2363     int score=0;
2364     int run=0;
2365     int i;
2366     DCTELEM *block= s->block[n];
2367     const int last_index= s->block_last_index[n];
2368     int skip_dc;
2369
2370     if(threshold<0){
2371         skip_dc=0;
2372         threshold= -threshold;
2373     }else
2374         skip_dc=1;
2375
2376     /* are all which we could set to zero are allready zero? */
2377     if(last_index<=skip_dc - 1) return;
2378
2379     for(i=0; i<=last_index; i++){
2380         const int j = s->intra_scantable.permutated[i];
2381         const int level = ABS(block[j]);
2382         if(level==1){
2383             if(skip_dc && i==0) continue;
2384             score+= tab[run];
2385             run=0;
2386         }else if(level>1){
2387             return;
2388         }else{
2389             run++;
2390         }
2391     }
2392     if(score >= threshold) return;
2393     for(i=skip_dc; i<=last_index; i++){
2394         const int j = s->intra_scantable.permutated[i];
2395         block[j]=0;
2396     }
2397     if(block[0]) s->block_last_index[n]= 0;
2398     else         s->block_last_index[n]= -1;
2399 }
2400
2401 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
2402 {
2403     int i;
2404     const int maxlevel= s->max_qcoeff;
2405     const int minlevel= s->min_qcoeff;
2406     
2407     if(s->mb_intra){
2408         i=1; //skip clipping of intra dc
2409     }else
2410         i=0;
2411     
2412     for(;i<=last_index; i++){
2413         const int j= s->intra_scantable.permutated[i];
2414         int level = block[j];
2415        
2416         if     (level>maxlevel) level=maxlevel;
2417         else if(level<minlevel) level=minlevel;
2418
2419         block[j]= level;
2420     }
2421 }
2422
2423 #if 0
/**
 * Sum of absolute differences between vertically adjacent pixels of a
 * 16 pixel wide, 8 row high area (7 row pairs).
 * @param stride distance in bytes between two rows that are compared
 */
static int pix_vcmp16x8(uint8_t *s, int stride){ //FIXME move to dsputil & optimize
    int total=0;
    int row, col;

    for(row=0; row<7; row++){
        const uint8_t *upper= s + row*stride;
        const uint8_t *lower= upper + stride;

        for(col=0; col<16; col++){
            const int d= upper[col] - lower[col];
            total+= d<0 ? -d : d;
        }
    }

    return total;
}
2438
/**
 * Same measure as pix_vcmp16x8 but taken on the difference s1 - s2:
 * sum of absolute vertical differences of that difference over a
 * 16 pixel wide, 8 row high area (7 row pairs).
 * @param stride distance in bytes between two compared rows, shared by s1 and s2
 */
static int pix_diff_vcmp16x8(uint8_t *s1, uint8_t*s2, int stride){ //FIXME move to dsputil & optimize
    int total=0;
    int row, col;

    for(row=0; row<7; row++){
        const int upper= row*stride;
        const int lower= upper + stride;

        for(col=0; col<16; col++){
            const int d= s1[upper+col] - s2[upper+col] - s1[lower+col] + s2[lower+col];
            total+= d<0 ? -d : d;
        }
    }

    return total;
}
2453 #else
2454 #define SQ(a) ((a)*(a))
2455
/**
 * Sum of squared differences between vertically adjacent pixels of a
 * 16 pixel wide, 8 row high area (7 row pairs).
 * @param stride distance in bytes between two rows that are compared
 */
static int pix_vcmp16x8(uint8_t *s, int stride){ //FIXME move to dsputil & optimize
    int total=0;
    int row, col;

    for(row=0; row<7; row++){
        const uint8_t *upper= s + row*stride;
        const uint8_t *lower= upper + stride;

        for(col=0; col<16; col++){
            const int d= upper[col] - lower[col];
            total+= d*d;
        }
    }

    return total;
}
2470
/**
 * Same measure as pix_vcmp16x8 but taken on the difference s1 - s2:
 * sum of squared vertical differences of that difference over a
 * 16 pixel wide, 8 row high area (7 row pairs).
 * @param stride distance in bytes between two compared rows, shared by s1 and s2
 */
static int pix_diff_vcmp16x8(uint8_t *s1, uint8_t*s2, int stride){ //FIXME move to dsputil & optimize
    int total=0;
    int row, col;

    for(row=0; row<7; row++){
        const int upper= row*stride;
        const int lower= upper + stride;

        for(col=0; col<16; col++){
            const int d= s1[upper+col] - s2[upper+col] - s1[lower+col] + s2[lower+col];
            total+= d*d;
        }
    }

    return total;
}
2485
2486 #endif
2487
2488 #endif //CONFIG_ENCODERS
2489
2490 /**
2491  *
2492  * @param h is the normal height, this will be reduced automatically if needed for the last row
2493  */
2494 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
2495     if (    s->avctx->draw_horiz_band 
2496         && (s->last_picture_ptr || s->low_delay) ) {
2497         uint8_t *src_ptr[3];
2498         int offset;
2499         h= FFMIN(h, s->height - y);
2500
2501         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME)
2502             offset = 0;
2503         else
2504             offset = y * s->linesize;
2505
2506         if(s->pict_type==B_TYPE || s->low_delay){
2507             src_ptr[0] = s->current_picture.data[0] + offset;
2508             src_ptr[1] = s->current_picture.data[1] + (offset >> 2);
2509             src_ptr[2] = s->current_picture.data[2] + (offset >> 2);
2510         } else {
2511             src_ptr[0] = s->last_picture.data[0] + offset;
2512             src_ptr[1] = s->last_picture.data[1] + (offset >> 2);
2513             src_ptr[2] = s->last_picture.data[2] + (offset >> 2);
2514         }
2515         emms_c();
2516
2517         s->avctx->draw_horiz_band(s->avctx, src_ptr, s->linesize,
2518                                y, s->width, h);
2519     }
2520 }
2521
2522 #ifdef CONFIG_ENCODERS
2523
/**
 * Encodes the macroblock at (s->mb_x, s->mb_y).
 * Intra macroblocks load the source pixels of new_picture into s->block[0..5];
 * all other macroblocks run motion compensation into current_picture and load
 * the difference between the source and that prediction. The six blocks are
 * then passed through s->dct_quantize and handed to the codec specific
 * macroblock coder selected by s->codec_id.
 * @param motion_x forwarded unchanged to the codec specific macroblock coder
 * @param motion_y forwarded unchanged to the codec specific macroblock coder
 */
2524 static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2525 {
2526     const int mb_x= s->mb_x;
2527     const int mb_y= s->mb_y;
2528     int i;
         /* skip_dct[i] != 0: block i is not transformed and is marked empty (last index -1) */
2529     int skip_dct[6];
2530     int dct_offset   = s->linesize*8; //default for progressive frames
2531     
2532     for(i=0; i<6; i++) skip_dct[i]=0;
2533     
         /* per macroblock quantizer: dquant is the step from the current qscale
            to the value stored for this macroblock in qscale_table */
2534     if(s->adaptive_quant){
2535         s->dquant= s->current_picture.qscale_table[mb_x + mb_y*s->mb_stride] - s->qscale;
2536
             /* the step is limited to -2..2 for the H263 output format */
2537         if(s->out_format==FMT_H263){
2538             if     (s->dquant> 2) s->dquant= 2;
2539             else if(s->dquant<-2) s->dquant=-2;
2540         }
2541             
2542         if(s->codec_id==CODEC_ID_MPEG4){        
2543             if(!s->mb_intra){
                     /* no quantizer change for direct mode macroblocks */
2544                 if(s->mv_dir&MV_DIRECT)
2545                     s->dquant=0;
2546
2547                 assert(s->dquant==0 || s->mv_type!=MV_TYPE_8X8);
2548             }
2549         }
2550         s->qscale+= s->dquant;
2551         s->y_dc_scale= s->y_dc_scale_table[ s->qscale ];
2552         s->c_dc_scale= s->c_dc_scale_table[ s->qscale ];
2553     }
2554
2555     if (s->mb_intra) {
2556         uint8_t *ptr;
2557         int wrap_y;
2558         int emu=0;
2559
2560         wrap_y = s->linesize;
2561         ptr = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
2562
             /* the macroblock reaches beyond the picture: read the source
                through s->edge_emu_buffer instead */
2563         if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
2564             ff_emulated_edge_mc(s, ptr, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
2565             ptr= s->edge_emu_buffer;
2566             emu=1;
2567         }
2568         
             /* frame/field DCT decision: compare the vertical pixel differences
                with the rows in frame order against those with the rows split
                into the two fields */
2569         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
2570             int progressive_score, interlaced_score;
2571             
2572             progressive_score= pix_vcmp16x8(ptr, wrap_y  ) + pix_vcmp16x8(ptr + wrap_y*8, wrap_y );
2573             interlaced_score = pix_vcmp16x8(ptr, wrap_y*2) + pix_vcmp16x8(ptr + wrap_y  , wrap_y*2);
2574             
2575             if(progressive_score > interlaced_score + 100){
2576                 s->interlaced_dct=1;
2577             
                     /* field DCT: the lower blocks start one row down and
                        every block reads every second row */
2578                 dct_offset= wrap_y;
2579                 wrap_y<<=1;
2580             }else
2581                 s->interlaced_dct=0;
2582         }
2583         
2584         s->dsp.get_pixels(s->block[0], ptr                 , wrap_y);
2585         s->dsp.get_pixels(s->block[1], ptr              + 8, wrap_y);
2586         s->dsp.get_pixels(s->block[2], ptr + dct_offset    , wrap_y);
2587         s->dsp.get_pixels(s->block[3], ptr + dct_offset + 8, wrap_y);
2588
             /* gray mode: the chroma source is not read, the chroma blocks
                are filled in after quantization */
2589         if(s->flags&CODEC_FLAG_GRAY){
2590             skip_dct[4]= 1;
2591             skip_dct[5]= 1;
2592         }else{
2593             int wrap_c = s->uvlinesize;
2594             ptr = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
2595             if(emu){
2596                 ff_emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2597                 ptr= s->edge_emu_buffer;
2598             }
2599             s->dsp.get_pixels(s->block[4], ptr, wrap_c);
2600
2601             ptr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
2602             if(emu){
2603                 ff_emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2604                 ptr= s->edge_emu_buffer;
2605             }
2606             s->dsp.get_pixels(s->block[5], ptr, wrap_c);
2607         }
2608     }else{
2609         op_pixels_func (*op_pix)[4];
2610         qpel_mc_func (*op_qpix)[16];
             /* dest_*: prediction written into current_picture, ptr_*: source in new_picture */
2611         uint8_t *dest_y, *dest_cb, *dest_cr;
2612         uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2613         int wrap_y, wrap_c;
2614         int emu=0;
2615
2616         dest_y  = s->current_picture.data[0] + (mb_y * 16 * s->linesize    ) + mb_x * 16;
2617         dest_cb = s->current_picture.data[1] + (mb_y * 8  * (s->uvlinesize)) + mb_x * 8;
2618         dest_cr = s->current_picture.data[2] + (mb_y * 8  * (s->uvlinesize)) + mb_x * 8;
2619         wrap_y = s->linesize;
2620         wrap_c = s->uvlinesize;
2621         ptr_y  = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
2622         ptr_cb = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
2623         ptr_cr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
2624
2625         if ((!s->no_rounding) || s->pict_type==B_TYPE){
2626             op_pix = s->dsp.put_pixels_tab;
2627             op_qpix= s->dsp.put_qpel_pixels_tab;
2628         }else{
2629             op_pix = s->dsp.put_no_rnd_pixels_tab;
2630             op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
2631         }
2632
             /* the forward prediction is stored with the put functions; a
                backward prediction following it uses the avg functions */
2633         if (s->mv_dir & MV_DIR_FORWARD) {
2634             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
2635             op_pix = s->dsp.avg_pixels_tab;
2636             op_qpix= s->dsp.avg_qpel_pixels_tab;
2637         }
2638         if (s->mv_dir & MV_DIR_BACKWARD) {
2639             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
2640         }
2641
             /* the macroblock reaches beyond the picture: read the source
                through s->edge_emu_buffer instead */
2642         if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
2643             ff_emulated_edge_mc(s, ptr_y, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
2644             ptr_y= s->edge_emu_buffer;
2645             emu=1;
2646         }
2647         
             /* frame/field DCT decision, taken on the prediction error
                (source minus prediction) */
2648         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
2649             int progressive_score, interlaced_score;
2650             
2651             progressive_score= pix_diff_vcmp16x8(ptr_y           , dest_y           , wrap_y  ) 
2652                              + pix_diff_vcmp16x8(ptr_y + wrap_y*8, dest_y + wrap_y*8, wrap_y  );
2653             interlaced_score = pix_diff_vcmp16x8(ptr_y           , dest_y           , wrap_y*2)
2654                              + pix_diff_vcmp16x8(ptr_y + wrap_y  , dest_y + wrap_y  , wrap_y*2);
2655             
2656             if(progressive_score > interlaced_score + 600){
2657                 s->interlaced_dct=1;
2658             
                     /* field DCT: the lower blocks start one row down and
                        every block reads every second row */
2659                 dct_offset= wrap_y;
2660                 wrap_y<<=1;
2661             }else
2662                 s->interlaced_dct=0;
2663         }
2664         
2665         s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
2666         s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
2667         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
2668         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
2669         
             /* gray mode: the chroma blocks are skipped and end up empty */
2670         if(s->flags&CODEC_FLAG_GRAY){
2671             skip_dct[4]= 1;
2672             skip_dct[5]= 1;
2673         }else{
2674             if(emu){
2675                 ff_emulated_edge_mc(s, ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2676                 ptr_cb= s->edge_emu_buffer;
2677             }
2678             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2679             if(emu){
2680                 ff_emulated_edge_mc(s, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2681                 ptr_cr= s->edge_emu_buffer;
2682             }
2683             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2684         }
2685         /* pre quantization */         
             /* for macroblocks with a small mc_mb_var, blocks whose absolute
                difference to the prediction is below 20*qscale are skipped */
             /* NOTE(review): when emu is set, ptr_y, ptr_cb and ptr_cr all point
                at s->edge_emu_buffer, which was last filled for the cr block, so
                the luma (and cb) checks below presumably compare against the
                wrong data - confirm against ff_emulated_edge_mc */
2686         if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
2687             //FIXME optimize
2688             if(s->dsp.pix_abs8x8(ptr_y               , dest_y               , wrap_y) < 20*s->qscale) skip_dct[0]= 1;
2689             if(s->dsp.pix_abs8x8(ptr_y            + 8, dest_y            + 8, wrap_y) < 20*s->qscale) skip_dct[1]= 1;
2690             if(s->dsp.pix_abs8x8(ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y) < 20*s->qscale) skip_dct[2]= 1;
2691             if(s->dsp.pix_abs8x8(ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y) < 20*s->qscale) skip_dct[3]= 1;
2692             if(s->dsp.pix_abs8x8(ptr_cb              , dest_cb              , wrap_c) < 20*s->qscale) skip_dct[4]= 1;
2693             if(s->dsp.pix_abs8x8(ptr_cr              , dest_cr              , wrap_c) < 20*s->qscale) skip_dct[5]= 1;
2694 #if 0
2695 {
2696  static int stat[7];
2697  int num=0;
2698  for(i=0; i<6; i++)
2699   if(skip_dct[i]) num++;
2700  stat[num]++;
2701  
2702  if(s->mb_x==0 && s->mb_y==0){
2703   for(i=0; i<7; i++){
2704    printf("%6d %1d\n", stat[i], i);
2705   }
2706  }
2707 }
2708 #endif
2709         }
2710
2711     }
2712             
2713 #if 0
2714             {
2715                 float adap_parm;
2716                 
2717                 adap_parm = ((s->avg_mb_var << 1) + s->mb_var[s->mb_stride*mb_y+mb_x] + 1.0) /
2718                             ((s->mb_var[s->mb_stride*mb_y+mb_x] << 1) + s->avg_mb_var + 1.0);
2719             
2720                 printf("\ntype=%c qscale=%2d adap=%0.2f dquant=%4.2f var=%4d avgvar=%4d", 
2721                         (s->mb_type[s->mb_stride*mb_y+mb_x] > 0) ? 'I' : 'P', 
2722                         s->qscale, adap_parm, s->qscale*adap_parm,
2723                         s->mb_var[s->mb_stride*mb_y+mb_x], s->avg_mb_var);
2724             }
2725 #endif
2726     /* DCT & quantize */
         /* MJPEG: all six blocks are quantized with a fixed qscale argument
            of 8, skip_dct is not consulted */
2727     if(s->out_format==FMT_MJPEG){
2728         for(i=0;i<6;i++) {
2729             int overflow;
2730             s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, 8, &overflow);
2731             if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
2732         }
2733     }else{
2734         for(i=0;i<6;i++) {
2735             if(!skip_dct[i]){
2736                 int overflow;
2737                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2738             // FIXME we could decide to change to quantizer instead of clipping
2739             // JS: I don't think that would be a good idea it could lower quality instead
2740             //     of improve it. Just INTRADC clipping deserves changes in quantizer
2741                 if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
2742             }else
2743                 s->block_last_index[i]= -1;
2744         }
             /* coefficient elimination is applied to non intra macroblocks
                only, with separate thresholds for luma (blocks 0-3) and
                chroma (blocks 4-5) */
2745         if(s->luma_elim_threshold && !s->mb_intra)
2746             for(i=0; i<4; i++)
2747                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2748         if(s->chroma_elim_threshold && !s->mb_intra)
2749             for(i=4; i<6; i++)
2750                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2751     }
2752
         /* gray intra macroblock: both chroma blocks carry only a first
            coefficient of 1024/c_dc_scale (rounded) */
2753     if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
2754         s->block_last_index[4]=
2755         s->block_last_index[5]= 0;
2756         s->block[4][0]=
2757         s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
2758     }
2759
2760     /* huffman encode */
         /* codec ids without a case below (including the CONFIG_RISKY ones
            when that is not defined) end in assert(0) */
2761     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2762     case CODEC_ID_MPEG1VIDEO:
2763         mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
2764 #ifdef CONFIG_RISKY
2765     case CODEC_ID_MPEG4:
2766         mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
2767     case CODEC_ID_MSMPEG4V2:
2768     case CODEC_ID_MSMPEG4V3:
2769     case CODEC_ID_WMV1:
2770         msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
2771     case CODEC_ID_WMV2:
2772          ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
2773     case CODEC_ID_H263:
2774     case CODEC_ID_H263P:
2775     case CODEC_ID_RV10:
2776         h263_encode_mb(s, s->block, motion_x, motion_y); break;
2777 #endif
2778     case CODEC_ID_MJPEG:
2779         mjpeg_encode_mb(s, s->block); break;
2780     default:
2781         assert(0);
2782     }
2783 }
2784
2785 #endif //CONFIG_ENCODERS
2786
2787 /**
2788  * combines the (truncated) bitstream to a complete frame
2789  * @returns -1 if no complete frame could be created
2790  */
2791 int ff_combine_frame( MpegEncContext *s, int next, uint8_t **buf, int *buf_size){
2792     ParseContext *pc= &s->parse_context;
2793
2794 #if 0
2795     if(pc->overread){
2796         printf("overread %d, state:%X next:%d index:%d o_index:%d\n", pc->overread, pc->state, next, pc->index, pc->overread_index);
2797         printf("%X %X %X %X\n", (*buf)[0], (*buf)[1],(*buf)[2],(*buf)[3]);
2798     }
2799 #endif
2800
2801     /* copy overreaded byes from last frame into buffer */
2802     for(; pc->overread>0; pc->overread--){
2803         pc->buffer[pc->index++]= pc->buffer[pc->overread_index++];
2804     }
2805     
2806     pc->last_index= pc->index;
2807
2808     /* copy into buffer end return */
2809     if(next == END_NOT_FOUND){
2810         pc->buffer= av_fast_realloc(pc->buffer, &pc->buffer_size, (*buf_size) + pc->index + FF_INPUT_BUFFER_PADDING_SIZE);
2811
2812         memcpy(&pc->buffer[pc->index], *buf, *buf_size);
2813         pc->index += *buf_size;
2814         return -1;
2815     }
2816     
2817     pc->overread_index= pc->index + next;
2818     
2819     /* append to buffer */
2820     if(pc->index){
2821         pc->buffer= av_fast_realloc(pc->buffer, &pc->buffer_size, next + pc->index + FF_INPUT_BUFFER_PADDING_SIZE);
2822
2823         memcpy(&pc->buffer[pc->index], *buf, next + FF_INPUT_BUFFER_PADDING_SIZE );
2824         pc->index = 0;
2825         *buf= pc->buffer;
2826         *buf_size= pc->last_index + next;
2827     }
2828
2829     /* store overread bytes */
2830     for(;next < 0; next++){
2831         pc->state = (pc->state<<8) | pc->buffer[pc->last_index + next];
2832         pc->overread++;
2833     }
2834
2835 #if 0
2836     if(pc->overread){
2837         printf("overread %d, state:%X next:%d index:%d o_index:%d\n", pc->overread, pc->state, next, pc->index, pc->overread_index);
2838         printf("%X %X %X %X\n", (*buf)[0], (*buf)[1],(*buf)[2],(*buf)[3]);
2839     }
2840 #endif
2841
2842     return 0;
2843 }
2844
2845 #ifdef CONFIG_ENCODERS
2846 void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
2847 {
2848     int bytes= length>>4;
2849     int bits= length&15;
2850     int i;
2851
2852     if(length==0) return;
2853
2854     for(i=0; i<bytes; i++) put_bits(pb, 16, be2me_16(((uint16_t*)src)[i]));
2855     put_bits(pb, bits, be2me_16(((uint16_t*)src)[i])>>(16-bits));
2856 }
2857
2858 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2859     int i;
2860
2861     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
2862
2863     /* mpeg1 */
2864     d->mb_skip_run= s->mb_skip_run;
2865     for(i=0; i<3; i++)
2866         d->last_dc[i]= s->last_dc[i];
2867     
2868     /* statistics */
2869     d->mv_bits= s->mv_bits;
2870     d->i_tex_bits= s->i_tex_bits;
2871     d->p_tex_bits= s->p_tex_bits;
2872     d->i_count= s->i_count;
2873     d->f_count= s->f_count;
2874     d->b_count= s->b_count;
2875     d->skip_count= s->skip_count;
2876     d->misc_bits= s->misc_bits;
2877     d->last_bits= 0;
2878
2879     d->mb_skiped= s->mb_skiped;
2880     d->qscale= s->qscale;
2881 }
2882
2883 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2884     int i;
2885
2886     memcpy(d->mv, s->mv, 2*4*2*sizeof(int)); 
2887     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
2888     
2889     /* mpeg1 */
2890     d->mb_skip_run= s->mb_skip_run;
2891     for(i=0; i<3; i++)
2892         d->last_dc[i]= s->last_dc[i];
2893     
2894     /* statistics */
2895     d->mv_bits= s->mv_bits;
2896     d->i_tex_bits= s->i_tex_bits;
2897     d->p_tex_bits= s->p_tex_bits;
2898     d->i_count= s->i_count;
2899     d->f_count= s->f_count;
2900     d->b_count= s->b_count;
2901     d->skip_count= s->skip_count;
2902     d->misc_bits= s->misc_bits;
2903
2904     d->mb_intra= s->mb_intra;
2905     d->mb_skiped= s->mb_skiped;
2906     d->mv_type= s->mv_type;
2907     d->mv_dir= s->mv_dir;
2908     d->pb= s->pb;
2909     if(s->data_partitioning){
2910         d->pb2= s->pb2;
2911         d->tex_pb= s->tex_pb;
2912     }
2913     d->block= s->block;
2914     for(i=0; i<6; i++)
2915         d->block_last_index[i]= s->block_last_index[i];
2916     d->interlaced_dct= s->interlaced_dct;
2917     d->qscale= s->qscale;
2918 }
2919
2920 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type, 
2921                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2922                            int *dmin, int *next_block, int motion_x, int motion_y)
2923 {
2924     int bits_count;
2925     
2926     copy_context_before_encode(s, backup, type);
2927
2928     s->block= s->blocks[*next_block];
2929     s->pb= pb[*next_block];
2930     if(s->data_partitioning){
2931         s->pb2   = pb2   [*next_block];
2932         s->tex_pb= tex_pb[*next_block];
2933     }
2934
2935     encode_mb(s, motion_x, motion_y);
2936
2937     bits_count= get_bit_count(&s->pb);
2938     if(s->data_partitioning){
2939         bits_count+= get_bit_count(&s->pb2);
2940         bits_count+= get_bit_count(&s->tex_pb);
2941     }
2942
2943     if(bits_count<*dmin){
2944         *dmin= bits_count;
2945         *next_block^=1;
2946
2947         copy_context_after_encode(best, s, type);
2948     }
2949 }
2950                 
2951 static inline int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2952     uint32_t *sq = squareTbl + 256;
2953     int acc=0;
2954     int x,y;
2955     
2956     if(w==16 && h==16) 
2957         return s->dsp.sse[0](NULL, src1, src2, stride);
2958     else if(w==8 && h==8)
2959         return s->dsp.sse[1](NULL, src1, src2, stride);
2960     
2961     for(y=0; y<h; y++){
2962         for(x=0; x<w; x++){
2963             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2964         } 
2965     }
2966     
2967     assert(acc>=0);
2968     
2969     return acc;
2970 }
2971
2972 static void encode_picture(MpegEncContext *s, int picture_number)
2973 {
2974     int mb_x, mb_y, pdif = 0;
2975     int i;
2976     int bits;
2977     MpegEncContext best_s, backup_s;
2978     uint8_t bit_buf[2][3000];
2979     uint8_t bit_buf2[2][3000];
2980     uint8_t bit_buf_tex[2][3000];
2981     PutBitContext pb[2], pb2[2], tex_pb[2];
2982
2983     for(i=0; i<2; i++){
2984         init_put_bits(&pb    [i], bit_buf    [i], 3000, NULL, NULL);
2985         init_put_bits(&pb2   [i], bit_buf2   [i], 3000, NULL, NULL);
2986         init_put_bits(&tex_pb[i], bit_buf_tex[i], 3000, NULL, NULL);
2987     }
2988
2989     s->picture_number = picture_number;
2990     
2991     /* Reset the average MB variance */
2992     s->current_picture.mb_var_sum = 0;
2993     s->current_picture.mc_mb_var_sum = 0;
2994
2995 #ifdef CONFIG_RISKY
2996     /* we need to initialize some time vars before we can encode b-frames */
2997     // RAL: Condition added for MPEG1VIDEO
2998     if (s->codec_id == CODEC_ID_MPEG1VIDEO || (s->h263_pred && !s->h263_msmpeg4))
2999         ff_set_mpeg4_time(s, s->picture_number); 
3000 #endif
3001         
3002     s->scene_change_score=0;
3003     
3004     s->qscale= (int)(s->frame_qscale + 0.5); //FIXME qscale / ... stuff for ME ratedistoration
3005     
3006     if(s->pict_type==I_TYPE){
3007         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3008         else                        s->no_rounding=0;
3009     }else if(s->pict_type!=B_TYPE){
3010         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
3011             s->no_rounding ^= 1;          
3012     }
3013     
3014     /* Estimate motion for every MB */
3015     s->mb_intra=0; //for the rate distoration & bit compare functions
3016     if(s->pict_type != I_TYPE){
3017         if(s->pict_type != B_TYPE){
3018             if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
3019                 s->me.pre_pass=1;
3020                 s->me.dia_size= s->avctx->pre_dia_size;
3021
3022                 for(mb_y=s->mb_height-1; mb_y >=0 ; mb_y--) {
3023                     for(mb_x=s->mb_width-1; mb_x >=0 ; mb_x--) {
3024                         s->mb_x = mb_x;
3025                         s->mb_y = mb_y;
3026                         ff_pre_estimate_p_frame_motion(s, mb_x, mb_y);
3027                     }
3028                 }
3029                 s->me.pre_pass=0;
3030             }
3031         }
3032
3033         s->me.dia_size= s->avctx->dia_size;
3034         for(mb_y=0; mb_y < s->mb_height; mb_y++) {
3035             s->block_index[0]= s->block_wrap[0]*(mb_y*2 + 1) - 1;
3036             s->block_index[1]= s->block_wrap[0]*(mb_y*2 + 1);
3037             s->block_index[2]= s->block_wrap[0]*(mb_y*2 + 2) - 1;
3038             s->block_index[3]= s->block_wrap[0]*(mb_y*2 + 2);
3039             for(mb_x=0; mb_x < s->mb_width; mb_x++) {
3040                 s->mb_x = mb_x;
3041                 s->mb_y = mb_y;
3042                 s->block_index[0]+=2;
3043                 s->block_index[1]+=2;
3044                 s->block_index[2]+=2;
3045                 s->block_index[3]+=2;
3046                 
3047                 /* compute motion vector & mb_type and store in context */
3048                 if(s->pict_type==B_TYPE)
3049                     ff_estimate_b_frame_motion(s, mb_x, mb_y);
3050                 else
3051                     ff_estimate_p_frame_motion(s, mb_x, mb_y);
3052             }
3053         }
3054     }else /* if(s->pict_type == I_TYPE) */{
3055         /* I-Frame */
3056         //FIXME do we need to zero them?
3057         memset(s->motion_val[0], 0, sizeof(int16_t)*(s->mb_width*2 + 2)*(s->mb_height*2 + 2)*2);
3058         memset(s->p_mv_table   , 0, sizeof(int16_t)*(s->mb_stride)*s->mb_height*2);
3059         memset(s->mb_type      , MB_TYPE_INTRA, sizeof(uint8_t)*s->mb_stride*s->mb_height);
3060         
3061         if(!s->fixed_qscale){
3062             /* finding spatial complexity for I-frame rate control */
3063             for(mb_y=0; mb_y < s->mb_height; mb_y++) {
3064                 for(mb_x=0; mb_x < s->mb_width; mb_x++) {
3065                     int xx = mb_x * 16;
3066                     int yy = mb_y * 16;
3067                     uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
3068                     int varc;
3069                     int sum = s->dsp.pix_sum(pix, s->linesize);
3070     
3071                     varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
3072
3073                     s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
3074                     s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
3075                     s->current_picture.mb_var_sum    += varc;
3076                 }
3077             }
3078         }
3079     }
3080     emms_c();
3081
3082     if(s->scene_change_score > 0 && s->pict_type == P_TYPE){
3083         s->pict_type= I_TYPE;
3084         memset(s->mb_type   , MB_TYPE_INTRA, sizeof(uint8_t)*s->mb_stride*s->mb_height);
3085 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3086     }
3087
3088     if(!s->umvplus){
3089         if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
3090             s->f_code= ff_get_best_fcode(s, s->p_mv_table, MB_TYPE_INTER);
3091         
3092             ff_fix_long_p_mvs(s);
3093         }
3094
3095         if(s->pict_type==B_TYPE){
3096             int a, b;
3097
3098             a = ff_get_best_fcode(s, s->b_forw_mv_table, MB_TYPE_FORWARD);
3099             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, MB_TYPE_BIDIR);
3100             s->f_code = FFMAX(a, b);
3101
3102             a = ff_get_best_fcode(s, s->b_back_mv_table, MB_TYPE_BACKWARD);
3103             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, MB_TYPE_BIDIR);
3104             s->b_code = FFMAX(a, b);
3105
3106             ff_fix_long_b_mvs(s, s->b_forw_mv_table, s->f_code, MB_TYPE_FORWARD);
3107             ff_fix_long_b_mvs(s, s->b_back_mv_table, s->b_code, MB_TYPE_BACKWARD);
3108             ff_fix_long_b_mvs(s, s->b_bidir_forw_mv_table, s->f_code, MB_TYPE_BIDIR);
3109             ff_fix_long_b_mvs(s, s->b_bidir_back_mv_table, s->b_code, MB_TYPE_BIDIR);
3110         }
3111     }
3112     
3113     if (s->fixed_qscale) 
3114         s->frame_qscale = s->current_picture.quality;
3115     else
3116         s->frame_qscale = ff_rate_estimate_qscale(s);
3117
3118     if(s->adaptive_quant){
3119 #ifdef CONFIG_RISKY
3120         switch(s->codec_id){
3121         case CODEC_ID_MPEG4:
3122             ff_clean_mpeg4_qscales(s);
3123             break;
3124         case CODEC_ID_H263:
3125         case CODEC_ID_H263P:
3126             ff_clean_h263_qscales(s);
3127             break;
3128         }
3129 #endif
3130
3131         s->qscale= s->current_picture.qscale_table[0];
3132     }else
3133         s->qscale= (int)(s->frame_qscale + 0.5);
3134         
3135     if (s->out_format == FMT_MJPEG) {
3136         /* for mjpeg, we do include qscale in the matrix */
3137         s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
3138         for(i=1;i<64;i++){
3139             int j= s->dsp.idct_permutation[i];
3140
3141             s->intra_matrix[j] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3142         }
3143         convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16, 
3144                        s->q_intra_matrix16_bias, s->intra_matrix, s->intra_quant_bias, 8, 8);
3145     }
3146     
3147     //FIXME var duplication
3148     s->current_picture.key_frame= s->pict_type == I_TYPE;
3149     s->current_picture.pict_type= s->pict_type;
3150
3151     if(s->current_picture.key_frame)
3152         s->picture_in_gop_number=0;
3153
3154     s->last_bits= get_bit_count(&s->pb);
3155     switch(s->out_format) {
3156     case FMT_MJPEG:
3157         mjpeg_picture_header(s);
3158         break;
3159 #ifdef CONFIG_RISKY
3160     case FMT_H263:
3161         if (s->codec_id == CODEC_ID_WMV2) 
3162             ff_wmv2_encode_picture_header(s, picture_number);
3163         else if (s->h263_msmpeg4) 
3164             msmpeg4_encode_picture_header(s, picture_number);
3165         else if (s->h263_pred)
3166             mpeg4_encode_picture_header(s, picture_number);
3167         else if (s->h263_rv10) 
3168             rv10_encode_picture_header(s, picture_number);
3169         else
3170             h263_encode_picture_header(s, picture_number);
3171         break;
3172 #endif
3173     case FMT_MPEG1:
3174         mpeg1_encode_picture_header(s, picture_number);
3175         break;
3176     }
3177     bits= get_bit_count(&s->pb);
3178     s->header_bits= bits - s->last_bits;
3179     s->last_bits= bits;
3180     s->mv_bits=0;
3181     s->misc_bits=0;
3182     s->i_tex_bits=0;
3183     s->p_tex_bits=0;
3184     s->i_count=0;
3185     s->f_count=0;
3186     s->b_count=0;
3187     s->skip_count=0;
3188
3189     for(i=0; i<3; i++){
3190         /* init last dc values */
3191         /* note: quant matrix value (8) is implied here */
3192         s->last_dc[i] = 128;
3193         
3194         s->current_picture_ptr->error[i] = 0;
3195     }
3196     s->mb_skip_run = 0;
3197     s->last_mv[0][0][0] = 0;
3198     s->last_mv[0][0][1] = 0;
3199     s->last_mv[1][0][0] = 0;
3200     s->last_mv[1][0][1] = 0;
3201      
3202     s->last_mv_dir = 0;
3203
3204 #ifdef CONFIG_RISKY
3205     if (s->codec_id==CODEC_ID_H263 || s->codec_id==CODEC_ID_H263P)
3206         s->gob_index = ff_h263_get_gob_height(s);
3207
3208     if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame)
3209         ff_mpeg4_init_partitions(s);
3210 #endif
3211
3212     s->resync_mb_x=0;
3213     s->resync_mb_y=0;
3214     s->first_slice_line = 1;
3215     s->ptr_lastgob = s->pb.buf;
3216     for(mb_y=0; mb_y < s->mb_height; mb_y++) {
3217         s->y_dc_scale= s->y_dc_scale_table[ s->qscale ];
3218         s->c_dc_scale= s->c_dc_scale_table[ s->qscale ];
3219         
3220         s->block_index[0]= s->block_wrap[0]*(mb_y*2 + 1) - 1;
3221         s->block_index[1]= s->block_wrap[0]*(mb_y*2 + 1);
3222         s->block_index[2]= s->block_wrap[0]*(mb_y*2 + 2) - 1;
3223         s->block_index[3]= s->block_wrap[0]*(mb_y*2 + 2);
3224         s->block_index[4]= s->block_wrap[4]*(mb_y + 1)                    + s->block_wrap[0]*(s->mb_height*2 + 2);
3225         s->block_index[5]= s->block_wrap[4]*(mb_y + 1 + s->mb_height + 2) + s->block_wrap[0]*(s->mb_height*2 + 2);
3226         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
3227             const int xy= mb_y*s->mb_stride + mb_x;
3228             int mb_type= s->mb_type[xy];
3229 //            int d;
3230             int dmin=10000000;
3231
3232             s->mb_x = mb_x;
3233             s->mb_y = mb_y;
3234             s->block_index[0]+=2;
3235             s->block_index[1]+=2;
3236             s->block_index[2]+=2;
3237             s->block_index[3]+=2;
3238             s->block_index[4]++;
3239             s->block_index[5]++;
3240
3241             /* write gob / video packet header  */
3242 #ifdef CONFIG_RISKY
3243             if(s->rtp_mode){
3244                 int current_packet_size, is_gob_start;
3245                 
3246                 current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
3247                 is_gob_start=0;
3248                 
3249                 if(s->codec_id==CODEC_ID_MPEG4){
3250                     if(current_packet_size >= s->rtp_payload_size
3251                        && s->mb_y + s->mb_x>0){
3252
3253                         if(s->partitioned_frame){
3254                             ff_mpeg4_merge_partitions(s);
3255                             ff_mpeg4_init_partitions(s);
3256                         }
3257                         ff_mpeg4_encode_video_packet_header(s);
3258
3259                         if(s->flags&CODEC_FLAG_PASS1){
3260                             int bits= get_bit_count(&s->pb);
3261                             s->misc_bits+= bits - s->last_bits;
3262                             s->last_bits= bits;
3263                         }
3264                         ff_mpeg4_clean_buffers(s);
3265                         is_gob_start=1;
3266                     }
3267                 }else if(s->codec_id==CODEC_ID_MPEG1VIDEO){
3268                     if(   current_packet_size >= s->rtp_payload_size 
3269                        && s->mb_y + s->mb_x>0 && s->mb_skip_run==0){
3270                         ff_mpeg1_encode_slice_header(s);
3271                         ff_mpeg1_clean_buffers(s);
3272                         is_gob_start=1;
3273                     }
3274                 }else{
3275                     if(current_packet_size >= s->rtp_payload_size
3276                        && s->mb_x==0 && s->mb_y>0 && s->mb_y%s->gob_index==0){
3277                        
3278                         h263_encode_gob_header(s, mb_y);                       
3279                         is_gob_start=1;
3280                     }
3281                 }
3282
3283                 if(is_gob_start){
3284                     s->ptr_lastgob = pbBufPtr(&s->pb);
3285                     s->first_slice_line=1;
3286                     s->resync_mb_x=mb_x;
3287                     s->resync_mb_y=mb_y;
3288                 }
3289             }
3290 #endif
3291
3292             if(  (s->resync_mb_x   == s->mb_x)
3293                && s->resync_mb_y+1 == s->mb_y){
3294                 s->first_slice_line=0; 
3295             }
3296
3297             if(mb_type & (mb_type-1)){ // more than 1 MB type possible
3298                 int next_block=0;
3299                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
3300
3301                 copy_context_before_encode(&backup_s, s, -1);
3302                 backup_s.pb= s->pb;
3303                 best_s.data_partitioning= s->data_partitioning;
3304                 best_s.partitioned_frame= s->partitioned_frame;
3305                 if(s->data_partitioning){
3306                     backup_s.pb2= s->pb2;
3307                     backup_s.tex_pb= s->tex_pb;
3308                 }
3309
3310                 if(mb_type&MB_TYPE_INTER){
3311                     s->mv_dir = MV_DIR_FORWARD;
3312                     s->mv_type = MV_TYPE_16X16;
3313                     s->mb_intra= 0;
3314                     s->mv[0][0][0] = s->p_mv_table[xy][0];
3315                     s->mv[0][0][1] = s->p_mv_table[xy][1];
3316                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER, pb, pb2, tex_pb, 
3317                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3318                 }
3319                 if(mb_type&MB_TYPE_INTER4V){                 
3320                     s->mv_dir = MV_DIR_FORWARD;
3321                     s->mv_type = MV_TYPE_8X8;
3322                     s->mb_intra= 0;
3323                     for(i=0; i<4; i++){
3324                         s->mv[0][i][0] = s->motion_val[s->block_index[i]][0];
3325                         s->mv[0][i][1] = s->motion_val[s->block_index[i]][1];
3326                     }
3327                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER4V, pb, pb2, tex_pb, 
3328                                  &dmin, &next_block, 0, 0);
3329                 }
3330                 if(mb_type&MB_TYPE_FORWARD){
3331                     s->mv_dir = MV_DIR_FORWARD;
3332                     s->mv_type = MV_TYPE_16X16;
3333                     s->mb_intra= 0;
3334                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3335                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3336                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_FORWARD, pb, pb2, tex_pb, 
3337                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3338                 }
3339                 if(mb_type&MB_TYPE_BACKWARD){
3340                     s->mv_dir = MV_DIR_BACKWARD;
3341                     s->mv_type = MV_TYPE_16X16;
3342                     s->mb_intra= 0;
3343                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3344                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3345                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_BACKWARD, pb, pb2, tex_pb, 
3346                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3347                 }
3348                 if(mb_type&MB_TYPE_BIDIR){
3349                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3350                     s->mv_type = MV_TYPE_16X16;
3351                     s->mb_intra= 0;
3352                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3353                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3354                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3355                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3356                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_BIDIR, pb, pb2, tex_pb, 
3357                                  &dmin, &next_block, 0, 0);
3358                 }
3359                 if(mb_type&MB_TYPE_DIRECT){
3360                     int mx= s->b_direct_mv_table[xy][0];
3361                     int my= s->b_direct_mv_table[xy][1];
3362                     
3363                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3364                     s->mb_intra= 0;
3365 #ifdef CONFIG_RISKY
3366                     ff_mpeg4_set_direct_mv(s, mx, my);
3367 #endif
3368                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_DIRECT, pb, pb2, tex_pb, 
3369                                  &dmin, &next_block, mx, my);
3370                 }
3371                 if(mb_type&MB_TYPE_INTRA){
3372                     s->mv_dir = 0;
3373                     s->mv_type = MV_TYPE_16X16;
3374                     s->mb_intra= 1;
3375                     s->mv[0][0][0] = 0;
3376                     s->mv[0][0][1] = 0;
3377                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTRA, pb, pb2, tex_pb, 
3378                                  &dmin, &next_block, 0, 0);
3379                     /* force cleaning of ac/dc pred stuff if needed ... */
3380                     if(s->h263_pred || s->h263_aic)
3381                         s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3382                 }
3383                 copy_context_after_encode(s, &best_s, -1);
3384                 
3385                 pb_bits_count= get_bit_count(&s->pb);
3386                 flush_put_bits(&s->pb);
3387                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3388                 s->pb= backup_s.pb;
3389                 
3390                 if(s->data_partitioning){
3391                     pb2_bits_count= get_bit_count(&s->pb2);
3392                     flush_put_bits(&s->pb2);
3393                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3394                     s->pb2= backup_s.pb2;
3395                     
3396                     tex_pb_bits_count= get_bit_count(&s->tex_pb);
3397                     flush_put_bits(&s->tex_pb);
3398                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3399                     s->tex_pb= backup_s.tex_pb;
3400                 }
3401                 s->last_bits= get_bit_count(&s->pb);
3402             } else {
3403                 int motion_x, motion_y;
3404                 int intra_score;
3405                 int inter_score= s->current_picture.mb_cmp_score[mb_x + mb_y*s->mb_stride];
3406                 
3407               if(!(s->flags&CODEC_FLAG_HQ) && s->pict_type==P_TYPE){
3408                 /* get luma score */
3409                 if((s->avctx->mb_cmp&0xFF)==FF_CMP_SSE){
3410                     intra_score= (s->current_picture.mb_var[mb_x + mb_y*s->mb_stride]<<8) - 500; //FIXME dont scale it down so we dont have to fix it
3411                 }else{
3412                     uint8_t *dest_y;
3413
3414                     int mean= s->current_picture.mb_mean[mb_x + mb_y*s->mb_stride]; //FIXME
3415                     mean*= 0x01010101;
3416                     
3417                     dest_y  = s->new_picture.data[0] + (mb_y * 16 * s->linesize    ) + mb_x * 16;
3418                 
3419                     for(i=0; i<16; i++){
3420                         *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 0]) = mean;
3421                         *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 4]) = mean;
3422                         *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 8]) = mean;
3423                         *(uint32_t*)(&s->me.scratchpad[i*s->linesize+12]) = mean;
3424                     }
3425
3426                     s->mb_intra=1;
3427                     intra_score= s->dsp.mb_cmp[0](s, s->me.scratchpad, dest_y, s->linesize);
3428                                         
3429 /*                    printf("intra:%7d inter:%7d var:%7d mc_var.%7d\n", intra_score>>8, inter_score>>8, 
3430                         s->current_picture.mb_var[mb_x + mb_y*s->mb_stride],
3431                         s->current_picture.mc_mb_var[mb_x + mb_y*s->mb_stride]);*/
3432                 }
3433                 
3434                 /* get chroma score */
3435                 if(s->avctx->mb_cmp&FF_CMP_CHROMA){
3436                     int i;
3437                     
3438                     s->mb_intra=1;
3439                     for(i=1; i<3; i++){
3440                         uint8_t *dest_c;
3441                         int mean;
3442                         
3443                         if(s->out_format == FMT_H263){
3444                             mean= (s->dc_val[i][mb_x + (mb_y+1)*(s->mb_width+2)] + 4)>>3; //FIXME not exact but simple ;)
3445                         }else{
3446                             mean= (s->last_dc[i] + 4)>>3;
3447                         }
3448                         dest_c = s->new_picture.data[i] + (mb_y * 8  * (s->uvlinesize)) + mb_x * 8;
3449                         
3450                         mean*= 0x01010101;
3451                         for(i=0; i<8; i++){
3452                             *(uint32_t*)(&s->me.scratchpad[i*s->uvlinesize+ 0]) = mean;
3453                             *(uint32_t*)(&s->me.scratchpad[i*s->uvlinesize+ 4]) = mean;
3454                         }
3455                         
3456                         intra_score+= s->dsp.mb_cmp[1](s, s->me.scratchpad, dest_c, s->uvlinesize);
3457                     }                
3458                 }
3459
3460                 /* bias */
3461                 switch(s->avctx->mb_cmp&0xFF){
3462                 default:
3463                 case FF_CMP_SAD:
3464                     intra_score+= 32*s->qscale;
3465                     break;
3466                 case FF_CMP_SSE:
3467                     intra_score+= 24*s->qscale*s->qscale;
3468                     break;
3469                 case FF_CMP_SATD:
3470                     intra_score+= 96*s->qscale;
3471                     break;
3472                 case FF_CMP_DCT:
3473                     intra_score+= 48*s->qscale;
3474                     break;
3475                 case FF_CMP_BIT:
3476                     intra_score+= 16;
3477                     break;
3478                 case FF_CMP_PSNR:
3479                 case FF_CMP_RD:
3480                     intra_score+= (s->qscale*s->qscale*109*8 + 64)>>7;
3481                     break;
3482                 }
3483
3484                 if(intra_score < inter_score)
3485                     mb_type= MB_TYPE_INTRA;
3486               }  
3487                 
3488                 s->mv_type=MV_TYPE_16X16;
3489                 // only one MB-Type possible
3490                 
3491                 switch(mb_type){
3492                 case MB_TYPE_INTRA:
3493                     s->mv_dir = 0;
3494                     s->mb_intra= 1;
3495                     motion_x= s->mv[0][0][0] = 0;
3496                     motion_y= s->mv[0][0][1] = 0;
3497                     break;
3498                 case MB_TYPE_INTER:
3499                     s->mv_dir = MV_DIR_FORWARD;
3500                     s->mb_intra= 0;
3501                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3502                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3503                     break;
3504                 case MB_TYPE_INTER4V:
3505                     s->mv_dir = MV_DIR_FORWARD;
3506                     s->mv_type = MV_TYPE_8X8;
3507                     s->mb_intra= 0;
3508                     for(i=0; i<4; i++){
3509                         s->mv[0][i][0] = s->motion_val[s->block_index[i]][0];
3510                         s->mv[0][i][1] = s->motion_val[s->block_index[i]][1];
3511                     }
3512                     motion_x= motion_y= 0;
3513                     break;
3514                 case MB_TYPE_DIRECT:
3515                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3516                     s->mb_intra= 0;
3517                     motion_x=s->b_direct_mv_table[xy][0];
3518                     motion_y=s->b_direct_mv_table[xy][1];
3519 #ifdef CONFIG_RISKY
3520                     ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3521 #endif
3522                     break;
3523                 case MB_TYPE_BIDIR:
3524                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3525                     s->mb_intra= 0;
3526                     motion_x=0;
3527                     motion_y=0;
3528                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3529                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3530                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3531                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3532                     break;
3533                 case MB_TYPE_BACKWARD:
3534                     s->mv_dir = MV_DIR_BACKWARD;
3535                     s->mb_intra= 0;
3536                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3537                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3538                     break;
3539                 case MB_TYPE_FORWARD:
3540                     s->mv_dir = MV_DIR_FORWARD;
3541                     s->mb_intra= 0;
3542                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3543                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3544 //                    printf(" %d %d ", motion_x, motion_y);
3545                     break;
3546                 default:
3547                     motion_x=motion_y=0; //gcc warning fix
3548                     printf("illegal MB type\n");
3549                 }
3550
3551                 encode_mb(s, motion_x, motion_y);
3552
3553                 // RAL: Update last macrobloc type
3554                 s->last_mv_dir = s->mv_dir;
3555             }
3556
3557             /* clean the MV table in IPS frames for direct mode in B frames */
3558             if(s->mb_intra /* && I,P,S_TYPE */){
3559                 s->p_mv_table[xy][0]=0;
3560                 s->p_mv_table[xy][1]=0;
3561             }
3562
3563             MPV_decode_mb(s, s->block);
3564             
3565             if(s->flags&CODEC_FLAG_PSNR){
3566                 int w= 16;
3567                 int h= 16;
3568
3569                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3570                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3571
3572                 s->current_picture_ptr->error[0] += sse(
3573                     s,
3574                     s->new_picture    .data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3575                     s->current_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3576                     w, h, s->linesize);
3577                 s->current_picture_ptr->error[1] += sse(
3578                     s,
3579                     s->new_picture    .data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
3580                     s->current_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
3581                     w>>1, h>>1, s->uvlinesize);
3582                 s->current_picture_ptr->error[2] += sse(
3583                     s,
3584                     s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
3585                     s->current_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
3586                     w>>1, h>>1, s->uvlinesize);
3587             }
3588 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, get_bit_count(&s->pb));
3589         }
3590     }
3591     emms_c();
3592
3593 #ifdef CONFIG_RISKY
3594     if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame)
3595         ff_mpeg4_merge_partitions(s);
3596
3597     if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
3598         msmpeg4_encode_ext_header(s);
3599
3600     if(s->codec_id==CODEC_ID_MPEG4) 
3601         ff_mpeg4_stuffing(&s->pb);
3602 #endif
3603
3604     //if (s->gob_number)
3605     //    fprintf(stderr,"\nNumber of GOB: %d", s->gob_number);
3606     
3607     /* Send the last GOB if RTP */    
3608     if (s->rtp_mode) {
3609         flush_put_bits(&s->pb);
3610         pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
3611         /* Call the RTP callback to send the last GOB */
3612         if (s->rtp_callback)
3613             s->rtp_callback(s->ptr_lastgob, pdif, s->gob_number);
3614         s->ptr_lastgob = pbBufPtr(&s->pb);
3615         //fprintf(stderr,"\nGOB: %2d size: %d (last)", s->gob_number, pdif);
3616     }
3617 }
3618
3619 static int dct_quantize_trellis_c(MpegEncContext *s, 
3620                         DCTELEM *block, int n,
3621                         int qscale, int *overflow){
3622     const int *qmat;
3623     const uint8_t *scantable= s->intra_scantable.scantable;
3624     int max=0;
3625     unsigned int threshold1, threshold2;
3626     int bias=0;
3627     int run_tab[65];
3628     int level_tab[65];
3629     int score_tab[65];
3630     int last_run=0;
3631     int last_level=0;
3632     int last_score= 0;
3633     int last_i= 0;
3634     int coeff[3][64];
3635     int coeff_count[64];
3636     int lambda, qmul, qadd, start_i, last_non_zero, i;
3637     const int esc_length= s->ac_esc_length;
3638     uint8_t * length;
3639     uint8_t * last_length;
3640     int score_limit=0;
3641     int left_limit= 0;
3642         
3643     s->dsp.fdct (block);
3644
3645     qmul= qscale*16;
3646     qadd= ((qscale-1)|1)*8;
3647
3648     if (s->mb_intra) {
3649         int q;
3650         if (!s->h263_aic) {
3651             if (n < 4)
3652                 q = s->y_dc_scale;
3653             else
3654                 q = s->c_dc_scale;
3655             q = q << 3;
3656         } else{
3657             /* For AIC we skip quant/dequant of INTRADC */
3658             q = 1 << 3;
3659             qadd=0;
3660         }
3661             
3662         /* note: block[0] is assumed to be positive */
3663         block[0] = (block[0] + (q >> 1)) / q;
3664         start_i = 1;
3665         last_non_zero = 0;
3666         qmat = s->q_intra_matrix[qscale];
3667         if(s->mpeg_quant || s->codec_id== CODEC_ID_MPEG1VIDEO)
3668             bias= 1<<(QMAT_SHIFT-1);
3669         length     = s->intra_ac_vlc_length;
3670         last_length= s->intra_ac_vlc_last_length;
3671     } else {
3672         start_i = 0;
3673         last_non_zero = -1;
3674         qmat = s->q_inter_matrix[qscale];
3675         length     = s->inter_ac_vlc_length;
3676         last_length= s->inter_ac_vlc_last_length;
3677     }
3678
3679     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3680     threshold2= (threshold1<<1);
3681
3682     for(i=start_i; i<64; i++) {
3683         const int j = scantable[i];
3684         const int k= i-start_i;
3685         int level = block[j];
3686         level = level * qmat[j];
3687
3688 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3689 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3690         if(((unsigned)(level+threshold1))>threshold2){
3691             if(level>0){
3692                 level= (bias + level)>>QMAT_SHIFT;
3693                 coeff[0][k]= level;
3694                 coeff[1][k]= level-1;
3695 //                coeff[2][k]= level-2;
3696             }else{
3697                 level= (bias - level)>>QMAT_SHIFT;
3698                 coeff[0][k]= -level;
3699                 coeff[1][k]= -level+1;
3700 //                coeff[2][k]= -level+2;
3701             }
3702             coeff_count[k]= FFMIN(level, 2);
3703             max |=level;
3704             last_non_zero = i;
3705         }else{
3706             coeff[0][k]= (level>>31)|1;
3707             coeff_count[k]= 1;
3708         }
3709     }
3710     
3711     *overflow= s->max_qcoeff < max; //overflow might have happend
3712     
3713     if(last_non_zero < start_i){
3714         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3715         return last_non_zero;
3716     }
3717
3718     lambda= (qscale*qscale*64*105 + 64)>>7; //FIXME finetune
3719         
3720     score_tab[0]= 0;
3721     for(i=0; i<=last_non_zero - start_i; i++){
3722         int level_index, run, j;
3723         const int dct_coeff= block[ scantable[i + start_i] ];
3724         const int zero_distoration= dct_coeff*dct_coeff;
3725         int best_score=256*256*256*120;
3726
3727         last_score += zero_distoration;
3728         for(level_index=0; level_index < coeff_count[i]; level_index++){
3729             int distoration;
3730             int level= coeff[level_index][i];
3731             int unquant_coeff;
3732             
3733             assert(level);
3734
3735             if(s->out_format == FMT_H263){
3736                 if(level>0){
3737                     unquant_coeff= level*qmul + qadd;
3738                 }else{
3739                     unquant_coeff= level*qmul - qadd;
3740                 }
3741             }else{ //MPEG1
3742                 j= s->dsp.idct_permutation[ scantable[i + start_i] ]; //FIXME optimize
3743                 if(s->mb_intra){
3744                     if (level < 0) {
3745                         unquant_coeff = (int)((-level) * qscale * s->intra_matrix[j]) >> 3;
3746                         unquant_coeff = -((unquant_coeff - 1) | 1);
3747                     } else {
3748                         unquant_coeff = (int)(  level  * qscale * s->intra_matrix[j]) >> 3;
3749                         unquant_coeff =   (unquant_coeff - 1) | 1;
3750                     }
3751                 }else{
3752                     if (level < 0) {
3753                         unquant_coeff = ((((-level) << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3754                         unquant_coeff = -((unquant_coeff - 1) | 1);
3755                     } else {
3756                         unquant_coeff = (((  level  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3757                         unquant_coeff =   (unquant_coeff - 1) | 1;
3758                     }
3759                 }
3760                 unquant_coeff<<= 3;
3761             }
3762
3763             distoration= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff);
3764             level+=64;
3765             if((level&(~127)) == 0){
3766                 for(run=0; run<=i - left_limit; run++){
3767                     int score= distoration + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3768                     score += score_tab[i-run];
3769                     
3770                     if(score < best_score){
3771                         best_score= 
3772                         score_tab[i+1]= score;
3773                         run_tab[i+1]= run;
3774                         level_tab[i+1]= level-64;
3775                     }
3776                 }
3777
3778                 if(s->out_format == FMT_H263){
3779                     for(run=0; run<=i - left_limit; run++){
3780                         int score= distoration + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3781                         score += score_tab[i-run];
3782                         if(score < last_score){
3783                             last_score= score;
3784                             last_run= run;
3785                             last_level= level-64;
3786                             last_i= i+1;
3787                         }
3788                     }
3789                 }
3790             }else{
3791                 distoration += esc_length*lambda;
3792                 for(run=0; run<=i - left_limit; run++){
3793                     int score= distoration + score_tab[i-run];
3794                     
3795                     if(score < best_score){
3796                         best_score= 
3797                         score_tab[i+1]= score;
3798                         run_tab[i+1]= run;
3799                         level_tab[i+1]= level-64;
3800                     }
3801                 }
3802
3803                 if(s->out_format == FMT_H263){
3804                     for(run=0; run<=i - left_limit; run++){
3805                         int score= distoration + score_tab[i-run];
3806                         if(score < last_score){
3807                             last_score= score;
3808                             last_run= run;
3809                             last_level= level-64;
3810                             last_i= i+1;
3811                         }
3812                     }
3813                 }
3814             }
3815         }
3816
3817         for(j=left_limit; j<=i; j++){
3818             score_tab[j] += zero_distoration;
3819         }
3820         score_limit+= zero_distoration;
3821         if(score_tab[i+1] < score_limit)
3822             score_limit= score_tab[i+1];
3823         
3824         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
3825         while(score_tab[ left_limit ] > score_limit + lambda) left_limit++;
3826     }
3827
3828         //FIXME add some cbp penalty
3829
3830     if(s->out_format != FMT_H263){
3831         last_score= 256*256*256*120;
3832         for(i= left_limit; i<=last_non_zero - start_i + 1; i++){
3833             int score= score_tab[i];
3834             if(i) score += lambda*2; //FIXME exacter?
3835
3836             if(score < last_score){
3837                 last_score= score;
3838                 last_i= i;
3839                 last_level= level_tab[i];
3840                 last_run= run_tab[i];
3841             }
3842         }
3843     }
3844     
3845     last_non_zero= last_i - 1 + start_i;
3846     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3847     
3848     if(last_non_zero < start_i)
3849         return last_non_zero;
3850     
3851     i= last_i;
3852     assert(last_level);
3853 //FIXME use permutated scantable
3854     block[ s->dsp.idct_permutation[ scantable[last_non_zero] ] ]= last_level;
3855     i -= last_run + 1;
3856     
3857     for(;i>0 ; i -= run_tab[i] + 1){
3858         const int j= s->dsp.idct_permutation[ scantable[i - 1 + start_i] ];
3859     
3860         block[j]= level_tab[i];
3861         assert(block[j]);
3862     }
3863
3864     return last_non_zero;
3865 }
3866
3867 static int dct_quantize_c(MpegEncContext *s, 
3868                         DCTELEM *block, int n,
3869                         int qscale, int *overflow)
3870 {
3871     int i, j, level, last_non_zero, q;
3872     const int *qmat;
3873     const uint8_t *scantable= s->intra_scantable.scantable;
3874     int bias;
3875     int max=0;
3876     unsigned int threshold1, threshold2;
3877
3878     s->dsp.fdct (block);
3879
3880     if (s->mb_intra) {
3881         if (!s->h263_aic) {
3882             if (n < 4)
3883                 q = s->y_dc_scale;
3884             else
3885                 q = s->c_dc_scale;
3886             q = q << 3;
3887         } else
3888             /* For AIC we skip quant/dequant of INTRADC */
3889             q = 1 << 3;
3890             
3891         /* note: block[0] is assumed to be positive */
3892         block[0] = (block[0] + (q >> 1)) / q;
3893         i = 1;
3894         last_non_zero = 0;
3895         qmat = s->q_intra_matrix[qscale];
3896         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
3897     } else {
3898         i = 0;
3899         last_non_zero = -1;
3900         qmat = s->q_inter_matrix[qscale];
3901         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
3902     }
3903     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3904     threshold2= (threshold1<<1);
3905
3906     for(;i<64;i++) {
3907         j = scantable[i];
3908         level = block[j];
3909         level = level * qmat[j];
3910
3911 //        if(   bias+level >= (1<<QMAT_SHIFT)
3912 //           || bias-level >= (1<<QMAT_SHIFT)){
3913         if(((unsigned)(level+threshold1))>threshold2){
3914             if(level>0){
3915                 level= (bias + level)>>QMAT_SHIFT;
3916                 block[j]= level;
3917             }else{
3918                 level= (bias - level)>>QMAT_SHIFT;
3919                 block[j]= -level;
3920             }
3921             max |=level;
3922             last_non_zero = i;
3923         }else{
3924             block[j]=0;
3925         }
3926     }
3927     *overflow= s->max_qcoeff < max; //overflow might have happend
3928     
3929     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
3930     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
3931         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
3932
3933     return last_non_zero;
3934 }
3935
3936 #endif //CONFIG_ENCODERS
3937
3938 static void dct_unquantize_mpeg1_c(MpegEncContext *s, 
3939                                    DCTELEM *block, int n, int qscale)
3940 {
3941     int i, level, nCoeffs;
3942     const uint16_t *quant_matrix;
3943
3944     nCoeffs= s->block_last_index[n];
3945     
3946     if (s->mb_intra) {
3947         if (n < 4) 
3948             block[0] = block[0] * s->y_dc_scale;
3949         else
3950             block[0] = block[0] * s->c_dc_scale;
3951         /* XXX: only mpeg1 */
3952         quant_matrix = s->intra_matrix;
3953         for(i=1;i<=nCoeffs;i++) {
3954             int j= s->intra_scantable.permutated[i];
3955             level = block[j];
3956             if (level) {
3957                 if (level < 0) {
3958                     level = -level;
3959                     level = (int)(level * qscale * quant_matrix[j]) >> 3;
3960                     level = (level - 1) | 1;
3961                     level = -level;
3962                 } else {
3963                     level = (int)(level * qscale * quant_matrix[j]) >> 3;
3964                     level = (level - 1) | 1;
3965                 }
3966 #ifdef PARANOID
3967                 if (level < -2048 || level > 2047)
3968                     fprintf(stderr, "unquant error %d %d\n", i, level);
3969 #endif
3970                 block[j] = level;
3971             }
3972         }
3973     } else {
3974         i = 0;
3975         quant_matrix = s->inter_matrix;
3976         for(;i<=nCoeffs;i++) {
3977             int j= s->intra_scantable.permutated[i];
3978             level = block[j];
3979             if (level) {
3980                 if (level < 0) {
3981                     level = -level;
3982                     level = (((level << 1) + 1) * qscale *
3983                              ((int) (quant_matrix[j]))) >> 4;
3984                     level = (level - 1) | 1;
3985                     level = -level;
3986                 } else {
3987                     level = (((level << 1) + 1) * qscale *
3988                              ((int) (quant_matrix[j]))) >> 4;
3989                     level = (level - 1) | 1;
3990                 }
3991 #ifdef PARANOID
3992                 if (level < -2048 || level > 2047)
3993                     fprintf(stderr, "unquant error %d %d\n", i, level);
3994 #endif
3995                 block[j] = level;
3996             }
3997         }
3998     }
3999 }
4000
4001 static void dct_unquantize_mpeg2_c(MpegEncContext *s, 
4002                                    DCTELEM *block, int n, int qscale)
4003 {
4004     int i, level, nCoeffs;
4005     const uint16_t *quant_matrix;
4006
4007     if(s->alternate_scan) nCoeffs= 63;
4008     else nCoeffs= s->block_last_index[n];
4009     
4010     if (s->mb_intra) {
4011         if (n < 4) 
4012             block[0] = block[0] * s->y_dc_scale;
4013         else
4014             block[0] = block[0] * s->c_dc_scale;
4015         quant_matrix = s->intra_matrix;
4016         for(i=1;i<=nCoeffs;i++) {
4017             int j= s->intra_scantable.permutated[i];
4018             level = block[j];
4019             if (level) {
4020                 if (level < 0) {
4021                     level = -level;
4022                     level = (int)(level * qscale * quant_matrix[j]) >> 3;
4023                     level = -level;
4024                 } else {
4025                     level = (int)(level * qscale * quant_matrix[j]) >> 3;
4026                 }
4027 #ifdef PARANOID
4028                 if (level < -2048 || level > 2047)
4029                     fprintf(stderr, "unquant error %d %d\n", i, level);
4030 #endif
4031                 block[j] = level;
4032             }
4033         }
4034     } else {
4035         int sum=-1;
4036         i = 0;
4037         quant_matrix = s->inter_matrix;
4038         for(;i<=nCoeffs;i++) {
4039             int j= s->intra_scantable.permutated[i];
4040             level = block[j];
4041             if (level) {
4042                 if (level < 0) {
4043                     level = -level;
4044                     level = (((level << 1) + 1) * qscale *
4045                              ((int) (quant_matrix[j]))) >> 4;
4046                     level = -level;
4047                 } else {
4048                     level = (((level << 1) + 1) * qscale *
4049                              ((int) (quant_matrix[j]))) >> 4;
4050                 }
4051 #ifdef PARANOID
4052                 if (level < -2048 || level > 2047)
4053                     fprintf(stderr, "unquant error %d %d\n", i, level);
4054 #endif
4055                 block[j] = level;
4056                 sum+=level;
4057             }
4058         }
4059         block[63]^=sum&1;
4060     }
4061 }
4062
4063
4064 static void dct_unquantize_h263_c(MpegEncContext *s, 
4065                                   DCTELEM *block, int n, int qscale)
4066 {
4067     int i, level, qmul, qadd;
4068     int nCoeffs;
4069     
4070     assert(s->block_last_index[n]>=0);
4071     
4072     qadd = (qscale - 1) | 1;
4073     qmul = qscale << 1;
4074     
4075     if (s->mb_intra) {
4076         if (!s->h263_aic) {
4077             if (n < 4) 
4078                 block[0] = block[0] * s->y_dc_scale;
4079             else
4080                 block[0] = block[0] * s->c_dc_scale;
4081         }else
4082             qadd = 0;
4083         i = 1;
4084         nCoeffs= 63; //does not allways use zigzag table 
4085     } else {
4086         i = 0;
4087         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
4088     }
4089
4090     for(;i<=nCoeffs;i++) {
4091         level = block[i];
4092         if (level) {
4093             if (level < 0) {
4094                 level = level * qmul - qadd;
4095             } else {
4096                 level = level * qmul + qadd;
4097             }
4098 #ifdef PARANOID
4099                 if (level < -2048 || level > 2047)
4100                     fprintf(stderr, "unquant error %d %d\n", i, level);
4101 #endif
4102             block[i] = level;
4103         }
4104     }
4105 }
4106
4107
4108 static const AVOption mpeg4_options[] =
4109 {
4110     AVOPTION_CODEC_INT("bitrate", "desired video bitrate", bit_rate, 4, 240000000, 800000),
4111     AVOPTION_CODEC_FLAG("vhq", "very high quality", flags, CODEC_FLAG_HQ, 0),
4112     AVOPTION_CODEC_INT("ratetol", "number of bits the bitstream is allowed to diverge from the reference"
4113                        "the reference can be CBR (for CBR pass1) or VBR (for pass2)",
4114                        bit_rate_tolerance, 4, 240000000, 8000),
4115     AVOPTION_CODEC_INT("qmin", "minimum quantizer", qmin, 1, 31, 2),
4116     AVOPTION_CODEC_INT("qmax", "maximum quantizer", qmax, 1, 31, 31),
4117     AVOPTION_CODEC_STRING("rc_eq", "rate control equation",
4118                           rc_eq, "tex^qComp,option1,options2", 0),
4119     AVOPTION_CODEC_INT("rc_minrate", "rate control minimum bitrate",
4120                        rc_min_rate, 4, 24000000, 0),
4121     AVOPTION_CODEC_INT("rc_maxrate", "rate control maximum bitrate",
4122                        rc_max_rate, 4, 24000000, 0),
4123     AVOPTION_CODEC_DOUBLE("rc_buf_aggresivity", "rate control buffer aggresivity",
4124                           rc_buffer_aggressivity, 4, 24000000, 0),
4125     AVOPTION_CODEC_DOUBLE("rc_initial_cplx", "initial complexity for pass1 ratecontrol",
4126                           rc_initial_cplx, 0., 9999999., 0),
4127     AVOPTION_CODEC_DOUBLE("i_quant_factor", "qscale factor between p and i frames",
4128                           i_quant_factor, 0., 0., 0),
4129     AVOPTION_CODEC_DOUBLE("i_quant_offset", "qscale offset between p and i frames",
4130                           i_quant_factor, -999999., 999999., 0),
4131     AVOPTION_CODEC_INT("dct_algo", "dct alghorithm",
4132                        dct_algo, 0, 5, 0), // fixme - "Auto,FastInt,Int,MMX,MLib,Altivec"
4133     AVOPTION_CODEC_DOUBLE("lumi_masking", "luminance masking",
4134                           lumi_masking, 0., 999999., 0),
4135     AVOPTION_CODEC_DOUBLE("temporal_cplx_masking", "temporary complexity masking",
4136                           temporal_cplx_masking, 0., 999999., 0),
4137     AVOPTION_CODEC_DOUBLE("spatial_cplx_masking", "spatial complexity masking",
4138                           spatial_cplx_masking, 0., 999999., 0),
4139     AVOPTION_CODEC_DOUBLE("p_masking", "p block masking",
4140                           p_masking, 0., 999999., 0),
4141     AVOPTION_CODEC_DOUBLE("dark_masking", "darkness masking",
4142                           dark_masking, 0., 999999., 0),
4143     AVOPTION_CODEC_INT("idct_algo", "idct alghorithm",
4144                        idct_algo, 0, 8, 0), // fixme - "Auto,Int,Simple,SimpleMMX,LibMPEG2MMX,PS2,MLib,ARM,Altivec"
4145
4146     AVOPTION_CODEC_INT("mb_qmin", "minimum MB quantizer",
4147                        mb_qmin, 0, 8, 0),
4148     AVOPTION_CODEC_INT("mb_qmax", "maximum MB quantizer",
4149                        mb_qmin, 0, 8, 0),
4150
4151     AVOPTION_CODEC_INT("me_cmp", "ME compare function",
4152                        me_cmp, 0, 24000000, 0),
4153     AVOPTION_CODEC_INT("me_sub_cmp", "subpixel ME compare function",
4154                        me_sub_cmp, 0, 24000000, 0),
4155
4156
4157     AVOPTION_CODEC_INT("dia_size", "ME diamond size & shape",
4158                        dia_size, 0, 24000000, 0),
4159     AVOPTION_CODEC_INT("last_predictor_count", "amount of previous MV predictors",
4160                        last_predictor_count, 0, 24000000, 0),
4161
4162     AVOPTION_CODEC_INT("pre_me", "pre pass for ME",
4163                        pre_me, 0, 24000000, 0),
4164     AVOPTION_CODEC_INT("me_pre_cmp", "ME pre pass compare function",
4165                        me_pre_cmp, 0, 24000000, 0),
4166
4167     AVOPTION_CODEC_INT("me_range", "maximum ME search range",
4168                        me_range, 0, 24000000, 0),
4169     AVOPTION_CODEC_INT("pre_dia_size", "ME pre pass diamod size & shape",
4170                        pre_dia_size, 0, 24000000, 0),
4171     AVOPTION_CODEC_INT("me_subpel_quality", "subpel ME quality",
4172                        me_subpel_quality, 0, 24000000, 0),
4173     AVOPTION_CODEC_INT("me_range", "maximum ME search range",
4174                        me_range, 0, 24000000, 0),
4175     AVOPTION_CODEC_FLAG("psnr", "calculate PSNR of compressed frames",
4176                         flags, CODEC_FLAG_PSNR, 0),
4177     AVOPTION_CODEC_RCOVERRIDE("rc_override", "ratecontrol override (=startframe,endframe,qscale,quality_factor)",
4178                               rc_override),
4179     AVOPTION_SUB(avoptions_common),
4180     AVOPTION_END()
4181 };
4182
4183 #ifdef CONFIG_ENCODERS
4184
4185 AVCodec mpeg1video_encoder = {
4186     "mpeg1video",
4187     CODEC_TYPE_VIDEO,
4188     CODEC_ID_MPEG1VIDEO,
4189     sizeof(MpegEncContext),
4190     MPV_encode_init,
4191     MPV_encode_picture,
4192     MPV_encode_end,
4193 };
4194
4195 #ifdef CONFIG_RISKY
4196
4197 AVCodec h263_encoder = {
4198     "h263",
4199     CODEC_TYPE_VIDEO,
4200     CODEC_ID_H263,
4201     sizeof(MpegEncContext),
4202     MPV_encode_init,
4203     MPV_encode_picture,
4204     MPV_encode_end,
4205 };
4206
4207 AVCodec h263p_encoder = {
4208     "h263p",
4209     CODEC_TYPE_VIDEO,
4210     CODEC_ID_H263P,
4211     sizeof(MpegEncContext),
4212     MPV_encode_init,
4213     MPV_encode_picture,
4214     MPV_encode_end,
4215 };
4216
4217 AVCodec rv10_encoder = {
4218     "rv10",
4219     CODEC_TYPE_VIDEO,
4220     CODEC_ID_RV10,
4221     sizeof(MpegEncContext),
4222     MPV_encode_init,
4223     MPV_encode_picture,
4224     MPV_encode_end,
4225 };
4226
4227 AVCodec mpeg4_encoder = {
4228     "mpeg4",
4229     CODEC_TYPE_VIDEO,
4230     CODEC_ID_MPEG4,
4231     sizeof(MpegEncContext),
4232     MPV_encode_init,
4233     MPV_encode_picture,
4234     MPV_encode_end,
4235     .options = mpeg4_options,
4236 };
4237
4238 AVCodec msmpeg4v1_encoder = {
4239     "msmpeg4v1",
4240     CODEC_TYPE_VIDEO,
4241     CODEC_ID_MSMPEG4V1,
4242     sizeof(MpegEncContext),
4243     MPV_encode_init,
4244     MPV_encode_picture,
4245     MPV_encode_end,
4246     .options = mpeg4_options,
4247 };
4248
4249 AVCodec msmpeg4v2_encoder = {
4250     "msmpeg4v2",
4251     CODEC_TYPE_VIDEO,
4252     CODEC_ID_MSMPEG4V2,
4253     sizeof(MpegEncContext),
4254     MPV_encode_init,
4255     MPV_encode_picture,
4256     MPV_encode_end,
4257     .options = mpeg4_options,
4258 };
4259
4260 AVCodec msmpeg4v3_encoder = {
4261     "msmpeg4",
4262     CODEC_TYPE_VIDEO,
4263     CODEC_ID_MSMPEG4V3,
4264     sizeof(MpegEncContext),
4265     MPV_encode_init,
4266     MPV_encode_picture,
4267     MPV_encode_end,
4268     .options = mpeg4_options,
4269 };
4270
4271 AVCodec wmv1_encoder = {
4272     "wmv1",
4273     CODEC_TYPE_VIDEO,
4274     CODEC_ID_WMV1,
4275     sizeof(MpegEncContext),
4276     MPV_encode_init,
4277     MPV_encode_picture,
4278     MPV_encode_end,
4279     .options = mpeg4_options,
4280 };
4281
4282 #endif
4283
4284 AVCodec mjpeg_encoder = {
4285     "mjpeg",
4286     CODEC_TYPE_VIDEO,
4287     CODEC_ID_MJPEG,
4288     sizeof(MpegEncContext),
4289     MPV_encode_init,
4290     MPV_encode_picture,
4291     MPV_encode_end,
4292 };
4293
4294 #endif //CONFIG_ENCODERS
4295