]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo.c
new PSNR code (now works with chroma, b frames, ...)
[ffmpeg] / libavcodec / mpegvideo.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  *
19  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
20  */
21  
22 #include <ctype.h>
23 #include "avcodec.h"
24 #include "dsputil.h"
25 #include "mpegvideo.h"
26 #include "simple_idct.h"
27
28 #ifdef USE_FASTMEMCPY
29 #include "fastmemcpy.h"
30 #endif
31
32 //#undef NDEBUG
33 //#include <assert.h>
34
/* Prototypes for file-local (static) helpers so that they can be referenced
   before their definitions, e.g. by DCT_common_init() and by the draw_edges
   pointer initialiser below. */
35 static void encode_picture(MpegEncContext *s, int picture_number);
36 static void dct_unquantize_mpeg1_c(MpegEncContext *s, 
37                                    DCTELEM *block, int n, int qscale);
38 static void dct_unquantize_mpeg2_c(MpegEncContext *s,
39                                    DCTELEM *block, int n, int qscale);
40 static void dct_unquantize_h263_c(MpegEncContext *s, 
41                                   DCTELEM *block, int n, int qscale);
42 static void draw_edges_c(UINT8 *buf, int wrap, int width, int height, int w);
43 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
44
/* Edge replication hook, initialised to the portable C implementation.
   MPV_frame_end() calls it through this pointer.
   NOTE(review): it is not static, so presumably platform specific init code
   may replace it - confirm against the arch specific files. */
45 void (*draw_edges)(UINT8 *buf, int wrap, int width, int height, int w)= draw_edges_c;
46 static void emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int block_w, int block_h, 
47                                     int src_x, int src_y, int w, int h);
48
49
50 /* enable all paranoid tests for rounding, overflows, etc... */
51 //#define PARANOID
52
53 //#define DEBUG
54
55
56 /* for jpeg fast DCT */
57 #define CONST_BITS 14
58
/* Per-coefficient scale factors with 14 fractional bits.  convert_matrix()
   folds them into the divisor of the quantizer table when s->fdct is
   fdct_ifast, compensating with an extra shift of 14. */
59 static const uint16_t aanscales[64] = {
60     /* precomputed values scaled up by 14 bits */
61     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
62     22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
63     21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
64     19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
65     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
66     12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
67     8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
68     4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
69 };
70
71 /* Input permutation for the simple_idct_mmx */
/* Copied verbatim into s->idct_permutation by DCT_common_init() when
   s->idct_permutation_type is FF_SIMPLE_IDCT_PERM. */
72 static const uint8_t simple_mmx_permutation[64]={
73         0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, 
74         0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, 
75         0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, 
76         0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, 
77         0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, 
78         0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, 
79         0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, 
80         0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
81 };
82
/* 16 entry rounding table; NOTE(review): judging by the name it is used for
   H.263 chroma motion vector rounding - the user is not visible here. */
83 static const uint8_t h263_chroma_roundtab[16] = {
84     0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
85 };
86
/* Fallback tables shared by all encoder instances.  MPV_encode_init()
   allocates/initialises them once and points s->mv_penalty and s->fcode_tab
   at them. */
87 static UINT16 (*default_mv_penalty)[MAX_MV*2+1]=NULL;
88 static UINT8 default_fcode_tab[MAX_MV*2+1];
89
90 /* default motion estimation */
/* MPV_encode_init() uses this value when avctx->me_method is 0. */
91 int motion_estimation_method = ME_EPZS;
92
93 static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16)[64], uint16_t (*qmat16_bias)[64],
94                            const UINT16 *quant_matrix, int bias, int qmin, int qmax)
95 {
96     int qscale;
97
98     for(qscale=qmin; qscale<=qmax; qscale++){
99         int i;
100         if (s->fdct == ff_jpeg_fdct_islow) {
101             for(i=0;i<64;i++) {
102                 const int j= s->idct_permutation[i];
103                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
104                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
105                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
106                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
107                 
108                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / 
109                                 (qscale * quant_matrix[j]));
110             }
111         } else if (s->fdct == fdct_ifast) {
112             for(i=0;i<64;i++) {
113                 const int j= s->idct_permutation[i];
114                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
115                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
116                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
117                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
118                 
119                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / 
120                                 (aanscales[i] * qscale * quant_matrix[j]));
121             }
122         } else {
123             for(i=0;i<64;i++) {
124                 const int j= s->idct_permutation[i];
125                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
126                    So 16           <= qscale * quant_matrix[i]             <= 7905
127                    so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
128                    so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
129                 */
130                 qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
131                 qmat16[qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
132
133                 if(qmat16[qscale][i]==0 || qmat16[qscale][i]==128*256) qmat16[qscale][i]=128*256-1;
134                 qmat16_bias[qscale][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][i]);
135             }
136         }
137     }
138 }
// move into common.c perhaps
/* Stores av_mallocz(size) in p; when that yields NULL the failure is reported
   through perror() and control jumps to the "fail:" label of the enclosing
   function.  The expansion is a plain { } block, so the macro may be written
   with or without a trailing semicolon. */
#define CHECKED_ALLOCZ(p, size)\
{\
    if((p= av_mallocz(size)) == NULL){\
        perror("malloc");\
        goto fail;\
    }\
}
148
149 void ff_init_scantable(MpegEncContext *s, ScanTable *st, const UINT8 *src_scantable){
150     int i;
151     int end;
152     
153     st->scantable= src_scantable;
154
155     for(i=0; i<64; i++){
156         int j;
157         j = src_scantable[i];
158         st->permutated[i] = s->idct_permutation[j];
159 #ifdef ARCH_POWERPC
160         st->inverse[j] = i;
161 #endif
162     }
163     
164     end=-1;
165     for(i=0; i<64; i++){
166         int j;
167         j = st->permutated[i];
168         if(j>end) end=j;
169         st->raster_end[i]= end;
170     }
171 }
172
173 /* XXX: those functions should be suppressed ASAP when all IDCTs are
174  converted */
175 // *FIXME* this is ugly hack using local static
/* File-wide copies of the clamped pixel store/add routines.  DCT_common_init()
   fills them from s->dsp and the ff_jref_idct_put()/ff_jref_idct_add()
   wrappers call through them; being static globals they are shared by every
   context in the process. */
176 static void (*ff_put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
177 static void (*ff_add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
178 static void ff_jref_idct_put(UINT8 *dest, int line_size, DCTELEM *block)
179 {
180     j_rev_dct (block);
181     ff_put_pixels_clamped(block, dest, line_size);
182 }
183 static void ff_jref_idct_add(UINT8 *dest, int line_size, DCTELEM *block)
184 {
185     j_rev_dct (block);
186     ff_add_pixels_clamped(block, dest, line_size);
187 }
188
189 /* init common dct for both encoder and decoder */
190 int DCT_common_init(MpegEncContext *s)
191 {
192     int i;
193
194     ff_put_pixels_clamped = s->dsp.put_pixels_clamped;
195     ff_add_pixels_clamped = s->dsp.add_pixels_clamped;
196
197     s->dct_unquantize_h263 = dct_unquantize_h263_c;
198     s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_c;
199     s->dct_unquantize_mpeg2 = dct_unquantize_mpeg2_c;
200     s->dct_quantize= dct_quantize_c;
201
202     if(s->avctx->dct_algo==FF_DCT_FASTINT)
203         s->fdct = fdct_ifast;
204     else
205         s->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
206
207     if(s->avctx->idct_algo==FF_IDCT_INT){
208         s->idct_put= ff_jref_idct_put;
209         s->idct_add= ff_jref_idct_add;
210         s->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
211     }else{ //accurate/default
212         s->idct_put= simple_idct_put;
213         s->idct_add= simple_idct_add;
214         s->idct_permutation_type= FF_NO_IDCT_PERM;
215     }
216         
217 #ifdef HAVE_MMX
218     MPV_common_init_mmx(s);
219 #endif
220 #ifdef ARCH_ALPHA
221     MPV_common_init_axp(s);
222 #endif
223 #ifdef HAVE_MLIB
224     MPV_common_init_mlib(s);
225 #endif
226 #ifdef HAVE_MMI
227     MPV_common_init_mmi(s);
228 #endif
229 #ifdef ARCH_ARMV4L
230     MPV_common_init_armv4l(s);
231 #endif
232 #ifdef ARCH_POWERPC
233     MPV_common_init_ppc(s);
234 #endif
235
236     switch(s->idct_permutation_type){
237     case FF_NO_IDCT_PERM:
238         for(i=0; i<64; i++)
239             s->idct_permutation[i]= i;
240         break;
241     case FF_LIBMPEG2_IDCT_PERM:
242         for(i=0; i<64; i++)
243             s->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
244         break;
245     case FF_SIMPLE_IDCT_PERM:
246         for(i=0; i<64; i++)
247             s->idct_permutation[i]= simple_mmx_permutation[i];
248         break;
249     case FF_TRANSPOSE_IDCT_PERM:
250         for(i=0; i<64; i++)
251             s->idct_permutation[i]= ((i&7)<<3) | (i>>3);
252         break;
253     default:
254         fprintf(stderr, "Internal error, IDCT permutation not set\n");
255         return -1;
256     }
257
258
259     /* load & permutate scantables
260        note: only wmv uses differnt ones 
261     */
262     ff_init_scantable(s, &s->inter_scantable  , ff_zigzag_direct);
263     ff_init_scantable(s, &s->intra_scantable  , ff_zigzag_direct);
264     ff_init_scantable(s, &s->intra_h_scantable, ff_alternate_horizontal_scan);
265     ff_init_scantable(s, &s->intra_v_scantable, ff_alternate_vertical_scan);
266
267     return 0;
268 }
269
270 /**
271  * allocates various arrays for a Picture structure, except the pixels themself.
272  * The pixels are allocated/set in te get_buffer()
273  */
274 static int alloc_picture(MpegEncContext *s, Picture *pic){
275     if (s->encoding) {        
276         CHECKED_ALLOCZ(pic->mb_var   , s->mb_num * sizeof(INT16))
277         CHECKED_ALLOCZ(pic->mc_mb_var, s->mb_num * sizeof(INT16))
278         CHECKED_ALLOCZ(pic->mb_mean  , s->mb_num * sizeof(INT8))
279     }
280
281     CHECKED_ALLOCZ(pic->mbskip_table , s->mb_num * sizeof(UINT8)+1) //the +1 is for the slice end check
282     CHECKED_ALLOCZ(pic->qscale_table , s->mb_num * sizeof(UINT8))
283     pic->qstride= s->mb_width;
284     
285     return 0;
286 fail: //for the CHECKED_ALLOCZ macro
287     return -1;
288 }
289
290 static void free_picture(MpegEncContext *s, Picture *pic){
291     int i;
292     
293     av_freep(&pic->mb_var);
294     av_freep(&pic->mc_mb_var);
295     av_freep(&pic->mb_mean);
296     av_freep(&pic->mbskip_table);
297     av_freep(&pic->qscale_table);
298     
299     if(s->avctx->get_buffer == avcodec_default_get_buffer){
300         for(i=0; i<4; i++){
301             av_freep(&pic->base[i]);
302             pic->data[i]= NULL;
303         }
304         av_freep(&pic->opaque);
305     }
306 }
307
308 /* init common structure for both encoder and decoder */
309 int MPV_common_init(MpegEncContext *s)
310 {
311     int y_size, c_size, yc_size, i;
312
313     dsputil_init(&s->dsp, s->avctx->dsp_mask);
314     DCT_common_init(s);
315
316     s->flags= s->avctx->flags;
317
318     s->mb_width  = (s->width  + 15) / 16;
319     s->mb_height = (s->height + 15) / 16;
320
321     /* set default edge pos, will be overriden in decode_header if needed */
322     s->h_edge_pos= s->mb_width*16;
323     s->v_edge_pos= s->mb_height*16;
324
325     s->mb_num = s->mb_width * s->mb_height;
326
327     y_size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
328     c_size = (s->mb_width + 2) * (s->mb_height + 2);
329     yc_size = y_size + 2 * c_size;
330
331     /* convert fourcc to upper case */
332     s->avctx->fourcc=   toupper( s->avctx->fourcc     &0xFF)          
333                      + (toupper((s->avctx->fourcc>>8 )&0xFF)<<8 )
334                      + (toupper((s->avctx->fourcc>>16)&0xFF)<<16) 
335                      + (toupper((s->avctx->fourcc>>24)&0xFF)<<24);
336
337     CHECKED_ALLOCZ(s->edge_emu_buffer, (s->width+64)*2*17*2); //(width + edge + align)*interlaced*MBsize*tolerance
338
339     s->avctx->coded_picture= (AVVideoFrame*)&s->current_picture;
340
341     if (s->encoding) {
342         int mv_table_size= (s->mb_width+2)*(s->mb_height+2);
343
344         /* Allocate MV tables */
345         CHECKED_ALLOCZ(s->p_mv_table            , mv_table_size * 2 * sizeof(INT16))
346         CHECKED_ALLOCZ(s->b_forw_mv_table       , mv_table_size * 2 * sizeof(INT16))
347         CHECKED_ALLOCZ(s->b_back_mv_table       , mv_table_size * 2 * sizeof(INT16))
348         CHECKED_ALLOCZ(s->b_bidir_forw_mv_table , mv_table_size * 2 * sizeof(INT16))
349         CHECKED_ALLOCZ(s->b_bidir_back_mv_table , mv_table_size * 2 * sizeof(INT16))
350         CHECKED_ALLOCZ(s->b_direct_forw_mv_table, mv_table_size * 2 * sizeof(INT16))
351         CHECKED_ALLOCZ(s->b_direct_back_mv_table, mv_table_size * 2 * sizeof(INT16))
352         CHECKED_ALLOCZ(s->b_direct_mv_table     , mv_table_size * 2 * sizeof(INT16))
353
354         //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer()
355         CHECKED_ALLOCZ(s->me_scratchpad,  s->width*2*16*3*sizeof(uint8_t)) 
356         
357         CHECKED_ALLOCZ(s->me_map      , ME_MAP_SIZE*sizeof(uint32_t))
358         CHECKED_ALLOCZ(s->me_score_map, ME_MAP_SIZE*sizeof(uint16_t))
359
360         if(s->codec_id==CODEC_ID_MPEG4){
361             CHECKED_ALLOCZ(s->tex_pb_buffer, PB_BUFFER_SIZE);
362             CHECKED_ALLOCZ(   s->pb2_buffer, PB_BUFFER_SIZE);
363         }
364         
365         if(s->msmpeg4_version){
366             CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
367         }
368         CHECKED_ALLOCZ(s->avctx->stats_out, 256);
369     }
370         
371     CHECKED_ALLOCZ(s->error_status_table, s->mb_num*sizeof(UINT8))
372     
373     if (s->out_format == FMT_H263 || s->encoding) {
374         int size;
375         /* Allocate MB type table */
376         CHECKED_ALLOCZ(s->mb_type  , s->mb_num * sizeof(UINT8))
377
378         /* MV prediction */
379         size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
380         CHECKED_ALLOCZ(s->motion_val, size * 2 * sizeof(INT16));
381     }
382
383     if(s->codec_id==CODEC_ID_MPEG4){
384         /* interlaced direct mode decoding tables */
385         CHECKED_ALLOCZ(s->field_mv_table, s->mb_num*2*2 * sizeof(INT16))
386         CHECKED_ALLOCZ(s->field_select_table, s->mb_num*2* sizeof(INT8))
387     }
388     /* 4mv b frame decoding table */
389     //note this is needed for h263 without b frames too (segfault on damaged streams otherwise)
390     CHECKED_ALLOCZ(s->co_located_type_table, s->mb_num * sizeof(UINT8))
391     if (s->out_format == FMT_H263) {
392         /* ac values */
393         CHECKED_ALLOCZ(s->ac_val[0], yc_size * sizeof(INT16) * 16);
394         s->ac_val[1] = s->ac_val[0] + y_size;
395         s->ac_val[2] = s->ac_val[1] + c_size;
396         
397         /* cbp values */
398         CHECKED_ALLOCZ(s->coded_block, y_size);
399         
400         /* divx501 bitstream reorder buffer */
401         CHECKED_ALLOCZ(s->bitstream_buffer, BITSTREAM_BUFFER_SIZE);
402
403         /* cbp, ac_pred, pred_dir */
404         CHECKED_ALLOCZ(s->cbp_table  , s->mb_num * sizeof(UINT8))
405         CHECKED_ALLOCZ(s->pred_dir_table, s->mb_num * sizeof(UINT8))
406     }
407     
408     if (s->h263_pred || s->h263_plus || !s->encoding) {
409         /* dc values */
410         //MN: we need these for error resilience of intra-frames
411         CHECKED_ALLOCZ(s->dc_val[0], yc_size * sizeof(INT16));
412         s->dc_val[1] = s->dc_val[0] + y_size;
413         s->dc_val[2] = s->dc_val[1] + c_size;
414         for(i=0;i<yc_size;i++)
415             s->dc_val[0][i] = 1024;
416     }
417
418     /* which mb is a intra block */
419     CHECKED_ALLOCZ(s->mbintra_table, s->mb_num);
420     memset(s->mbintra_table, 1, s->mb_num);
421     
422     /* default structure is frame */
423     s->picture_structure = PICT_FRAME;
424     
425     /* init macroblock skip table */
426     CHECKED_ALLOCZ(s->mbskip_table, s->mb_num+1);
427     //Note the +1 is for a quicker mpeg4 slice_end detection
428     
429     s->block= s->blocks[0];
430
431     s->parse_context.state= -1;
432
433     s->context_initialized = 1;
434     return 0;
435  fail:
436     MPV_common_end(s);
437     return -1;
438 }
439
440
441 //extern int sads;
442
443 /* init common structure for both encoder and decoder */
444 void MPV_common_end(MpegEncContext *s)
445 {
446     int i;
447
448     for(i=0; i<MAX_PICTURE_COUNT; i++){
449         if(s->picture[i].data[0]){
450             s->avctx->release_buffer(s->avctx, (AVVideoFrame*)&s->picture[i]);
451         }
452     }
453     
454     av_freep(&s->mb_type);
455     av_freep(&s->p_mv_table);
456     av_freep(&s->b_forw_mv_table);
457     av_freep(&s->b_back_mv_table);
458     av_freep(&s->b_bidir_forw_mv_table);
459     av_freep(&s->b_bidir_back_mv_table);
460     av_freep(&s->b_direct_forw_mv_table);
461     av_freep(&s->b_direct_back_mv_table);
462     av_freep(&s->b_direct_mv_table);
463     av_freep(&s->motion_val);
464     av_freep(&s->dc_val[0]);
465     av_freep(&s->ac_val[0]);
466     av_freep(&s->coded_block);
467     av_freep(&s->mbintra_table);
468     av_freep(&s->cbp_table);
469     av_freep(&s->pred_dir_table);
470     av_freep(&s->me_scratchpad);
471     av_freep(&s->me_map);
472     av_freep(&s->me_score_map);
473     
474     av_freep(&s->mbskip_table);
475     av_freep(&s->bitstream_buffer);
476     av_freep(&s->tex_pb_buffer);
477     av_freep(&s->pb2_buffer);
478     av_freep(&s->edge_emu_buffer);
479     av_freep(&s->co_located_type_table);
480     av_freep(&s->field_mv_table);
481     av_freep(&s->field_select_table);
482     av_freep(&s->avctx->stats_out);
483     av_freep(&s->ac_stats);
484     av_freep(&s->error_status_table);
485
486     for(i=0; i<MAX_PICTURE_COUNT; i++){
487         free_picture(s, &s->picture[i]);
488     }
489     s->context_initialized = 0;
490 }
491
492 /* init video encoder */
493 int MPV_encode_init(AVCodecContext *avctx)
494 {
495     MpegEncContext *s = avctx->priv_data;
496     int i;
497
498     avctx->pix_fmt = PIX_FMT_YUV420P;
499
500     s->bit_rate = avctx->bit_rate;
501     s->bit_rate_tolerance = avctx->bit_rate_tolerance;
502     s->frame_rate = avctx->frame_rate;
503     s->width = avctx->width;
504     s->height = avctx->height;
505     if(avctx->gop_size > 600){
506         fprintf(stderr, "Warning keyframe interval too large! reducing it ...\n");
507         avctx->gop_size=600;
508     }
509     s->gop_size = avctx->gop_size;
510     s->rtp_mode = avctx->rtp_mode;
511     s->rtp_payload_size = avctx->rtp_payload_size;
512     if (avctx->rtp_callback)
513         s->rtp_callback = avctx->rtp_callback;
514     s->qmin= avctx->qmin;
515     s->qmax= avctx->qmax;
516     s->max_qdiff= avctx->max_qdiff;
517     s->qcompress= avctx->qcompress;
518     s->qblur= avctx->qblur;
519     s->avctx = avctx;
520     s->flags= avctx->flags;
521     s->max_b_frames= avctx->max_b_frames;
522     s->b_frame_strategy= avctx->b_frame_strategy;
523     s->codec_id= avctx->codec->id;
524     s->luma_elim_threshold  = avctx->luma_elim_threshold;
525     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
526     s->strict_std_compliance= avctx->strict_std_compliance;
527     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
528     s->mpeg_quant= avctx->mpeg_quant;
529
530     if (s->gop_size <= 1) {
531         s->intra_only = 1;
532         s->gop_size = 12;
533     } else {
534         s->intra_only = 0;
535     }
536
537     /* ME algorithm */
538     if (avctx->me_method == 0)
539         /* For compatibility */
540         s->me_method = motion_estimation_method;
541     else
542         s->me_method = avctx->me_method;
543
544     /* Fixed QSCALE */
545     s->fixed_qscale = (avctx->flags & CODEC_FLAG_QSCALE);
546     
547     s->adaptive_quant= (   s->avctx->lumi_masking
548                         || s->avctx->dark_masking
549                         || s->avctx->temporal_cplx_masking 
550                         || s->avctx->spatial_cplx_masking
551                         || s->avctx->p_masking)
552                        && !s->fixed_qscale;
553     
554     s->progressive_sequence= !(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
555
556     switch(avctx->codec->id) {
557     case CODEC_ID_MPEG1VIDEO:
558         s->out_format = FMT_MPEG1;
559         avctx->delay=0; //FIXME not sure, should check the spec
560         break;
561     case CODEC_ID_MJPEG:
562         s->out_format = FMT_MJPEG;
563         s->intra_only = 1; /* force intra only for jpeg */
564         s->mjpeg_write_tables = 1; /* write all tables */
565         s->mjpeg_data_only_frames = 0; /* write all the needed headers */
566         s->mjpeg_vsample[0] = 2; /* set up default sampling factors */
567         s->mjpeg_vsample[1] = 1; /* the only currently supported values */
568         s->mjpeg_vsample[2] = 1; 
569         s->mjpeg_hsample[0] = 2;
570         s->mjpeg_hsample[1] = 1; 
571         s->mjpeg_hsample[2] = 1; 
572         if (mjpeg_init(s) < 0)
573             return -1;
574         avctx->delay=0;
575         break;
576     case CODEC_ID_H263:
577         if (h263_get_picture_format(s->width, s->height) == 7) {
578             printf("Input picture size isn't suitable for h263 codec! try h263+\n");
579             return -1;
580         }
581         s->out_format = FMT_H263;
582         avctx->delay=0;
583         break;
584     case CODEC_ID_H263P:
585         s->out_format = FMT_H263;
586         s->h263_plus = 1;
587         s->unrestricted_mv = 1;
588         s->h263_aic = 1;
589         
590         /* These are just to be sure */
591         s->umvplus = 0;
592         s->umvplus_dec = 0;
593         avctx->delay=0;
594         break;
595     case CODEC_ID_RV10:
596         s->out_format = FMT_H263;
597         s->h263_rv10 = 1;
598         avctx->delay=0;
599         break;
600     case CODEC_ID_MPEG4:
601         s->out_format = FMT_H263;
602         s->h263_pred = 1;
603         s->unrestricted_mv = 1;
604         s->has_b_frames= s->max_b_frames ? 1 : 0;
605         s->low_delay= !s->has_b_frames;
606         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
607         break;
608     case CODEC_ID_MSMPEG4V1:
609         s->out_format = FMT_H263;
610         s->h263_msmpeg4 = 1;
611         s->h263_pred = 1;
612         s->unrestricted_mv = 1;
613         s->msmpeg4_version= 1;
614         avctx->delay=0;
615         break;
616     case CODEC_ID_MSMPEG4V2:
617         s->out_format = FMT_H263;
618         s->h263_msmpeg4 = 1;
619         s->h263_pred = 1;
620         s->unrestricted_mv = 1;
621         s->msmpeg4_version= 2;
622         avctx->delay=0;
623         break;
624     case CODEC_ID_MSMPEG4V3:
625         s->out_format = FMT_H263;
626         s->h263_msmpeg4 = 1;
627         s->h263_pred = 1;
628         s->unrestricted_mv = 1;
629         s->msmpeg4_version= 3;
630         avctx->delay=0;
631         break;
632     case CODEC_ID_WMV1:
633         s->out_format = FMT_H263;
634         s->h263_msmpeg4 = 1;
635         s->h263_pred = 1;
636         s->unrestricted_mv = 1;
637         s->msmpeg4_version= 4;
638         avctx->delay=0;
639         break;
640     case CODEC_ID_WMV2:
641         s->out_format = FMT_H263;
642         s->h263_msmpeg4 = 1;
643         s->h263_pred = 1;
644         s->unrestricted_mv = 1;
645         s->msmpeg4_version= 5;
646         avctx->delay=0;
647         break;
648     default:
649         return -1;
650     }
651     
652     { /* set up some save defaults, some codecs might override them later */
653         static int done=0;
654         if(!done){
655             int i;
656             done=1;
657
658             default_mv_penalty= av_mallocz( sizeof(UINT16)*(MAX_FCODE+1)*(2*MAX_MV+1) );
659             memset(default_mv_penalty, 0, sizeof(UINT16)*(MAX_FCODE+1)*(2*MAX_MV+1));
660             memset(default_fcode_tab , 0, sizeof(UINT8)*(2*MAX_MV+1));
661
662             for(i=-16; i<16; i++){
663                 default_fcode_tab[i + MAX_MV]= 1;
664             }
665         }
666     }
667     s->mv_penalty= default_mv_penalty;
668     s->fcode_tab= default_fcode_tab;
669     s->y_dc_scale_table=
670     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
671  
672     /* dont use mv_penalty table for crap MV as it would be confused */
673     if (s->me_method < ME_EPZS) s->mv_penalty = default_mv_penalty;
674
675     s->encoding = 1;
676
677     /* init */
678     if (MPV_common_init(s) < 0)
679         return -1;
680     
681 #ifdef CONFIG_ENCODERS
682     if (s->out_format == FMT_H263)
683         h263_encode_init(s);
684     else if (s->out_format == FMT_MPEG1)
685         ff_mpeg1_encode_init(s);
686     if(s->msmpeg4_version)
687         ff_msmpeg4_encode_init(s);
688 #endif
689
690     /* init default q matrix */
691     for(i=0;i<64;i++) {
692         int j= s->idct_permutation[i];
693         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
694             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
695             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
696         }else if(s->out_format == FMT_H263){
697             s->intra_matrix[j] =
698             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
699         }else{ /* mpeg1 */
700             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
701             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
702         }
703     }
704
705     /* precompute matrix */
706     /* for mjpeg, we do include qscale in the matrix */
707     if (s->out_format != FMT_MJPEG) {
708         convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16, s->q_intra_matrix16_bias, 
709                        s->intra_matrix, s->intra_quant_bias, 1, 31);
710         convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16, s->q_inter_matrix16_bias, 
711                        s->inter_matrix, s->inter_quant_bias, 1, 31);
712     }
713
714     if(ff_rate_control_init(s) < 0)
715         return -1;
716
717     s->picture_number = 0;
718     s->picture_in_gop_number = 0;
719     s->fake_picture_number = 0;
720     /* motion detector init */
721     s->f_code = 1;
722     s->b_code = 1;
723
724     return 0;
725 }
726
727 int MPV_encode_end(AVCodecContext *avctx)
728 {
729     MpegEncContext *s = avctx->priv_data;
730
731 #ifdef STATS
732     print_stats();
733 #endif
734
735     ff_rate_control_uninit(s);
736
737     MPV_common_end(s);
738     if (s->out_format == FMT_MJPEG)
739         mjpeg_close(s);
740       
741     return 0;
742 }
743
744 /* draw the edges of width 'w' of an image of size width, height */
745 //FIXME check that this is ok for mpeg4 interlaced
746 static void draw_edges_c(UINT8 *buf, int wrap, int width, int height, int w)
747 {
748     UINT8 *ptr, *last_line;
749     int i;
750
751     last_line = buf + (height - 1) * wrap;
752     for(i=0;i<w;i++) {
753         /* top and bottom */
754         memcpy(buf - (i + 1) * wrap, buf, width);
755         memcpy(last_line + (i + 1) * wrap, last_line, width);
756     }
757     /* left and right */
758     ptr = buf;
759     for(i=0;i<height;i++) {
760         memset(ptr - w, ptr[0], w);
761         memset(ptr + width, ptr[width-1], w);
762         ptr += wrap;
763     }
764     /* corners */
765     for(i=0;i<w;i++) {
766         memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
767         memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
768         memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* top left */
769         memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* top right */
770     }
771 }
772
773 /* generic function for encode/decode called before a frame is coded/decoded */
774 int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
775 {
776     int i, r;
777     AVVideoFrame *pic;
778
779     s->mb_skiped = 0;
780     
781     /* mark&release old frames */
782     if (s->pict_type != B_TYPE && s->last_picture.data[0]) {
783         Picture *pic= NULL;
784         for(i=0; i<MAX_PICTURE_COUNT; i++){
785             if(s->picture[i].data[0] == s->last_picture.data[0]){
786 //                s->picture[i].reference=0;
787                 avctx->release_buffer(avctx, (AVVideoFrame*)&s->picture[i]);
788                 break;
789             }    
790         }
791         assert(i<MAX_PICTURE_COUNT);
792
793         /* release forgotten pictures */
794         /* if(mpeg124/h263) */
795         if(!s->encoding){
796             for(i=0; i<MAX_PICTURE_COUNT; i++){
797                 if(s->picture[i].data[0] && s->picture[i].data[0] != s->next_picture.data[0] && s->picture[i].reference){
798                     fprintf(stderr, "releasing zombie picture\n");
799                     avctx->release_buffer(avctx, (AVVideoFrame*)&s->picture[i]);                
800                 }
801             }
802         }
803     }
804
805     if(!s->encoding){        
806         /* find unused Picture */
807         for(i=0; i<MAX_PICTURE_COUNT; i++){
808             if(s->picture[i].data[0]==NULL) break;
809         }
810         assert(i<MAX_PICTURE_COUNT);
811     
812         pic= (AVVideoFrame*)&s->picture[i];
813         pic->reference= s->pict_type != B_TYPE;
814         pic->coded_picture_number= s->current_picture.coded_picture_number+1;
815         
816         r= avctx->get_buffer(avctx, pic);
817     
818         if(r<0 || (s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1]))){
819             fprintf(stderr, "get_buffer() failed (stride changed), bye bye\n");
820             return -1;
821         }
822
823         s->linesize  = pic->linesize[0];
824         s->uvlinesize= pic->linesize[1];
825     
826         if(pic->qscale_table==NULL)
827             alloc_picture(s, (Picture*)pic);
828
829         s->current_picture= s->picture[i];
830     }
831
832     s->hurry_up= s->avctx->hurry_up;
833     s->error_resilience= avctx->error_resilience;
834
835     if (s->pict_type != B_TYPE) {
836         s->last_picture= s->next_picture;
837         s->next_picture= s->current_picture;
838     }
839    
840     /* set dequantizer, we cant do it during init as it might change for mpeg4
841        and we cant do it in the header decode as init isnt called for mpeg4 there yet */
842     if(s->out_format == FMT_H263){
843         if(s->mpeg_quant)
844             s->dct_unquantize = s->dct_unquantize_mpeg2;
845         else
846             s->dct_unquantize = s->dct_unquantize_h263;
847     }else 
848         s->dct_unquantize = s->dct_unquantize_mpeg1;
849
850     return 0;
851 }
852
853 /* generic function for encode/decode called after a frame has been coded/decoded */
854 void MPV_frame_end(MpegEncContext *s)
855 {
856     int i;
857
858     /* draw edge for correct motion prediction if outside */
859     if(s->codec_id!=CODEC_ID_SVQ1){
860         if (s->pict_type != B_TYPE && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
861             draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
862             draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
863             draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
864         }
865     }
866     emms_c();
867     
868     s->last_pict_type    = s->pict_type;
869     if(s->pict_type!=B_TYPE){
870         s->last_non_b_pict_type= s->pict_type;
871         s->num_available_buffers++;
872         if(s->num_available_buffers>2) s->num_available_buffers= 2;
873     }
874     
875     s->current_picture.quality= s->qscale; //FIXME get average of qscale_table
876     s->current_picture.pict_type= s->pict_type;
877     s->current_picture.key_frame= s->pict_type == I_TYPE;
878     
879     /* copy back current_picture variables */
880     for(i=0; i<MAX_PICTURE_COUNT; i++){
881         if(s->picture[i].data[0] == s->current_picture.data[0]){
882             s->picture[i]= s->current_picture;
883             break;
884         }    
885     }
886     assert(i<MAX_PICTURE_COUNT);
887
888     /* release non refernce frames */
889     for(i=0; i<MAX_PICTURE_COUNT; i++){
890         if(s->picture[i].data[0] && !s->picture[i].reference)
891             s->avctx->release_buffer(s->avctx, (AVVideoFrame*)&s->picture[i]);
892     }
893 }
894
895 static int load_input_picture(MpegEncContext *s, AVVideoFrame *pic_arg){
896     AVVideoFrame *pic;
897     int i,r;
898     const int encoding_delay= s->max_b_frames;
899
900     /* find unused Picture */
901     for(i=0; i<MAX_PICTURE_COUNT; i++){
902         if(s->picture[i].data[0]==NULL) break;
903     }
904     assert(i<MAX_PICTURE_COUNT);
905         
906     pic= (AVVideoFrame*)&s->picture[i];
907     pic->reference= 1;
908     
909 //    assert(avctx->get_buffer == default_get_buffer || avctx->get_buffer==NULL);
910     r= s->avctx->get_buffer(s->avctx, pic);
911
912     if(r<0 || (s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1]))){
913         fprintf(stderr, "get_buffer() failed (stride changed), bye bye\n");
914         return -1;
915     }
916     
917     assert(s->linesize==0   || s->linesize  ==pic->linesize[0]);
918     assert(s->uvlinesize==0 || s->uvlinesize==pic->linesize[1]);
919     assert(pic->linesize[1] == pic->linesize[2]);
920     s->linesize  = pic->linesize[0];
921     s->uvlinesize= pic->linesize[1];
922     
923     if(pic->qscale_table==NULL)
924         alloc_picture(s, (Picture*)pic);
925
926 //    assert(s->input_picture[0]==NULL || s->input_picture[0]->data[0]==NULL);
927     
928     if(s->input_picture[encoding_delay])
929         pic->display_picture_number= s->input_picture[encoding_delay]->display_picture_number + 1;
930 //printf("dpn2:%d\n", pic->display_picture_number);
931
932     /* shift buffer entries */
933     for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
934         s->input_picture[i-1]= s->input_picture[i];
935         
936     s->input_picture[encoding_delay]= (Picture*)pic;
937     pic->pict_type= pic_arg->pict_type;
938     pic->quality= pic_arg->quality;
939     
940     if(   pic->data[0] == pic_arg->data[0] 
941        && pic->data[1] == pic_arg->data[1]
942        && pic->data[2] == pic_arg->data[2]){
943        // empty
944     }else{
945         int h_chroma_shift, v_chroma_shift;
946         
947         avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
948         
949         for(i=0; i<3; i++){
950             int src_stride= pic_arg->linesize[i];
951             int dst_stride= i ? s->uvlinesize : s->linesize;
952             int h_shift= i ? h_chroma_shift : 0;
953             int v_shift= i ? v_chroma_shift : 0;
954             int w= s->width >>h_shift;
955             int h= s->height>>v_shift;
956             uint8_t *src= pic_arg->data[i];
957             uint8_t *dst= pic->data[i] + 16;
958             
959             if(src_stride==dst_stride)
960                 memcpy(dst, src, src_stride*h);
961             else{
962                 while(h--){
963                     memcpy(dst, src, w);
964                     dst += dst_stride;
965                     src += src_stride;
966                 }
967             }
968         }
969     }
970
971     return 0;
972 }
973
974 static void select_input_picture(MpegEncContext *s){
975     int i;
976     const int encoding_delay= s->max_b_frames;
977     int coded_pic_num=0;    
978
979     if(s->reordered_input_picture[0])
980         coded_pic_num= s->reordered_input_picture[0]->coded_picture_number + 1;
981 //printf("cpn:%d\n", coded_pic_num);
982     for(i=1; i<MAX_PICTURE_COUNT; i++)
983         s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
984     s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;
985
986     /* set next picture types & ordering */
987     if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
988         if(s->input_picture[0]->pict_type){
989             /* user selected pict_type */
990             if(s->input_picture[0]->pict_type == I_TYPE){
991                 s->reordered_input_picture[0]= s->input_picture[0];
992                 s->reordered_input_picture[0]->coded_picture_number= coded_pic_num;
993             }else{
994                 int b_frames;
995
996                 for(b_frames=0; b_frames<s->max_b_frames+1; b_frames++){
997                     if(s->input_picture[b_frames]->pict_type!=B_TYPE) break;
998                 }
999                 
1000                 if(b_frames > s->max_b_frames){
1001                     fprintf(stderr, "warning, too many bframes in a row\n");
1002                     b_frames = s->max_b_frames;
1003                     s->input_picture[b_frames]->pict_type= I_TYPE;
1004                 }
1005                 
1006                 s->reordered_input_picture[0]= s->input_picture[b_frames];
1007                 s->reordered_input_picture[0]->coded_picture_number= coded_pic_num;
1008                 for(i=0; i<b_frames; i++){
1009                     coded_pic_num++;
1010                     s->reordered_input_picture[i+1]= s->input_picture[i];
1011                     s->reordered_input_picture[i+1]->coded_picture_number= coded_pic_num;
1012                 }    
1013             }
1014         }else{
1015             if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture.data[0]==NULL || s->intra_only){
1016                 s->reordered_input_picture[0]= s->input_picture[0];
1017                 s->reordered_input_picture[0]->pict_type= I_TYPE;
1018                 s->reordered_input_picture[0]->coded_picture_number= coded_pic_num;
1019             }else{
1020                 s->reordered_input_picture[0]= s->input_picture[s->max_b_frames];
1021                 if(s->picture_in_gop_number + s->max_b_frames >= s->gop_size)
1022                     s->reordered_input_picture[0]->pict_type= I_TYPE;
1023                 else
1024                     s->reordered_input_picture[0]->pict_type= P_TYPE;
1025                 s->reordered_input_picture[0]->coded_picture_number= coded_pic_num;
1026                 for(i=0; i<s->max_b_frames; i++){
1027                     coded_pic_num++;
1028                     s->reordered_input_picture[i+1]= s->input_picture[i];
1029                     s->reordered_input_picture[i+1]->pict_type= B_TYPE;
1030                     s->reordered_input_picture[i+1]->coded_picture_number= coded_pic_num;
1031                 }    
1032             }
1033         }
1034     }
1035     
1036     if(s->reordered_input_picture[0]){
1037         if(s->reordered_input_picture[0]->pict_type==B_TYPE){
1038             s->reordered_input_picture[0]->reference=0;
1039         }
1040         s->current_picture= *s->reordered_input_picture[0];
1041         s->new_picture= s->current_picture;
1042         s->new_picture.data[0]+=16;
1043         s->new_picture.data[1]+=16;
1044         s->new_picture.data[2]+=16;
1045     
1046         s->picture_number= s->new_picture.display_picture_number;
1047 //printf("dpn:%d\n", s->picture_number);
1048     }else{
1049        memset(&s->new_picture, 0, sizeof(Picture));
1050     }
1051 }
1052
1053 int MPV_encode_picture(AVCodecContext *avctx,
1054                        unsigned char *buf, int buf_size, void *data)
1055 {
1056     MpegEncContext *s = avctx->priv_data;
1057     AVVideoFrame *pic_arg = data;
1058     int i;
1059
1060     init_put_bits(&s->pb, buf, buf_size, NULL, NULL);
1061
1062     s->picture_in_gop_number++;
1063
1064     load_input_picture(s, pic_arg);
1065     
1066     select_input_picture(s);
1067     
1068     /* output? */
1069     if(s->new_picture.data[0]){
1070
1071         s->pict_type= s->new_picture.pict_type;
1072         if (s->fixed_qscale){ /* the ratecontrol needs the last qscale so we dont touch it for CBR */
1073             s->qscale= (int)(s->new_picture.quality+0.5);
1074             assert(s->qscale);
1075         }
1076 //emms_c();
1077 //printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
1078         MPV_frame_start(s, avctx);
1079
1080         encode_picture(s, s->picture_number);
1081         
1082         avctx->real_pict_num  = s->picture_number;
1083         avctx->header_bits = s->header_bits;
1084         avctx->mv_bits     = s->mv_bits;
1085         avctx->misc_bits   = s->misc_bits;
1086         avctx->i_tex_bits  = s->i_tex_bits;
1087         avctx->p_tex_bits  = s->p_tex_bits;
1088         avctx->i_count     = s->i_count;
1089         avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
1090         avctx->skip_count  = s->skip_count;
1091
1092         MPV_frame_end(s);
1093
1094         if (s->out_format == FMT_MJPEG)
1095             mjpeg_picture_trailer(s);
1096         
1097         if(s->flags&CODEC_FLAG_PASS1)
1098             ff_write_pass1_stats(s);
1099     }
1100
1101     s->input_picture_number++;
1102
1103     flush_put_bits(&s->pb);
1104     s->frame_bits  = (pbBufPtr(&s->pb) - s->pb.buf) * 8;
1105     
1106     s->total_bits += s->frame_bits;
1107     avctx->frame_bits  = s->frame_bits;
1108
1109     for(i=0; i<4; i++){
1110         avctx->error[i] += s->current_picture.error[i];
1111     }
1112     
1113     return pbBufPtr(&s->pb) - s->pb.buf;
1114 }
1115
1116 static inline void gmc1_motion(MpegEncContext *s,
1117                                UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
1118                                int dest_offset,
1119                                UINT8 **ref_picture, int src_offset)
1120 {
1121     UINT8 *ptr;
1122     int offset, src_x, src_y, linesize, uvlinesize;
1123     int motion_x, motion_y;
1124     int emu=0;
1125
1126     motion_x= s->sprite_offset[0][0];
1127     motion_y= s->sprite_offset[0][1];
1128     src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
1129     src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
1130     motion_x<<=(3-s->sprite_warping_accuracy);
1131     motion_y<<=(3-s->sprite_warping_accuracy);
1132     src_x = clip(src_x, -16, s->width);
1133     if (src_x == s->width)
1134         motion_x =0;
1135     src_y = clip(src_y, -16, s->height);
1136     if (src_y == s->height)
1137         motion_y =0;
1138
1139     linesize = s->linesize;
1140     uvlinesize = s->uvlinesize;
1141     
1142     ptr = ref_picture[0] + (src_y * linesize) + src_x + src_offset;
1143
1144     dest_y+=dest_offset;
1145     if(s->flags&CODEC_FLAG_EMU_EDGE){
1146         if(src_x<0 || src_y<0 || src_x + (motion_x&15) + 16 > s->h_edge_pos
1147                               || src_y + (motion_y&15) + 16 > s->v_edge_pos){
1148             emulated_edge_mc(s, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
1149             ptr= s->edge_emu_buffer;
1150             emu=1;
1151         }
1152     }
1153     
1154     if((motion_x|motion_y)&7){
1155         s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
1156         s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
1157     }else{
1158         int dxy;
1159         
1160         dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
1161         if (s->no_rounding){
1162             s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
1163         }else{
1164             s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
1165         }
1166     }
1167     
1168     if(s->flags&CODEC_FLAG_GRAY) return;
1169
1170     motion_x= s->sprite_offset[1][0];
1171     motion_y= s->sprite_offset[1][1];
1172     src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
1173     src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
1174     motion_x<<=(3-s->sprite_warping_accuracy);
1175     motion_y<<=(3-s->sprite_warping_accuracy);
1176     src_x = clip(src_x, -8, s->width>>1);
1177     if (src_x == s->width>>1)
1178         motion_x =0;
1179     src_y = clip(src_y, -8, s->height>>1);
1180     if (src_y == s->height>>1)
1181         motion_y =0;
1182
1183     offset = (src_y * uvlinesize) + src_x + (src_offset>>1);
1184     ptr = ref_picture[1] + offset;
1185     if(emu){
1186         emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
1187         ptr= s->edge_emu_buffer;
1188     }
1189     s->dsp.gmc1(dest_cb + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
1190     
1191     ptr = ref_picture[2] + offset;
1192     if(emu){
1193         emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
1194         ptr= s->edge_emu_buffer;
1195     }
1196     s->dsp.gmc1(dest_cr + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
1197     
1198     return;
1199 }
1200
1201 static inline void gmc_motion(MpegEncContext *s,
1202                                UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
1203                                int dest_offset,
1204                                UINT8 **ref_picture, int src_offset)
1205 {
1206     UINT8 *ptr;
1207     int linesize, uvlinesize;
1208     const int a= s->sprite_warping_accuracy;
1209     int ox, oy;
1210
1211     linesize = s->linesize;
1212     uvlinesize = s->uvlinesize;
1213
1214     ptr = ref_picture[0] + src_offset;
1215
1216     dest_y+=dest_offset;
1217     
1218     ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
1219     oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
1220
1221     s->dsp.gmc(dest_y, ptr, linesize, 16,
1222            ox, 
1223            oy, 
1224            s->sprite_delta[0][0], s->sprite_delta[0][1],
1225            s->sprite_delta[1][0], s->sprite_delta[1][1], 
1226            a+1, (1<<(2*a+1)) - s->no_rounding,
1227            s->h_edge_pos, s->v_edge_pos);
1228     s->dsp.gmc(dest_y+8, ptr, linesize, 16,
1229            ox + s->sprite_delta[0][0]*8, 
1230            oy + s->sprite_delta[1][0]*8, 
1231            s->sprite_delta[0][0], s->sprite_delta[0][1],
1232            s->sprite_delta[1][0], s->sprite_delta[1][1], 
1233            a+1, (1<<(2*a+1)) - s->no_rounding,
1234            s->h_edge_pos, s->v_edge_pos);
1235
1236     if(s->flags&CODEC_FLAG_GRAY) return;
1237
1238
1239     dest_cb+=dest_offset>>1;
1240     dest_cr+=dest_offset>>1;
1241     
1242     ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
1243     oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
1244
1245     ptr = ref_picture[1] + (src_offset>>1);
1246     s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
1247            ox, 
1248            oy, 
1249            s->sprite_delta[0][0], s->sprite_delta[0][1],
1250            s->sprite_delta[1][0], s->sprite_delta[1][1], 
1251            a+1, (1<<(2*a+1)) - s->no_rounding,
1252            s->h_edge_pos>>1, s->v_edge_pos>>1);
1253     
1254     ptr = ref_picture[2] + (src_offset>>1);
1255     s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
1256            ox, 
1257            oy, 
1258            s->sprite_delta[0][0], s->sprite_delta[0][1],
1259            s->sprite_delta[1][0], s->sprite_delta[1][1], 
1260            a+1, (1<<(2*a+1)) - s->no_rounding,
1261            s->h_edge_pos>>1, s->v_edge_pos>>1);
1262 }
1263
1264
1265 static void emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int block_w, int block_h, 
1266                                     int src_x, int src_y, int w, int h){
1267     int x, y;
1268     int start_y, start_x, end_y, end_x;
1269     UINT8 *buf= s->edge_emu_buffer;
1270
1271     if(src_y>= h){
1272         src+= (h-1-src_y)*linesize;
1273         src_y=h-1;
1274     }else if(src_y<=-block_h){
1275         src+= (1-block_h-src_y)*linesize;
1276         src_y=1-block_h;
1277     }
1278     if(src_x>= w){
1279         src+= (w-1-src_x);
1280         src_x=w-1;
1281     }else if(src_x<=-block_w){
1282         src+= (1-block_w-src_x);
1283         src_x=1-block_w;
1284     }
1285
1286     start_y= FFMAX(0, -src_y);
1287     start_x= FFMAX(0, -src_x);
1288     end_y= FFMIN(block_h, h-src_y);
1289     end_x= FFMIN(block_w, w-src_x);
1290
1291     // copy existing part
1292     for(y=start_y; y<end_y; y++){
1293         for(x=start_x; x<end_x; x++){
1294             buf[x + y*linesize]= src[x + y*linesize];
1295         }
1296     }
1297
1298     //top
1299     for(y=0; y<start_y; y++){
1300         for(x=start_x; x<end_x; x++){
1301             buf[x + y*linesize]= buf[x + start_y*linesize];
1302         }
1303     }
1304
1305     //bottom
1306     for(y=end_y; y<block_h; y++){
1307         for(x=start_x; x<end_x; x++){
1308             buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
1309         }
1310     }
1311                                     
1312     for(y=0; y<block_h; y++){
1313        //left
1314         for(x=0; x<start_x; x++){
1315             buf[x + y*linesize]= buf[start_x + y*linesize];
1316         }
1317        
1318        //right
1319         for(x=end_x; x<block_w; x++){
1320             buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
1321         }
1322     }
1323 }
1324
1325
1326 /* apply one mpeg motion vector to the three components */
1327 static inline void mpeg_motion(MpegEncContext *s,
1328                                UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
1329                                int dest_offset,
1330                                UINT8 **ref_picture, int src_offset,
1331                                int field_based, op_pixels_func (*pix_op)[4],
1332                                int motion_x, int motion_y, int h)
1333 {
1334     UINT8 *ptr;
1335     int dxy, offset, mx, my, src_x, src_y, height, v_edge_pos, linesize, uvlinesize;
1336     int emu=0;
1337 #if 0    
1338 if(s->quarter_sample)
1339 {
1340     motion_x>>=1;
1341     motion_y>>=1;
1342 }
1343 #endif
1344     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
1345     src_x = s->mb_x * 16 + (motion_x >> 1);
1346     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 1);
1347                 
1348     /* WARNING: do no forget half pels */
1349     height = s->height >> field_based;
1350     v_edge_pos = s->v_edge_pos >> field_based;
1351     src_x = clip(src_x, -16, s->width);
1352     if (src_x == s->width)
1353         dxy &= ~1;
1354     src_y = clip(src_y, -16, height);
1355     if (src_y == height)
1356         dxy &= ~2;
1357     linesize   = s->linesize << field_based;
1358     uvlinesize = s->uvlinesize << field_based;
1359     ptr = ref_picture[0] + (src_y * linesize) + (src_x) + src_offset;
1360     dest_y += dest_offset;
1361
1362     if(s->flags&CODEC_FLAG_EMU_EDGE){
1363         if(src_x<0 || src_y<0 || src_x + (motion_x&1) + 16 > s->h_edge_pos
1364                               || src_y + (motion_y&1) + h  > v_edge_pos){
1365             emulated_edge_mc(s, ptr - src_offset, s->linesize, 17, 17+field_based, 
1366                              src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
1367             ptr= s->edge_emu_buffer + src_offset;
1368             emu=1;
1369         }
1370     }
1371     pix_op[0][dxy](dest_y, ptr, linesize, h);
1372
1373     if(s->flags&CODEC_FLAG_GRAY) return;
1374
1375     if (s->out_format == FMT_H263) {
1376         dxy = 0;
1377         if ((motion_x & 3) != 0)
1378             dxy |= 1;
1379         if ((motion_y & 3) != 0)
1380             dxy |= 2;
1381         mx = motion_x >> 2;
1382         my = motion_y >> 2;
1383     } else {
1384         mx = motion_x / 2;
1385         my = motion_y / 2;
1386         dxy = ((my & 1) << 1) | (mx & 1);
1387         mx >>= 1;
1388         my >>= 1;
1389     }
1390     
1391     src_x = s->mb_x * 8 + mx;
1392     src_y = s->mb_y * (8 >> field_based) + my;
1393     src_x = clip(src_x, -8, s->width >> 1);
1394     if (src_x == (s->width >> 1))
1395         dxy &= ~1;
1396     src_y = clip(src_y, -8, height >> 1);
1397     if (src_y == (height >> 1))
1398         dxy &= ~2;
1399     offset = (src_y * uvlinesize) + src_x + (src_offset >> 1);
1400     ptr = ref_picture[1] + offset;
1401     if(emu){
1402         emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based, 
1403                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
1404         ptr= s->edge_emu_buffer + (src_offset >> 1);
1405     }
1406     pix_op[1][dxy](dest_cb + (dest_offset >> 1), ptr, uvlinesize, h >> 1);
1407
1408     ptr = ref_picture[2] + offset;
1409     if(emu){
1410         emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based, 
1411                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
1412         ptr= s->edge_emu_buffer + (src_offset >> 1);
1413     }
1414     pix_op[1][dxy](dest_cr + (dest_offset >> 1), ptr, uvlinesize, h >> 1);
1415 }
1416
1417 static inline void qpel_motion(MpegEncContext *s,
1418                                UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
1419                                int dest_offset,
1420                                UINT8 **ref_picture, int src_offset,
1421                                int field_based, op_pixels_func (*pix_op)[4],
1422                                qpel_mc_func (*qpix_op)[16],
1423                                int motion_x, int motion_y, int h)
1424 {
1425     UINT8 *ptr;
1426     int dxy, offset, mx, my, src_x, src_y, height, v_edge_pos, linesize, uvlinesize;
1427     int emu=0;
1428
1429     dxy = ((motion_y & 3) << 2) | (motion_x & 3);
1430     src_x = s->mb_x * 16 + (motion_x >> 2);
1431     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);
1432
1433     height = s->height >> field_based;
1434     v_edge_pos = s->v_edge_pos >> field_based;
1435     src_x = clip(src_x, -16, s->width);
1436     if (src_x == s->width)
1437         dxy &= ~3;
1438     src_y = clip(src_y, -16, height);
1439     if (src_y == height)
1440         dxy &= ~12;
1441     linesize = s->linesize << field_based;
1442     uvlinesize = s->uvlinesize << field_based;
1443     ptr = ref_picture[0] + (src_y * linesize) + src_x + src_offset;
1444     dest_y += dest_offset;
1445 //printf("%d %d %d\n", src_x, src_y, dxy);
1446     
1447     if(s->flags&CODEC_FLAG_EMU_EDGE){
1448         if(src_x<0 || src_y<0 || src_x + (motion_x&3) + 16 > s->h_edge_pos
1449                               || src_y + (motion_y&3) + h  > v_edge_pos){
1450             emulated_edge_mc(s, ptr - src_offset, s->linesize, 17, 17+field_based, 
1451                              src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
1452             ptr= s->edge_emu_buffer + src_offset;
1453             emu=1;
1454         }
1455     }
1456     if(!field_based)
1457         qpix_op[0][dxy](dest_y, ptr, linesize);
1458     else{
1459         //damn interlaced mode
1460         //FIXME boundary mirroring is not exactly correct here
1461         qpix_op[1][dxy](dest_y  , ptr  , linesize);
1462         qpix_op[1][dxy](dest_y+8, ptr+8, linesize);
1463     }
1464
1465     if(s->flags&CODEC_FLAG_GRAY) return;
1466
1467     if(field_based){
1468         mx= motion_x/2;
1469         my= motion_y>>1;
1470     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
1471         mx= (motion_x>>1)|(motion_x&1);
1472         my= (motion_y>>1)|(motion_y&1);
1473     }else{
1474         mx= motion_x/2;
1475         my= motion_y/2;
1476     }
1477     mx= (mx>>1)|(mx&1);
1478     my= (my>>1)|(my&1);
1479     dxy= (mx&1) | ((my&1)<<1);
1480     mx>>=1;
1481     my>>=1;
1482
1483     src_x = s->mb_x * 8 + mx;
1484     src_y = s->mb_y * (8 >> field_based) + my;
1485     src_x = clip(src_x, -8, s->width >> 1);
1486     if (src_x == (s->width >> 1))
1487         dxy &= ~1;
1488     src_y = clip(src_y, -8, height >> 1);
1489     if (src_y == (height >> 1))
1490         dxy &= ~2;
1491
1492     offset = (src_y * uvlinesize) + src_x + (src_offset >> 1);
1493     ptr = ref_picture[1] + offset;
1494     if(emu){
1495         emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based, 
1496                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
1497         ptr= s->edge_emu_buffer + (src_offset >> 1);
1498     }
1499     pix_op[1][dxy](dest_cb + (dest_offset >> 1), ptr,  uvlinesize, h >> 1);
1500     
1501     ptr = ref_picture[2] + offset;
1502     if(emu){
1503         emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based, 
1504                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
1505         ptr= s->edge_emu_buffer + (src_offset >> 1);
1506     }
1507     pix_op[1][dxy](dest_cr + (dest_offset >> 1), ptr,  uvlinesize, h >> 1);
1508 }
1509
1510
1511 static inline void MPV_motion(MpegEncContext *s, 
1512                               UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
1513                               int dir, UINT8 **ref_picture, 
1514                               op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
1515 {
1516     int dxy, offset, mx, my, src_x, src_y, motion_x, motion_y;
1517     int mb_x, mb_y, i;
1518     UINT8 *ptr, *dest;
1519     int emu=0;
1520
1521     mb_x = s->mb_x;
1522     mb_y = s->mb_y;
1523
1524     switch(s->mv_type) {
1525     case MV_TYPE_16X16:
1526         if(s->mcsel){
1527             if(s->real_sprite_warping_points==1){
1528                 gmc1_motion(s, dest_y, dest_cb, dest_cr, 0,
1529                             ref_picture, 0);
1530             }else{
1531                 gmc_motion(s, dest_y, dest_cb, dest_cr, 0,
1532                             ref_picture, 0);
1533             }
1534         }else if(s->quarter_sample){
1535             qpel_motion(s, dest_y, dest_cb, dest_cr, 0,
1536                         ref_picture, 0,
1537                         0, pix_op, qpix_op,
1538                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
1539         }else{
1540             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
1541                         ref_picture, 0,
1542                         0, pix_op,
1543                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
1544         }           
1545         break;
1546     case MV_TYPE_8X8:
1547         mx = 0;
1548         my = 0;
1549         if(s->quarter_sample){
1550             for(i=0;i<4;i++) {
1551                 motion_x = s->mv[dir][i][0];
1552                 motion_y = s->mv[dir][i][1];
1553
1554                 dxy = ((motion_y & 3) << 2) | (motion_x & 3);
1555                 src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
1556                 src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;
1557                     
1558                 /* WARNING: do no forget half pels */
1559                 src_x = clip(src_x, -16, s->width);
1560                 if (src_x == s->width)
1561                     dxy &= ~3;
1562                 src_y = clip(src_y, -16, s->height);
1563                 if (src_y == s->height)
1564                     dxy &= ~12;
1565                     
1566                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
1567                 if(s->flags&CODEC_FLAG_EMU_EDGE){
1568                     if(src_x<0 || src_y<0 || src_x + (motion_x&3) + 8 > s->h_edge_pos
1569                                           || src_y + (motion_y&3) + 8 > s->v_edge_pos){
1570                         emulated_edge_mc(s, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
1571                         ptr= s->edge_emu_buffer;
1572                     }
1573                 }
1574                 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
1575                 qpix_op[1][dxy](dest, ptr, s->linesize);
1576
1577                 mx += s->mv[dir][i][0]/2;
1578                 my += s->mv[dir][i][1]/2;
1579             }
1580         }else{
1581             for(i=0;i<4;i++) {
1582                 motion_x = s->mv[dir][i][0];
1583                 motion_y = s->mv[dir][i][1];
1584
1585                 dxy = ((motion_y & 1) << 1) | (motion_x & 1);
1586                 src_x = mb_x * 16 + (motion_x >> 1) + (i & 1) * 8;
1587                 src_y = mb_y * 16 + (motion_y >> 1) + (i >>1) * 8;
1588                     
1589                 /* WARNING: do no forget half pels */
1590                 src_x = clip(src_x, -16, s->width);
1591                 if (src_x == s->width)
1592                     dxy &= ~1;
1593                 src_y = clip(src_y, -16, s->height);
1594                 if (src_y == s->height)
1595                     dxy &= ~2;
1596                     
1597                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
1598                 if(s->flags&CODEC_FLAG_EMU_EDGE){
1599                     if(src_x<0 || src_y<0 || src_x + (motion_x&1) + 8 > s->h_edge_pos
1600                                           || src_y + (motion_y&1) + 8 > s->v_edge_pos){
1601                         emulated_edge_mc(s, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
1602                         ptr= s->edge_emu_buffer;
1603                     }
1604                 }
1605                 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
1606                 pix_op[1][dxy](dest, ptr, s->linesize, 8);
1607
1608                 mx += s->mv[dir][i][0];
1609                 my += s->mv[dir][i][1];
1610             }
1611         }
1612
1613         if(s->flags&CODEC_FLAG_GRAY) break;
1614         /* In case of 8X8, we construct a single chroma motion vector
1615            with a special rounding */
1616         for(i=0;i<4;i++) {
1617         }
1618         if (mx >= 0)
1619             mx = (h263_chroma_roundtab[mx & 0xf] + ((mx >> 3) & ~1));
1620         else {
1621             mx = -mx;
1622             mx = -(h263_chroma_roundtab[mx & 0xf] + ((mx >> 3) & ~1));
1623         }
1624         if (my >= 0)
1625             my = (h263_chroma_roundtab[my & 0xf] + ((my >> 3) & ~1));
1626         else {
1627             my = -my;
1628             my = -(h263_chroma_roundtab[my & 0xf] + ((my >> 3) & ~1));
1629         }
1630         dxy = ((my & 1) << 1) | (mx & 1);
1631         mx >>= 1;
1632         my >>= 1;
1633
1634         src_x = mb_x * 8 + mx;
1635         src_y = mb_y * 8 + my;
1636         src_x = clip(src_x, -8, s->width/2);
1637         if (src_x == s->width/2)
1638             dxy &= ~1;
1639         src_y = clip(src_y, -8, s->height/2);
1640         if (src_y == s->height/2)
1641             dxy &= ~2;
1642         
1643         offset = (src_y * (s->uvlinesize)) + src_x;
1644         ptr = ref_picture[1] + offset;
1645         if(s->flags&CODEC_FLAG_EMU_EDGE){
1646                 if(src_x<0 || src_y<0 || src_x + (dxy &1) + 8 > s->h_edge_pos>>1
1647                                       || src_y + (dxy>>1) + 8 > s->v_edge_pos>>1){
1648                     emulated_edge_mc(s, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
1649                     ptr= s->edge_emu_buffer;
1650                     emu=1;
1651                 }
1652             }
1653         pix_op[1][dxy](dest_cb, ptr, s->uvlinesize, 8);
1654
1655         ptr = ref_picture[2] + offset;
1656         if(emu){
1657             emulated_edge_mc(s, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
1658             ptr= s->edge_emu_buffer;
1659         }
1660         pix_op[1][dxy](dest_cr, ptr, s->uvlinesize, 8);
1661         break;
1662     case MV_TYPE_FIELD:
1663         if (s->picture_structure == PICT_FRAME) {
1664             if(s->quarter_sample){
1665                 /* top field */
1666                 qpel_motion(s, dest_y, dest_cb, dest_cr, 0,
1667                             ref_picture, s->field_select[dir][0] ? s->linesize : 0,
1668                             1, pix_op, qpix_op,
1669                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
1670                 /* bottom field */
1671                 qpel_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
1672                             ref_picture, s->field_select[dir][1] ? s->linesize : 0,
1673                             1, pix_op, qpix_op,
1674                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
1675             }else{
1676                 /* top field */       
1677                 mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
1678                             ref_picture, s->field_select[dir][0] ? s->linesize : 0,
1679                             1, pix_op,
1680                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
1681                 /* bottom field */
1682                 mpeg_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
1683                             ref_picture, s->field_select[dir][1] ? s->linesize : 0,
1684                             1, pix_op,
1685                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
1686             }
1687         } else {
1688             
1689
1690         }
1691         break;
1692     }
1693 }
1694
1695
1696 /* put block[] to dest[] */
1697 static inline void put_dct(MpegEncContext *s, 
1698                            DCTELEM *block, int i, UINT8 *dest, int line_size)
1699 {
1700     s->dct_unquantize(s, block, i, s->qscale);
1701     s->idct_put (dest, line_size, block);
1702 }
1703
1704 /* add block[] to dest[] */
1705 static inline void add_dct(MpegEncContext *s, 
1706                            DCTELEM *block, int i, UINT8 *dest, int line_size)
1707 {
1708     if (s->block_last_index[i] >= 0) {
1709         s->idct_add (dest, line_size, block);
1710     }
1711 }
1712
1713 static inline void add_dequant_dct(MpegEncContext *s, 
1714                            DCTELEM *block, int i, UINT8 *dest, int line_size)
1715 {
1716     if (s->block_last_index[i] >= 0) {
1717         s->dct_unquantize(s, block, i, s->qscale);
1718
1719         s->idct_add (dest, line_size, block);
1720     }
1721 }
1722
1723 /**
1724  * cleans dc, ac, coded_block for the current non intra MB
1725  */
1726 void ff_clean_intra_table_entries(MpegEncContext *s)
1727 {
1728     int wrap = s->block_wrap[0];
1729     int xy = s->block_index[0];
1730     
1731     s->dc_val[0][xy           ] = 
1732     s->dc_val[0][xy + 1       ] = 
1733     s->dc_val[0][xy     + wrap] =
1734     s->dc_val[0][xy + 1 + wrap] = 1024;
1735     /* ac pred */
1736     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(INT16));
1737     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(INT16));
1738     if (s->msmpeg4_version>=3) {
1739         s->coded_block[xy           ] =
1740         s->coded_block[xy + 1       ] =
1741         s->coded_block[xy     + wrap] =
1742         s->coded_block[xy + 1 + wrap] = 0;
1743     }
1744     /* chroma */
1745     wrap = s->block_wrap[4];
1746     xy = s->mb_x + 1 + (s->mb_y + 1) * wrap;
1747     s->dc_val[1][xy] =
1748     s->dc_val[2][xy] = 1024;
1749     /* ac pred */
1750     memset(s->ac_val[1][xy], 0, 16 * sizeof(INT16));
1751     memset(s->ac_val[2][xy], 0, 16 * sizeof(INT16));
1752     
1753     s->mbintra_table[s->mb_x + s->mb_y*s->mb_width]= 0;
1754 }
1755
1756 /* generic function called after a macroblock has been parsed by the
1757    decoder or after it has been encoded by the encoder.
1758
1759    Important variables used:
1760    s->mb_intra : true if intra macroblock
1761    s->mv_dir   : motion vector direction
1762    s->mv_type  : motion vector type
1763    s->mv       : motion vector
1764    s->interlaced_dct : true if interlaced dct used (mpeg2)
1765  */
1766 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
1767 {
1768     int mb_x, mb_y;
1769     const int mb_xy = s->mb_y * s->mb_width + s->mb_x;
1770
1771     mb_x = s->mb_x;
1772     mb_y = s->mb_y;
1773
1774     s->current_picture.qscale_table[mb_xy]= s->qscale;
1775
1776     /* update DC predictors for P macroblocks */
1777     if (!s->mb_intra) {
1778         if (s->h263_pred || s->h263_aic) {
1779             if(s->mbintra_table[mb_xy])
1780                 ff_clean_intra_table_entries(s);
1781         } else {
1782             s->last_dc[0] =
1783             s->last_dc[1] =
1784             s->last_dc[2] = 128 << s->intra_dc_precision;
1785         }
1786     }
1787     else if (s->h263_pred || s->h263_aic)
1788         s->mbintra_table[mb_xy]=1;
1789
1790     /* update motion predictor, not for B-frames as they need the motion_val from the last P/S-Frame */
1791     if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE) { //FIXME move into h263.c if possible, format specific stuff shouldnt be here
1792         
1793         const int wrap = s->block_wrap[0];
1794         const int xy = s->block_index[0];
1795         const int mb_index= s->mb_x + s->mb_y*s->mb_width;
1796         if(s->mv_type == MV_TYPE_8X8){
1797             s->co_located_type_table[mb_index]= CO_LOCATED_TYPE_4MV;
1798         } else {
1799             int motion_x, motion_y;
1800             if (s->mb_intra) {
1801                 motion_x = 0;
1802                 motion_y = 0;
1803                 if(s->co_located_type_table)
1804                     s->co_located_type_table[mb_index]= 0;
1805             } else if (s->mv_type == MV_TYPE_16X16) {
1806                 motion_x = s->mv[0][0][0];
1807                 motion_y = s->mv[0][0][1];
1808                 if(s->co_located_type_table)
1809                     s->co_located_type_table[mb_index]= 0;
1810             } else /*if (s->mv_type == MV_TYPE_FIELD)*/ {
1811                 int i;
1812                 motion_x = s->mv[0][0][0] + s->mv[0][1][0];
1813                 motion_y = s->mv[0][0][1] + s->mv[0][1][1];
1814                 motion_x = (motion_x>>1) | (motion_x&1);
1815                 for(i=0; i<2; i++){
1816                     s->field_mv_table[mb_index][i][0]= s->mv[0][i][0];
1817                     s->field_mv_table[mb_index][i][1]= s->mv[0][i][1];
1818                     s->field_select_table[mb_index][i]= s->field_select[0][i];
1819                 }
1820                 s->co_located_type_table[mb_index]= CO_LOCATED_TYPE_FIELDMV;
1821             }
1822             /* no update if 8X8 because it has been done during parsing */
1823             s->motion_val[xy][0] = motion_x;
1824             s->motion_val[xy][1] = motion_y;
1825             s->motion_val[xy + 1][0] = motion_x;
1826             s->motion_val[xy + 1][1] = motion_y;
1827             s->motion_val[xy + wrap][0] = motion_x;
1828             s->motion_val[xy + wrap][1] = motion_y;
1829             s->motion_val[xy + 1 + wrap][0] = motion_x;
1830             s->motion_val[xy + 1 + wrap][1] = motion_y;
1831         }
1832     }
1833     
1834     if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { //FIXME precalc
1835         UINT8 *dest_y, *dest_cb, *dest_cr;
1836         int dct_linesize, dct_offset;
1837         op_pixels_func (*op_pix)[4];
1838         qpel_mc_func (*op_qpix)[16];
1839
1840         /* avoid copy if macroblock skipped in last frame too */
1841         if (s->pict_type != B_TYPE) {
1842             s->current_picture.mbskip_table[mb_xy]= s->mb_skiped;
1843         }
1844
1845         /* skip only during decoding as we might trash the buffers during encoding a bit */
1846         if(!s->encoding){
1847             UINT8 *mbskip_ptr = &s->mbskip_table[mb_xy];
1848             const int age= s->current_picture.age;
1849
1850             assert(age);
1851
1852             if (s->mb_skiped) {
1853                 s->mb_skiped= 0;
1854                 assert(s->pict_type!=I_TYPE);
1855  
1856                 (*mbskip_ptr) ++; /* indicate that this time we skiped it */
1857                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
1858
1859                 /* if previous was skipped too, then nothing to do !  */
1860                 if (*mbskip_ptr >= age){
1861 //if(s->pict_type!=B_TYPE && s->mb_x==0) printf("\n");
1862 //if(s->pict_type!=B_TYPE) printf("%d%d ", *mbskip_ptr, age);
1863                     if(s->pict_type!=B_TYPE) return;
1864                     if(s->avctx->draw_horiz_band==NULL && *mbskip_ptr > age) return; 
1865                     /* we dont draw complete frames here so we cant skip */
1866                 }
1867             } else {
1868                 *mbskip_ptr = 0; /* not skipped */
1869             }
1870         }else
1871             s->mb_skiped= 0;
1872
1873         if(s->pict_type==B_TYPE && s->avctx->draw_horiz_band){
1874             dest_y  = s->current_picture.data[0] + mb_x * 16;
1875             dest_cb = s->current_picture.data[1] + mb_x * 8;
1876             dest_cr = s->current_picture.data[2] + mb_x * 8;
1877         }else{
1878             dest_y  = s->current_picture.data[0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
1879             dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
1880             dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
1881         }
1882
1883         if (s->interlaced_dct) {
1884             dct_linesize = s->linesize * 2;
1885             dct_offset = s->linesize;
1886         } else {
1887             dct_linesize = s->linesize;
1888             dct_offset = s->linesize * 8;
1889         }
1890
1891         if (!s->mb_intra) {
1892             /* motion handling */
1893             /* decoding or more than one mb_type (MC was allready done otherwise) */
1894             if((!s->encoding) || (s->mb_type[mb_xy]&(s->mb_type[mb_xy]-1))){
1895                 if ((!s->no_rounding) || s->pict_type==B_TYPE){                
1896                     op_pix = s->dsp.put_pixels_tab;
1897                     op_qpix= s->dsp.put_qpel_pixels_tab;
1898                 }else{
1899                     op_pix = s->dsp.put_no_rnd_pixels_tab;
1900                     op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
1901                 }
1902
1903                 if (s->mv_dir & MV_DIR_FORWARD) {
1904                     MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
1905                     op_pix = s->dsp.avg_pixels_tab;
1906                     op_qpix= s->dsp.avg_qpel_pixels_tab;
1907                 }
1908                 if (s->mv_dir & MV_DIR_BACKWARD) {
1909                     MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
1910                 }
1911             }
1912
1913             /* skip dequant / idct if we are really late ;) */
1914             if(s->hurry_up>1) return;
1915
1916             /* add dct residue */
1917             if(s->encoding || !(   s->mpeg2 || s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO 
1918                                 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
1919                 add_dequant_dct(s, block[0], 0, dest_y, dct_linesize);
1920                 add_dequant_dct(s, block[1], 1, dest_y + 8, dct_linesize);
1921                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
1922                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
1923
1924                 if(!(s->flags&CODEC_FLAG_GRAY)){
1925                     add_dequant_dct(s, block[4], 4, dest_cb, s->uvlinesize);
1926                     add_dequant_dct(s, block[5], 5, dest_cr, s->uvlinesize);
1927                 }
1928             } else {
1929                 add_dct(s, block[0], 0, dest_y, dct_linesize);
1930                 add_dct(s, block[1], 1, dest_y + 8, dct_linesize);
1931                 add_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
1932                 add_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
1933
1934                 if(!(s->flags&CODEC_FLAG_GRAY)){
1935                     add_dct(s, block[4], 4, dest_cb, s->uvlinesize);
1936                     add_dct(s, block[5], 5, dest_cr, s->uvlinesize);
1937                 }
1938             }
1939         } else {
1940             /* dct only in intra block */
1941             if(s->encoding || !(s->mpeg2 || s->codec_id==CODEC_ID_MPEG1VIDEO)){
1942                 put_dct(s, block[0], 0, dest_y, dct_linesize);
1943                 put_dct(s, block[1], 1, dest_y + 8, dct_linesize);
1944                 put_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
1945                 put_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
1946
1947                 if(!(s->flags&CODEC_FLAG_GRAY)){
1948                     put_dct(s, block[4], 4, dest_cb, s->uvlinesize);
1949                     put_dct(s, block[5], 5, dest_cr, s->uvlinesize);
1950                 }
1951             }else{
1952                 s->idct_put(dest_y                 , dct_linesize, block[0]);
1953                 s->idct_put(dest_y              + 8, dct_linesize, block[1]);
1954                 s->idct_put(dest_y + dct_offset    , dct_linesize, block[2]);
1955                 s->idct_put(dest_y + dct_offset + 8, dct_linesize, block[3]);
1956
1957                 if(!(s->flags&CODEC_FLAG_GRAY)){
1958                     s->idct_put(dest_cb, s->uvlinesize, block[4]);
1959                     s->idct_put(dest_cr, s->uvlinesize, block[5]);
1960                 }
1961             }
1962         }
1963     }
1964 }
1965
1966 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
1967 {
1968     static const char tab[64]=
1969         {3,2,2,1,1,1,1,1,
1970          1,1,1,1,1,1,1,1,
1971          1,1,1,1,1,1,1,1,
1972          0,0,0,0,0,0,0,0,
1973          0,0,0,0,0,0,0,0,
1974          0,0,0,0,0,0,0,0,
1975          0,0,0,0,0,0,0,0,
1976          0,0,0,0,0,0,0,0};
1977     int score=0;
1978     int run=0;
1979     int i;
1980     DCTELEM *block= s->block[n];
1981     const int last_index= s->block_last_index[n];
1982     int skip_dc;
1983
1984     if(threshold<0){
1985         skip_dc=0;
1986         threshold= -threshold;
1987     }else
1988         skip_dc=1;
1989
1990     /* are all which we could set to zero are allready zero? */
1991     if(last_index<=skip_dc - 1) return;
1992
1993     for(i=0; i<=last_index; i++){
1994         const int j = s->intra_scantable.permutated[i];
1995         const int level = ABS(block[j]);
1996         if(level==1){
1997             if(skip_dc && i==0) continue;
1998             score+= tab[run];
1999             run=0;
2000         }else if(level>1){
2001             return;
2002         }else{
2003             run++;
2004         }
2005     }
2006     if(score >= threshold) return;
2007     for(i=skip_dc; i<=last_index; i++){
2008         const int j = s->intra_scantable.permutated[i];
2009         block[j]=0;
2010     }
2011     if(block[0]) s->block_last_index[n]= 0;
2012     else         s->block_last_index[n]= -1;
2013 }
2014
2015 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
2016 {
2017     int i;
2018     const int maxlevel= s->max_qcoeff;
2019     const int minlevel= s->min_qcoeff;
2020     
2021     if(s->mb_intra){
2022         i=1; //skip clipping of intra dc
2023     }else
2024         i=0;
2025     
2026     for(;i<=last_index; i++){
2027         const int j= s->intra_scantable.permutated[i];
2028         int level = block[j];
2029        
2030         if     (level>maxlevel) level=maxlevel;
2031         else if(level<minlevel) level=minlevel;
2032         block[j]= level;
2033     }
2034 }
2035
2036 static inline void requantize_coeffs(MpegEncContext *s, DCTELEM block[64], int oldq, int newq, int n)
2037 {
2038     int i;
2039
2040     if(s->mb_intra){
2041         i=1; //skip clipping of intra dc
2042          //FIXME requantize, note (mpeg1/h263/h263p-aic dont need it,...)
2043     }else
2044         i=0;
2045     
2046     for(;i<=s->block_last_index[n]; i++){
2047         const int j = s->intra_scantable.permutated[i];
2048         int level = block[j];
2049         
2050         block[j]= ROUNDED_DIV(level*oldq, newq);
2051     }
2052
2053     for(i=s->block_last_index[n]; i>=0; i--){
2054         const int j = s->intra_scantable.permutated[i];
2055         if(block[j]) break;
2056     }
2057     s->block_last_index[n]= i;
2058 }
2059
2060 static inline void auto_requantize_coeffs(MpegEncContext *s, DCTELEM block[6][64])
2061 {
2062     int i,n, newq;
2063     const int maxlevel= s->max_qcoeff;
2064     const int minlevel= s->min_qcoeff;
2065     int largest=0, smallest=0;
2066
2067     assert(s->adaptive_quant);
2068     
2069     for(n=0; n<6; n++){
2070         if(s->mb_intra){
2071             i=1; //skip clipping of intra dc
2072              //FIXME requantize, note (mpeg1/h263/h263p-aic dont need it,...)
2073         }else
2074             i=0;
2075
2076         for(;i<=s->block_last_index[n]; i++){
2077             const int j = s->intra_scantable.permutated[i];
2078             int level = block[n][j];
2079             if(largest  < level) largest = level;
2080             if(smallest > level) smallest= level;
2081         }
2082     }
2083     
2084     for(newq=s->qscale+1; newq<32; newq++){
2085         if(   ROUNDED_DIV(smallest*s->qscale, newq) >= minlevel
2086            && ROUNDED_DIV(largest *s->qscale, newq) <= maxlevel) 
2087             break;
2088     }
2089         
2090     if(s->out_format==FMT_H263){
2091         /* h263 like formats cannot change qscale by more than 2 easiely */
2092         if(s->avctx->qmin + 2 < newq)
2093             newq= s->avctx->qmin + 2;
2094     }
2095
2096     for(n=0; n<6; n++){
2097         requantize_coeffs(s, block[n], s->qscale, newq, n);
2098         clip_coeffs(s, block[n], s->block_last_index[n]);
2099     }
2100      
2101     s->dquant+= newq - s->qscale;
2102     s->qscale= newq;
2103 }
2104 #if 0
2105 static int pix_vcmp16x8(UINT8 *s, int stride){ //FIXME move to dsputil & optimize
2106     int score=0;
2107     int x,y;
2108     
2109     for(y=0; y<7; y++){
2110         for(x=0; x<16; x+=4){
2111             score+= ABS(s[x  ] - s[x  +stride]) + ABS(s[x+1] - s[x+1+stride]) 
2112                    +ABS(s[x+2] - s[x+2+stride]) + ABS(s[x+3] - s[x+3+stride]);
2113         }
2114         s+= stride;
2115     }
2116     
2117     return score;
2118 }
2119
2120 static int pix_diff_vcmp16x8(UINT8 *s1, UINT8*s2, int stride){ //FIXME move to dsputil & optimize
2121     int score=0;
2122     int x,y;
2123     
2124     for(y=0; y<7; y++){
2125         for(x=0; x<16; x++){
2126             score+= ABS(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
2127         }
2128         s1+= stride;
2129         s2+= stride;
2130     }
2131     
2132     return score;
2133 }
2134 #else
2135 #define SQ(a) ((a)*(a))
2136
2137 static int pix_vcmp16x8(UINT8 *s, int stride){ //FIXME move to dsputil & optimize
2138     int score=0;
2139     int x,y;
2140     
2141     for(y=0; y<7; y++){
2142         for(x=0; x<16; x+=4){
2143             score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride]) 
2144                    +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);
2145         }
2146         s+= stride;
2147     }
2148     
2149     return score;
2150 }
2151
2152 static int pix_diff_vcmp16x8(UINT8 *s1, UINT8*s2, int stride){ //FIXME move to dsputil & optimize
2153     int score=0;
2154     int x,y;
2155     
2156     for(y=0; y<7; y++){
2157         for(x=0; x<16; x++){
2158             score+= SQ(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
2159         }
2160         s1+= stride;
2161         s2+= stride;
2162     }
2163     
2164     return score;
2165 }
2166
2167 #endif
2168
2169 void ff_draw_horiz_band(MpegEncContext *s){
2170     if (    s->avctx->draw_horiz_band 
2171         && (s->num_available_buffers>=1 || (!s->has_b_frames)) ) {
2172         UINT8 *src_ptr[3];
2173         int y, h, offset;
2174         y = s->mb_y * 16;
2175         h = s->height - y;
2176         if (h > 16)
2177             h = 16;
2178
2179         if(s->pict_type==B_TYPE)
2180             offset = 0;
2181         else
2182             offset = y * s->linesize;
2183
2184         if(s->pict_type==B_TYPE || (!s->has_b_frames)){
2185             src_ptr[0] = s->current_picture.data[0] + offset;
2186             src_ptr[1] = s->current_picture.data[1] + (offset >> 2);
2187             src_ptr[2] = s->current_picture.data[2] + (offset >> 2);
2188         } else {
2189             src_ptr[0] = s->last_picture.data[0] + offset;
2190             src_ptr[1] = s->last_picture.data[1] + (offset >> 2);
2191             src_ptr[2] = s->last_picture.data[2] + (offset >> 2);
2192         }
2193         emms_c();
2194
2195         s->avctx->draw_horiz_band(s->avctx, src_ptr, s->linesize,
2196                                y, s->width, h);
2197     }
2198 }
2199
2200 static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2201 {
2202     const int mb_x= s->mb_x;
2203     const int mb_y= s->mb_y;
2204     int i;
2205     int skip_dct[6];
2206     int dct_offset   = s->linesize*8; //default for progressive frames
2207     
2208     for(i=0; i<6; i++) skip_dct[i]=0;
2209     
2210     if(s->adaptive_quant){
2211         s->dquant= s->current_picture.qscale_table[mb_x + mb_y*s->mb_width] - s->qscale;
2212
2213         if(s->out_format==FMT_H263){
2214             if     (s->dquant> 2) s->dquant= 2;
2215             else if(s->dquant<-2) s->dquant=-2;
2216         }
2217             
2218         if(s->codec_id==CODEC_ID_MPEG4){        
2219             if(!s->mb_intra){
2220                 assert(s->dquant==0 || s->mv_type!=MV_TYPE_8X8);
2221
2222                 if(s->mv_dir&MV_DIRECT)
2223                     s->dquant=0;
2224             }
2225         }
2226         s->qscale+= s->dquant;
2227         s->y_dc_scale= s->y_dc_scale_table[ s->qscale ];
2228         s->c_dc_scale= s->c_dc_scale_table[ s->qscale ];
2229     }
2230
2231     if (s->mb_intra) {
2232         UINT8 *ptr;
2233         int wrap_y;
2234         int emu=0;
2235
2236         wrap_y = s->linesize;
2237         ptr = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
2238
2239         if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
2240             emulated_edge_mc(s, ptr, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
2241             ptr= s->edge_emu_buffer;
2242             emu=1;
2243         }
2244         
2245         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
2246             int progressive_score, interlaced_score;
2247             
2248             progressive_score= pix_vcmp16x8(ptr, wrap_y  ) + pix_vcmp16x8(ptr + wrap_y*8, wrap_y );
2249             interlaced_score = pix_vcmp16x8(ptr, wrap_y*2) + pix_vcmp16x8(ptr + wrap_y  , wrap_y*2);
2250             
2251             if(progressive_score > interlaced_score + 100){
2252                 s->interlaced_dct=1;
2253             
2254                 dct_offset= wrap_y;
2255                 wrap_y<<=1;
2256             }else
2257                 s->interlaced_dct=0;
2258         }
2259         
2260         s->dsp.get_pixels(s->block[0], ptr                 , wrap_y);
2261         s->dsp.get_pixels(s->block[1], ptr              + 8, wrap_y);
2262         s->dsp.get_pixels(s->block[2], ptr + dct_offset    , wrap_y);
2263         s->dsp.get_pixels(s->block[3], ptr + dct_offset + 8, wrap_y);
2264
2265         if(s->flags&CODEC_FLAG_GRAY){
2266             skip_dct[4]= 1;
2267             skip_dct[5]= 1;
2268         }else{
2269             int wrap_c = s->uvlinesize;
2270             ptr = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
2271             if(emu){
2272                 emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2273                 ptr= s->edge_emu_buffer;
2274             }
2275             s->dsp.get_pixels(s->block[4], ptr, wrap_c);
2276
2277             ptr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
2278             if(emu){
2279                 emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2280                 ptr= s->edge_emu_buffer;
2281             }
2282             s->dsp.get_pixels(s->block[5], ptr, wrap_c);
2283         }
2284     }else{
2285         op_pixels_func (*op_pix)[4];
2286         qpel_mc_func (*op_qpix)[16];
2287         UINT8 *dest_y, *dest_cb, *dest_cr;
2288         UINT8 *ptr_y, *ptr_cb, *ptr_cr;
2289         int wrap_y, wrap_c;
2290         int emu=0;
2291
2292         dest_y  = s->current_picture.data[0] + (mb_y * 16 * s->linesize    ) + mb_x * 16;
2293         dest_cb = s->current_picture.data[1] + (mb_y * 8  * (s->uvlinesize)) + mb_x * 8;
2294         dest_cr = s->current_picture.data[2] + (mb_y * 8  * (s->uvlinesize)) + mb_x * 8;
2295         wrap_y = s->linesize;
2296         wrap_c = s->uvlinesize;
2297         ptr_y  = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
2298         ptr_cb = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
2299         ptr_cr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
2300
2301         if ((!s->no_rounding) || s->pict_type==B_TYPE){
2302             op_pix = s->dsp.put_pixels_tab;
2303             op_qpix= s->dsp.put_qpel_pixels_tab;
2304         }else{
2305             op_pix = s->dsp.put_no_rnd_pixels_tab;
2306             op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
2307         }
2308
2309         if (s->mv_dir & MV_DIR_FORWARD) {
2310             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
2311             op_pix = s->dsp.avg_pixels_tab;
2312             op_qpix= s->dsp.avg_qpel_pixels_tab;
2313         }
2314         if (s->mv_dir & MV_DIR_BACKWARD) {
2315             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
2316         }
2317
2318         if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
2319             emulated_edge_mc(s, ptr_y, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
2320             ptr_y= s->edge_emu_buffer;
2321             emu=1;
2322         }
2323         
2324         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
2325             int progressive_score, interlaced_score;
2326             
2327             progressive_score= pix_diff_vcmp16x8(ptr_y           , dest_y           , wrap_y  ) 
2328                              + pix_diff_vcmp16x8(ptr_y + wrap_y*8, dest_y + wrap_y*8, wrap_y  );
2329             interlaced_score = pix_diff_vcmp16x8(ptr_y           , dest_y           , wrap_y*2)
2330                              + pix_diff_vcmp16x8(ptr_y + wrap_y  , dest_y + wrap_y  , wrap_y*2);
2331             
2332             if(progressive_score > interlaced_score + 600){
2333                 s->interlaced_dct=1;
2334             
2335                 dct_offset= wrap_y;
2336                 wrap_y<<=1;
2337             }else
2338                 s->interlaced_dct=0;
2339         }
2340         
2341         s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
2342         s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
2343         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
2344         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
2345         
2346         if(s->flags&CODEC_FLAG_GRAY){
2347             skip_dct[4]= 1;
2348             skip_dct[5]= 1;
2349         }else{
2350             if(emu){
2351                 emulated_edge_mc(s, ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2352                 ptr_cb= s->edge_emu_buffer;
2353             }
2354             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2355             if(emu){
2356                 emulated_edge_mc(s, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2357                 ptr_cr= s->edge_emu_buffer;
2358             }
2359             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2360         }
2361         /* pre quantization */         
2362         if(s->current_picture.mc_mb_var[s->mb_width*mb_y+ mb_x]<2*s->qscale*s->qscale){
2363             //FIXME optimize
2364             if(s->dsp.pix_abs8x8(ptr_y               , dest_y               , wrap_y) < 20*s->qscale) skip_dct[0]= 1;
2365             if(s->dsp.pix_abs8x8(ptr_y            + 8, dest_y            + 8, wrap_y) < 20*s->qscale) skip_dct[1]= 1;
2366             if(s->dsp.pix_abs8x8(ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y) < 20*s->qscale) skip_dct[2]= 1;
2367             if(s->dsp.pix_abs8x8(ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y) < 20*s->qscale) skip_dct[3]= 1;
2368             if(s->dsp.pix_abs8x8(ptr_cb              , dest_cb              , wrap_c) < 20*s->qscale) skip_dct[4]= 1;
2369             if(s->dsp.pix_abs8x8(ptr_cr              , dest_cr              , wrap_c) < 20*s->qscale) skip_dct[5]= 1;
2370 #if 0
2371 {
2372  static int stat[7];
2373  int num=0;
2374  for(i=0; i<6; i++)
2375   if(skip_dct[i]) num++;
2376  stat[num]++;
2377  
2378  if(s->mb_x==0 && s->mb_y==0){
2379   for(i=0; i<7; i++){
2380    printf("%6d %1d\n", stat[i], i);
2381   }
2382  }
2383 }
2384 #endif
2385         }
2386
2387     }
2388             
2389 #if 0
2390             {
2391                 float adap_parm;
2392                 
2393                 adap_parm = ((s->avg_mb_var << 1) + s->mb_var[s->mb_width*mb_y+mb_x] + 1.0) /
2394                             ((s->mb_var[s->mb_width*mb_y+mb_x] << 1) + s->avg_mb_var + 1.0);
2395             
2396                 printf("\ntype=%c qscale=%2d adap=%0.2f dquant=%4.2f var=%4d avgvar=%4d", 
2397                         (s->mb_type[s->mb_width*mb_y+mb_x] > 0) ? 'I' : 'P', 
2398                         s->qscale, adap_parm, s->qscale*adap_parm,
2399                         s->mb_var[s->mb_width*mb_y+mb_x], s->avg_mb_var);
2400             }
2401 #endif
2402     /* DCT & quantize */
2403     if(s->out_format==FMT_MJPEG){
2404         for(i=0;i<6;i++) {
2405             int overflow;
2406             s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, 8, &overflow);
2407             if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
2408         }
2409     }else{
2410         for(i=0;i<6;i++) {
2411             if(!skip_dct[i]){
2412                 int overflow;
2413                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2414             // FIXME we could decide to change to quantizer instead of clipping
2415             // JS: I don't think that would be a good idea it could lower quality instead
2416             //     of improve it. Just INTRADC clipping deserves changes in quantizer
2417                 if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
2418             }else
2419                 s->block_last_index[i]= -1;
2420         }
2421         if(s->luma_elim_threshold && !s->mb_intra)
2422             for(i=0; i<4; i++)
2423                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2424         if(s->chroma_elim_threshold && !s->mb_intra)
2425             for(i=4; i<6; i++)
2426                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2427     }
2428
2429     if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
2430         s->block_last_index[4]=
2431         s->block_last_index[5]= 0;
2432         s->block[4][0]=
2433         s->block[5][0]= 128;
2434     }
2435
2436 #ifdef CONFIG_ENCODERS
2437     /* huffman encode */
2438     switch(s->out_format) {
2439     case FMT_MPEG1:
2440         mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2441         break;
2442     case FMT_H263:
2443         if (s->h263_msmpeg4)
2444             msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2445         else if(s->h263_pred)
2446             mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2447         else
2448             h263_encode_mb(s, s->block, motion_x, motion_y);
2449         break;
2450     case FMT_MJPEG:
2451         mjpeg_encode_mb(s, s->block);
2452         break;
2453     }
2454 #endif
2455 }
2456
2457 void ff_copy_bits(PutBitContext *pb, UINT8 *src, int length)
2458 {
2459     int bytes= length>>4;
2460     int bits= length&15;
2461     int i;
2462
2463     if(length==0) return;
2464
2465     for(i=0; i<bytes; i++) put_bits(pb, 16, be2me_16(((uint16_t*)src)[i]));
2466     put_bits(pb, bits, be2me_16(((uint16_t*)src)[i])>>(16-bits));
2467 }
2468
2469 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2470     int i;
2471
2472     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
2473
2474     /* mpeg1 */
2475     d->mb_incr= s->mb_incr;
2476     for(i=0; i<3; i++)
2477         d->last_dc[i]= s->last_dc[i];
2478     
2479     /* statistics */
2480     d->mv_bits= s->mv_bits;
2481     d->i_tex_bits= s->i_tex_bits;
2482     d->p_tex_bits= s->p_tex_bits;
2483     d->i_count= s->i_count;
2484     d->f_count= s->f_count;
2485     d->b_count= s->b_count;
2486     d->skip_count= s->skip_count;
2487     d->misc_bits= s->misc_bits;
2488     d->last_bits= 0;
2489
2490     d->mb_skiped= s->mb_skiped;
2491 }
2492
2493 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2494     int i;
2495
2496     memcpy(d->mv, s->mv, 2*4*2*sizeof(int)); 
2497     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
2498     
2499     /* mpeg1 */
2500     d->mb_incr= s->mb_incr;
2501     for(i=0; i<3; i++)
2502         d->last_dc[i]= s->last_dc[i];
2503     
2504     /* statistics */
2505     d->mv_bits= s->mv_bits;
2506     d->i_tex_bits= s->i_tex_bits;
2507     d->p_tex_bits= s->p_tex_bits;
2508     d->i_count= s->i_count;
2509     d->f_count= s->f_count;
2510     d->b_count= s->b_count;
2511     d->skip_count= s->skip_count;
2512     d->misc_bits= s->misc_bits;
2513
2514     d->mb_intra= s->mb_intra;
2515     d->mb_skiped= s->mb_skiped;
2516     d->mv_type= s->mv_type;
2517     d->mv_dir= s->mv_dir;
2518     d->pb= s->pb;
2519     if(s->data_partitioning){
2520         d->pb2= s->pb2;
2521         d->tex_pb= s->tex_pb;
2522     }
2523     d->block= s->block;
2524     for(i=0; i<6; i++)
2525         d->block_last_index[i]= s->block_last_index[i];
2526     d->interlaced_dct= s->interlaced_dct;
2527 }
2528
2529 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type, 
2530                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2531                            int *dmin, int *next_block, int motion_x, int motion_y)
2532 {
2533     int bits_count;
2534     
2535     copy_context_before_encode(s, backup, type);
2536
2537     s->block= s->blocks[*next_block];
2538     s->pb= pb[*next_block];
2539     if(s->data_partitioning){
2540         s->pb2   = pb2   [*next_block];
2541         s->tex_pb= tex_pb[*next_block];
2542     }
2543
2544     encode_mb(s, motion_x, motion_y);
2545
2546     bits_count= get_bit_count(&s->pb);
2547     if(s->data_partitioning){
2548         bits_count+= get_bit_count(&s->pb2);
2549         bits_count+= get_bit_count(&s->tex_pb);
2550     }
2551
2552     if(bits_count<*dmin){
2553         *dmin= bits_count;
2554         *next_block^=1;
2555
2556         copy_context_after_encode(best, s, type);
2557     }
2558 }
2559                 
2560 static inline int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2561     uint32_t *sq = squareTbl + 256;
2562     int acc=0;
2563     int x,y;
2564     
2565     if(w==16 && h==16) 
2566         return s->dsp.pix_norm(src1, src2, stride);
2567     
2568     for(y=0; y<h; y++){
2569         for(x=0; x<w; x++){
2570             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2571         } 
2572     }
2573     return acc;
2574 }
2575
2576 static void encode_picture(MpegEncContext *s, int picture_number)
2577 {
2578     int mb_x, mb_y, pdif = 0;
2579     int i;
2580     int bits;
2581     MpegEncContext best_s, backup_s;
2582     UINT8 bit_buf[2][3000];
2583     UINT8 bit_buf2[2][3000];
2584     UINT8 bit_buf_tex[2][3000];
2585     PutBitContext pb[2], pb2[2], tex_pb[2];
2586
2587     for(i=0; i<2; i++){
2588         init_put_bits(&pb    [i], bit_buf    [i], 3000, NULL, NULL);
2589         init_put_bits(&pb2   [i], bit_buf2   [i], 3000, NULL, NULL);
2590         init_put_bits(&tex_pb[i], bit_buf_tex[i], 3000, NULL, NULL);
2591     }
2592
2593     s->picture_number = picture_number;
2594
2595     s->block_wrap[0]=
2596     s->block_wrap[1]=
2597     s->block_wrap[2]=
2598     s->block_wrap[3]= s->mb_width*2 + 2;
2599     s->block_wrap[4]=
2600     s->block_wrap[5]= s->mb_width + 2;
2601     
2602     /* Reset the average MB variance */
2603     s->current_picture.mb_var_sum = 0;
2604     s->current_picture.mc_mb_var_sum = 0;
2605
2606     /* we need to initialize some time vars before we can encode b-frames */
2607     if (s->h263_pred && !s->h263_msmpeg4)
2608         ff_set_mpeg4_time(s, s->picture_number); 
2609
2610     s->scene_change_score=0;
2611     
2612     s->qscale= (int)(s->frame_qscale + 0.5); //FIXME qscale / ... stuff for ME ratedistoration
2613
2614     /* Estimate motion for every MB */
2615     if(s->pict_type != I_TYPE){
2616         for(mb_y=0; mb_y < s->mb_height; mb_y++) {
2617             s->block_index[0]= s->block_wrap[0]*(mb_y*2 + 1) - 1;
2618             s->block_index[1]= s->block_wrap[0]*(mb_y*2 + 1);
2619             s->block_index[2]= s->block_wrap[0]*(mb_y*2 + 2) - 1;
2620             s->block_index[3]= s->block_wrap[0]*(mb_y*2 + 2);
2621             for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2622                 s->mb_x = mb_x;
2623                 s->mb_y = mb_y;
2624                 s->block_index[0]+=2;
2625                 s->block_index[1]+=2;
2626                 s->block_index[2]+=2;
2627                 s->block_index[3]+=2;
2628
2629                 /* compute motion vector & mb_type and store in context */
2630                 if(s->pict_type==B_TYPE)
2631                     ff_estimate_b_frame_motion(s, mb_x, mb_y);
2632                 else
2633                     ff_estimate_p_frame_motion(s, mb_x, mb_y);
2634 //                s->mb_type[mb_y*s->mb_width + mb_x]=MB_TYPE_INTER;
2635             }
2636         }
2637     }else /* if(s->pict_type == I_TYPE) */{
2638         /* I-Frame */
2639         //FIXME do we need to zero them?
2640         memset(s->motion_val[0], 0, sizeof(INT16)*(s->mb_width*2 + 2)*(s->mb_height*2 + 2)*2);
2641         memset(s->p_mv_table   , 0, sizeof(INT16)*(s->mb_width+2)*(s->mb_height+2)*2);
2642         memset(s->mb_type      , MB_TYPE_INTRA, sizeof(UINT8)*s->mb_width*s->mb_height);
2643         
2644         if(!s->fixed_qscale){
2645             /* finding spatial complexity for I-frame rate control */
2646             for(mb_y=0; mb_y < s->mb_height; mb_y++) {
2647                 for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2648                     int xx = mb_x * 16;
2649                     int yy = mb_y * 16;
2650                     uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
2651                     int varc;
2652                     int sum = s->dsp.pix_sum(pix, s->linesize);
2653     
2654                     varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
2655
2656                     s->current_picture.mb_var [s->mb_width * mb_y + mb_x] = varc;
2657                     s->current_picture.mb_mean[s->mb_width * mb_y + mb_x] = (sum+128)>>8;
2658                     s->current_picture.mb_var_sum    += varc;
2659                 }
2660             }
2661         }
2662     }
2663     emms_c();
2664
2665     if(s->scene_change_score > 0 && s->pict_type == P_TYPE){
2666         s->pict_type= I_TYPE;
2667         memset(s->mb_type   , MB_TYPE_INTRA, sizeof(UINT8)*s->mb_width*s->mb_height);
2668 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
2669     }
2670
2671     if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) 
2672         s->f_code= ff_get_best_fcode(s, s->p_mv_table, MB_TYPE_INTER);
2673         ff_fix_long_p_mvs(s);
2674     if(s->pict_type==B_TYPE){
2675         s->f_code= ff_get_best_fcode(s, s->b_forw_mv_table, MB_TYPE_FORWARD);
2676         s->b_code= ff_get_best_fcode(s, s->b_back_mv_table, MB_TYPE_BACKWARD);
2677
2678         ff_fix_long_b_mvs(s, s->b_forw_mv_table, s->f_code, MB_TYPE_FORWARD);
2679         ff_fix_long_b_mvs(s, s->b_back_mv_table, s->b_code, MB_TYPE_BACKWARD);
2680         ff_fix_long_b_mvs(s, s->b_bidir_forw_mv_table, s->f_code, MB_TYPE_BIDIR);
2681         ff_fix_long_b_mvs(s, s->b_bidir_back_mv_table, s->b_code, MB_TYPE_BIDIR);
2682     }
2683     
2684     if (s->fixed_qscale) 
2685         s->frame_qscale = s->current_picture.quality;
2686     else
2687         s->frame_qscale = ff_rate_estimate_qscale(s);
2688
2689     if(s->adaptive_quant){
2690         switch(s->codec_id){
2691         case CODEC_ID_MPEG4:
2692             ff_clean_mpeg4_qscales(s);
2693             break;
2694         case CODEC_ID_H263:
2695         case CODEC_ID_H263P:
2696             ff_clean_h263_qscales(s);
2697             break;
2698         }
2699
2700         s->qscale= s->current_picture.qscale_table[0];
2701     }else
2702         s->qscale= (int)(s->frame_qscale + 0.5);
2703         
2704     if (s->out_format == FMT_MJPEG) {
2705         /* for mjpeg, we do include qscale in the matrix */
2706         s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
2707         for(i=1;i<64;i++){
2708             int j= s->idct_permutation[i];
2709
2710             s->intra_matrix[j] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
2711         }
2712         convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16, 
2713                        s->q_intra_matrix16_bias, s->intra_matrix, s->intra_quant_bias, 8, 8);
2714     }
2715     
2716     //FIXME var duplication
2717     s->current_picture.key_frame= s->pict_type == I_TYPE;
2718     s->current_picture.pict_type= s->pict_type;
2719
2720     if(s->current_picture.key_frame)
2721         s->picture_in_gop_number=0;
2722
2723     s->last_bits= get_bit_count(&s->pb);
2724     switch(s->out_format) {
2725     case FMT_MJPEG:
2726         mjpeg_picture_header(s);
2727         break;
2728     case FMT_H263:
2729         if (s->h263_msmpeg4) 
2730             msmpeg4_encode_picture_header(s, picture_number);
2731         else if (s->h263_pred)
2732             mpeg4_encode_picture_header(s, picture_number);
2733         else if (s->h263_rv10) 
2734             rv10_encode_picture_header(s, picture_number);
2735         else
2736             h263_encode_picture_header(s, picture_number);
2737         break;
2738     case FMT_MPEG1:
2739         mpeg1_encode_picture_header(s, picture_number);
2740         break;
2741     }
2742     bits= get_bit_count(&s->pb);
2743     s->header_bits= bits - s->last_bits;
2744     s->last_bits= bits;
2745     s->mv_bits=0;
2746     s->misc_bits=0;
2747     s->i_tex_bits=0;
2748     s->p_tex_bits=0;
2749     s->i_count=0;
2750     s->f_count=0;
2751     s->b_count=0;
2752     s->skip_count=0;
2753
2754     for(i=0; i<3; i++){
2755         /* init last dc values */
2756         /* note: quant matrix value (8) is implied here */
2757         s->last_dc[i] = 128;
2758         
2759         s->current_picture.error[i] = 0;
2760     }
2761     s->mb_incr = 1;
2762     s->last_mv[0][0][0] = 0;
2763     s->last_mv[0][0][1] = 0;
2764
2765     if (s->codec_id==CODEC_ID_H263 || s->codec_id==CODEC_ID_H263P)
2766         s->gob_index = ff_h263_get_gob_height(s);
2767
2768     if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame)
2769         ff_mpeg4_init_partitions(s);
2770
2771     s->resync_mb_x=0;
2772     s->resync_mb_y=0;
2773     s->first_slice_line = 1;
2774     s->ptr_lastgob = s->pb.buf;
2775     s->ptr_last_mb_line = s->pb.buf;
2776     for(mb_y=0; mb_y < s->mb_height; mb_y++) {
2777         s->y_dc_scale= s->y_dc_scale_table[ s->qscale ];
2778         s->c_dc_scale= s->c_dc_scale_table[ s->qscale ];
2779         
2780         s->block_index[0]= s->block_wrap[0]*(mb_y*2 + 1) - 1;
2781         s->block_index[1]= s->block_wrap[0]*(mb_y*2 + 1);
2782         s->block_index[2]= s->block_wrap[0]*(mb_y*2 + 2) - 1;
2783         s->block_index[3]= s->block_wrap[0]*(mb_y*2 + 2);
2784         s->block_index[4]= s->block_wrap[4]*(mb_y + 1)                    + s->block_wrap[0]*(s->mb_height*2 + 2);
2785         s->block_index[5]= s->block_wrap[4]*(mb_y + 1 + s->mb_height + 2) + s->block_wrap[0]*(s->mb_height*2 + 2);
2786         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2787             const int mb_type= s->mb_type[mb_y * s->mb_width + mb_x];
2788             const int xy= (mb_y+1) * (s->mb_width+2) + mb_x + 1;
2789 //            int d;
2790             int dmin=10000000;
2791
2792             s->mb_x = mb_x;
2793             s->mb_y = mb_y;
2794             s->block_index[0]+=2;
2795             s->block_index[1]+=2;
2796             s->block_index[2]+=2;
2797             s->block_index[3]+=2;
2798             s->block_index[4]++;
2799             s->block_index[5]++;
2800
2801             /* write gob / video packet header  */
2802             if(s->rtp_mode){
2803                 int current_packet_size, is_gob_start;
2804                 
2805                 current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
2806                 is_gob_start=0;
2807                 
2808                 if(s->codec_id==CODEC_ID_MPEG4){
2809                     if(current_packet_size + s->mb_line_avgsize/s->mb_width >= s->rtp_payload_size
2810                        && s->mb_y + s->mb_x>0){
2811
2812                         if(s->partitioned_frame){
2813                             ff_mpeg4_merge_partitions(s);
2814                             ff_mpeg4_init_partitions(s);
2815                         }
2816                         ff_mpeg4_encode_video_packet_header(s);
2817
2818                         if(s->flags&CODEC_FLAG_PASS1){
2819                             int bits= get_bit_count(&s->pb);
2820                             s->misc_bits+= bits - s->last_bits;
2821                             s->last_bits= bits;
2822                         }
2823                         ff_mpeg4_clean_buffers(s);
2824                         is_gob_start=1;
2825                     }
2826                 }else{
2827                     if(current_packet_size + s->mb_line_avgsize*s->gob_index >= s->rtp_payload_size
2828                        && s->mb_x==0 && s->mb_y>0 && s->mb_y%s->gob_index==0){
2829                        
2830                         h263_encode_gob_header(s, mb_y);                       
2831                         is_gob_start=1;
2832                     }
2833                 }
2834
2835                 if(is_gob_start){
2836                     s->ptr_lastgob = pbBufPtr(&s->pb);
2837                     s->first_slice_line=1;
2838                     s->resync_mb_x=mb_x;
2839                     s->resync_mb_y=mb_y;
2840                 }
2841             }
2842
2843             if(  (s->resync_mb_x   == s->mb_x)
2844                && s->resync_mb_y+1 == s->mb_y){
2845                 s->first_slice_line=0; 
2846             }
2847
2848             if(mb_type & (mb_type-1)){ // more than 1 MB type possible
2849                 int next_block=0;
2850                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2851
2852                 copy_context_before_encode(&backup_s, s, -1);
2853                 backup_s.pb= s->pb;
2854                 best_s.data_partitioning= s->data_partitioning;
2855                 best_s.partitioned_frame= s->partitioned_frame;
2856                 if(s->data_partitioning){
2857                     backup_s.pb2= s->pb2;
2858                     backup_s.tex_pb= s->tex_pb;
2859                 }
2860
2861                 if(mb_type&MB_TYPE_INTER){
2862                     s->mv_dir = MV_DIR_FORWARD;
2863                     s->mv_type = MV_TYPE_16X16;
2864                     s->mb_intra= 0;
2865                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2866                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2867                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER, pb, pb2, tex_pb, 
2868                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2869                 }
2870                 if(mb_type&MB_TYPE_INTER4V){                 
2871                     s->mv_dir = MV_DIR_FORWARD;
2872                     s->mv_type = MV_TYPE_8X8;
2873                     s->mb_intra= 0;
2874                     for(i=0; i<4; i++){
2875                         s->mv[0][i][0] = s->motion_val[s->block_index[i]][0];
2876                         s->mv[0][i][1] = s->motion_val[s->block_index[i]][1];
2877                     }
2878                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER4V, pb, pb2, tex_pb, 
2879                                  &dmin, &next_block, 0, 0);
2880                 }
2881                 if(mb_type&MB_TYPE_FORWARD){
2882                     s->mv_dir = MV_DIR_FORWARD;
2883                     s->mv_type = MV_TYPE_16X16;
2884                     s->mb_intra= 0;
2885                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2886                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2887                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_FORWARD, pb, pb2, tex_pb, 
2888                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2889                 }
2890                 if(mb_type&MB_TYPE_BACKWARD){
2891                     s->mv_dir = MV_DIR_BACKWARD;
2892                     s->mv_type = MV_TYPE_16X16;
2893                     s->mb_intra= 0;
2894                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2895                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2896                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_BACKWARD, pb, pb2, tex_pb, 
2897                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2898                 }
2899                 if(mb_type&MB_TYPE_BIDIR){
2900                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2901                     s->mv_type = MV_TYPE_16X16;
2902                     s->mb_intra= 0;
2903                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2904                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2905                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2906                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2907                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_BIDIR, pb, pb2, tex_pb, 
2908                                  &dmin, &next_block, 0, 0);
2909                 }
2910                 if(mb_type&MB_TYPE_DIRECT){
2911                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2912                     s->mv_type = MV_TYPE_16X16; //FIXME
2913                     s->mb_intra= 0;
2914                     s->mv[0][0][0] = s->b_direct_forw_mv_table[xy][0];
2915                     s->mv[0][0][1] = s->b_direct_forw_mv_table[xy][1];
2916                     s->mv[1][0][0] = s->b_direct_back_mv_table[xy][0];
2917                     s->mv[1][0][1] = s->b_direct_back_mv_table[xy][1];
2918                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_DIRECT, pb, pb2, tex_pb, 
2919                                  &dmin, &next_block, s->b_direct_mv_table[xy][0], s->b_direct_mv_table[xy][1]);
2920                 }
2921                 if(mb_type&MB_TYPE_INTRA){
2922                     s->mv_dir = MV_DIR_FORWARD;
2923                     s->mv_type = MV_TYPE_16X16;
2924                     s->mb_intra= 1;
2925                     s->mv[0][0][0] = 0;
2926                     s->mv[0][0][1] = 0;
2927                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTRA, pb, pb2, tex_pb, 
2928                                  &dmin, &next_block, 0, 0);
2929                     /* force cleaning of ac/dc pred stuff if needed ... */
2930                     if(s->h263_pred || s->h263_aic)
2931                         s->mbintra_table[mb_x + mb_y*s->mb_width]=1;
2932                 }
2933                 copy_context_after_encode(s, &best_s, -1);
2934                 
2935                 pb_bits_count= get_bit_count(&s->pb);
2936                 flush_put_bits(&s->pb);
2937                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2938                 s->pb= backup_s.pb;
2939                 
2940                 if(s->data_partitioning){
2941                     pb2_bits_count= get_bit_count(&s->pb2);
2942                     flush_put_bits(&s->pb2);
2943                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2944                     s->pb2= backup_s.pb2;
2945                     
2946                     tex_pb_bits_count= get_bit_count(&s->tex_pb);
2947                     flush_put_bits(&s->tex_pb);
2948                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2949                     s->tex_pb= backup_s.tex_pb;
2950                 }
2951                 s->last_bits= get_bit_count(&s->pb);
2952             } else {
2953                 int motion_x, motion_y;
2954                 s->mv_type=MV_TYPE_16X16;
2955                 // only one MB-Type possible
2956                 switch(mb_type){
2957                 case MB_TYPE_INTRA:
2958                     s->mv_dir = MV_DIR_FORWARD;
2959                     s->mb_intra= 1;
2960                     motion_x= s->mv[0][0][0] = 0;
2961                     motion_y= s->mv[0][0][1] = 0;
2962                     break;
2963                 case MB_TYPE_INTER:
2964                     s->mv_dir = MV_DIR_FORWARD;
2965                     s->mb_intra= 0;
2966                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2967                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2968                     break;
2969                 case MB_TYPE_INTER4V:
2970                     s->mv_dir = MV_DIR_FORWARD;
2971                     s->mv_type = MV_TYPE_8X8;
2972                     s->mb_intra= 0;
2973                     for(i=0; i<4; i++){
2974                         s->mv[0][i][0] = s->motion_val[s->block_index[i]][0];
2975                         s->mv[0][i][1] = s->motion_val[s->block_index[i]][1];
2976                     }
2977                     motion_x= motion_y= 0;
2978                     break;
2979                 case MB_TYPE_DIRECT:
2980                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2981                     s->mb_intra= 0;
2982                     motion_x=s->b_direct_mv_table[xy][0];
2983                     motion_y=s->b_direct_mv_table[xy][1];
2984                     s->mv[0][0][0] = s->b_direct_forw_mv_table[xy][0];
2985                     s->mv[0][0][1] = s->b_direct_forw_mv_table[xy][1];
2986                     s->mv[1][0][0] = s->b_direct_back_mv_table[xy][0];
2987                     s->mv[1][0][1] = s->b_direct_back_mv_table[xy][1];
2988                     break;
2989                 case MB_TYPE_BIDIR:
2990                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2991                     s->mb_intra= 0;
2992                     motion_x=0;
2993                     motion_y=0;
2994                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2995                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2996                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2997                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2998                     break;
2999                 case MB_TYPE_BACKWARD:
3000                     s->mv_dir = MV_DIR_BACKWARD;
3001                     s->mb_intra= 0;
3002                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3003                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3004                     break;
3005                 case MB_TYPE_FORWARD:
3006                     s->mv_dir = MV_DIR_FORWARD;
3007                     s->mb_intra= 0;
3008                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3009                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3010 //                    printf(" %d %d ", motion_x, motion_y);
3011                     break;
3012                 default:
3013                     motion_x=motion_y=0; //gcc warning fix
3014                     printf("illegal MB type\n");
3015                 }
3016                 encode_mb(s, motion_x, motion_y);
3017             }
3018             /* clean the MV table in IPS frames for direct mode in B frames */
3019             if(s->mb_intra /* && I,P,S_TYPE */){
3020                 s->p_mv_table[xy][0]=0;
3021                 s->p_mv_table[xy][1]=0;
3022             }
3023
3024             MPV_decode_mb(s, s->block);
3025             
3026             if(s->flags&CODEC_FLAG_PSNR){
3027                 int w= 16;
3028                 int h= 16;
3029
3030                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3031                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3032                 
3033                 s->current_picture.error[0] += sse(
3034                     s,
3035                     s->new_picture    .data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3036                     s->current_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3037                     w, h, s->linesize);
3038                 s->current_picture.error[1] += sse(
3039                     s,
3040                     s->new_picture    .data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
3041                     s->current_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
3042                     w>>1, h>>1, s->uvlinesize);
3043                 s->current_picture.error[2] += sse(
3044                     s,
3045                     s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
3046                     s->current_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
3047                     w>>1, h>>1, s->uvlinesize);
3048             }
3049 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_width, get_bit_count(&s->pb));
3050         }
3051
3052
3053         /* Obtain average mb_row size for RTP */
3054         if (s->rtp_mode) {
3055             if (mb_y==0)
3056                 s->mb_line_avgsize = pbBufPtr(&s->pb) - s->ptr_last_mb_line;
3057             else {    
3058                 s->mb_line_avgsize = (s->mb_line_avgsize + pbBufPtr(&s->pb) - s->ptr_last_mb_line) >> 1;
3059             }
3060             s->ptr_last_mb_line = pbBufPtr(&s->pb);
3061         }
3062     }
3063     emms_c();
3064
3065     if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame)
3066         ff_mpeg4_merge_partitions(s);
3067
3068     if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
3069         msmpeg4_encode_ext_header(s);
3070
3071     if(s->codec_id==CODEC_ID_MPEG4) 
3072         ff_mpeg4_stuffing(&s->pb);
3073
3074     //if (s->gob_number)
3075     //    fprintf(stderr,"\nNumber of GOB: %d", s->gob_number);
3076     
3077     /* Send the last GOB if RTP */    
3078     if (s->rtp_mode) {
3079         flush_put_bits(&s->pb);
3080         pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
3081         /* Call the RTP callback to send the last GOB */
3082         if (s->rtp_callback)
3083             s->rtp_callback(s->ptr_lastgob, pdif, s->gob_number);
3084         s->ptr_lastgob = pbBufPtr(&s->pb);
3085         //fprintf(stderr,"\nGOB: %2d size: %d (last)", s->gob_number, pdif);
3086     }
3087 }
3088
3089 static int dct_quantize_c(MpegEncContext *s, 
3090                         DCTELEM *block, int n,
3091                         int qscale, int *overflow)
3092 {
3093     int i, j, level, last_non_zero, q;
3094     const int *qmat;
3095     const UINT8 *scantable= s->intra_scantable.scantable;
3096     int bias;
3097     int max=0;
3098     unsigned int threshold1, threshold2;
3099
3100     s->fdct (block);
3101
3102     if (s->mb_intra) {
3103         if (!s->h263_aic) {
3104             if (n < 4)
3105                 q = s->y_dc_scale;
3106             else
3107                 q = s->c_dc_scale;
3108             q = q << 3;
3109         } else
3110             /* For AIC we skip quant/dequant of INTRADC */
3111             q = 1 << 3;
3112             
3113         /* note: block[0] is assumed to be positive */
3114         block[0] = (block[0] + (q >> 1)) / q;
3115         i = 1;
3116         last_non_zero = 0;
3117         qmat = s->q_intra_matrix[qscale];
3118         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
3119     } else {
3120         i = 0;
3121         last_non_zero = -1;
3122         qmat = s->q_inter_matrix[qscale];
3123         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
3124     }
3125     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3126     threshold2= (threshold1<<1);
3127
3128     for(;i<64;i++) {
3129         j = scantable[i];
3130         level = block[j];
3131         level = level * qmat[j];
3132
3133 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3134 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3135         if(((unsigned)(level+threshold1))>threshold2){
3136             if(level>0){
3137                 level= (bias + level)>>QMAT_SHIFT;
3138                 block[j]= level;
3139             }else{
3140                 level= (bias - level)>>QMAT_SHIFT;
3141                 block[j]= -level;
3142             }
3143             max |=level;
3144             last_non_zero = i;
3145         }else{
3146             block[j]=0;
3147         }
3148     }
3149     *overflow= s->max_qcoeff < max; //overflow might have happend
3150     
3151     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
3152     if (s->idct_permutation_type != FF_NO_IDCT_PERM)
3153         ff_block_permute(block, s->idct_permutation, scantable, last_non_zero);
3154
3155     return last_non_zero;
3156 }
3157
3158 static void dct_unquantize_mpeg1_c(MpegEncContext *s, 
3159                                    DCTELEM *block, int n, int qscale)
3160 {
3161     int i, level, nCoeffs;
3162     const UINT16 *quant_matrix;
3163
3164     nCoeffs= s->block_last_index[n];
3165     
3166     if (s->mb_intra) {
3167         if (n < 4) 
3168             block[0] = block[0] * s->y_dc_scale;
3169         else
3170             block[0] = block[0] * s->c_dc_scale;
3171         /* XXX: only mpeg1 */
3172         quant_matrix = s->intra_matrix;
3173         for(i=1;i<=nCoeffs;i++) {
3174             int j= s->intra_scantable.permutated[i];
3175             level = block[j];
3176             if (level) {
3177                 if (level < 0) {
3178                     level = -level;
3179                     level = (int)(level * qscale * quant_matrix[j]) >> 3;
3180                     level = (level - 1) | 1;
3181                     level = -level;
3182                 } else {
3183                     level = (int)(level * qscale * quant_matrix[j]) >> 3;
3184                     level = (level - 1) | 1;
3185                 }
3186 #ifdef PARANOID
3187                 if (level < -2048 || level > 2047)
3188                     fprintf(stderr, "unquant error %d %d\n", i, level);
3189 #endif
3190                 block[j] = level;
3191             }
3192         }
3193     } else {
3194         i = 0;
3195         quant_matrix = s->inter_matrix;
3196         for(;i<=nCoeffs;i++) {
3197             int j= s->intra_scantable.permutated[i];
3198             level = block[j];
3199             if (level) {
3200                 if (level < 0) {
3201                     level = -level;
3202                     level = (((level << 1) + 1) * qscale *
3203                              ((int) (quant_matrix[j]))) >> 4;
3204                     level = (level - 1) | 1;
3205                     level = -level;
3206                 } else {
3207                     level = (((level << 1) + 1) * qscale *
3208                              ((int) (quant_matrix[j]))) >> 4;
3209                     level = (level - 1) | 1;
3210                 }
3211 #ifdef PARANOID
3212                 if (level < -2048 || level > 2047)
3213                     fprintf(stderr, "unquant error %d %d\n", i, level);
3214 #endif
3215                 block[j] = level;
3216             }
3217         }
3218     }
3219 }
3220
3221 static void dct_unquantize_mpeg2_c(MpegEncContext *s, 
3222                                    DCTELEM *block, int n, int qscale)
3223 {
3224     int i, level, nCoeffs;
3225     const UINT16 *quant_matrix;
3226
3227     if(s->alternate_scan) nCoeffs= 63;
3228     else nCoeffs= s->block_last_index[n];
3229     
3230     if (s->mb_intra) {
3231         if (n < 4) 
3232             block[0] = block[0] * s->y_dc_scale;
3233         else
3234             block[0] = block[0] * s->c_dc_scale;
3235         quant_matrix = s->intra_matrix;
3236         for(i=1;i<=nCoeffs;i++) {
3237             int j= s->intra_scantable.permutated[i];
3238             level = block[j];
3239             if (level) {
3240                 if (level < 0) {
3241                     level = -level;
3242                     level = (int)(level * qscale * quant_matrix[j]) >> 3;
3243                     level = -level;
3244                 } else {
3245                     level = (int)(level * qscale * quant_matrix[j]) >> 3;
3246                 }
3247 #ifdef PARANOID
3248                 if (level < -2048 || level > 2047)
3249                     fprintf(stderr, "unquant error %d %d\n", i, level);
3250 #endif
3251                 block[j] = level;
3252             }
3253         }
3254     } else {
3255         int sum=-1;
3256         i = 0;
3257         quant_matrix = s->inter_matrix;
3258         for(;i<=nCoeffs;i++) {
3259             int j= s->intra_scantable.permutated[i];
3260             level = block[j];
3261             if (level) {
3262                 if (level < 0) {
3263                     level = -level;
3264                     level = (((level << 1) + 1) * qscale *
3265                              ((int) (quant_matrix[j]))) >> 4;
3266                     level = -level;
3267                 } else {
3268                     level = (((level << 1) + 1) * qscale *
3269                              ((int) (quant_matrix[j]))) >> 4;
3270                 }
3271 #ifdef PARANOID
3272                 if (level < -2048 || level > 2047)
3273                     fprintf(stderr, "unquant error %d %d\n", i, level);
3274 #endif
3275                 block[j] = level;
3276                 sum+=level;
3277             }
3278         }
3279         block[63]^=sum&1;
3280     }
3281 }
3282
3283
3284 static void dct_unquantize_h263_c(MpegEncContext *s, 
3285                                   DCTELEM *block, int n, int qscale)
3286 {
3287     int i, level, qmul, qadd;
3288     int nCoeffs;
3289     
3290     assert(s->block_last_index[n]>=0);
3291     
3292     qadd = (qscale - 1) | 1;
3293     qmul = qscale << 1;
3294     
3295     if (s->mb_intra) {
3296         if (!s->h263_aic) {
3297             if (n < 4) 
3298                 block[0] = block[0] * s->y_dc_scale;
3299             else
3300                 block[0] = block[0] * s->c_dc_scale;
3301         }else
3302             qadd = 0;
3303         i = 1;
3304         nCoeffs= 63; //does not allways use zigzag table 
3305     } else {
3306         i = 0;
3307         nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
3308     }
3309
3310     for(;i<=nCoeffs;i++) {
3311         level = block[i];
3312         if (level) {
3313             if (level < 0) {
3314                 level = level * qmul - qadd;
3315             } else {
3316                 level = level * qmul + qadd;
3317             }
3318 #ifdef PARANOID
3319                 if (level < -2048 || level > 2047)
3320                     fprintf(stderr, "unquant error %d %d\n", i, level);
3321 #endif
3322             block[i] = level;
3323         }
3324     }
3325 }
3326
/*
 * AVCodec descriptor for the "mpeg1video" video encoder
 * (CODEC_TYPE_VIDEO / CODEC_ID_MPEG1VIDEO), wired to the shared
 * MPV_encode_init / MPV_encode_picture / MPV_encode_end entry points.
 * NOTE(review): the initializer is positional, so it relies on the field
 * order of AVCodec (declared outside this file chunk); sizeof(MpegEncContext)
 * is presumably the private-context size -- confirm against avcodec.h.
 */
3327 AVCodec mpeg1video_encoder = {
3328     "mpeg1video",
3329     CODEC_TYPE_VIDEO,
3330     CODEC_ID_MPEG1VIDEO,
3331     sizeof(MpegEncContext),
3332     MPV_encode_init,
3333     MPV_encode_picture,
3334     MPV_encode_end,
3335 };
3336
/*
 * AVCodec descriptor for the "h263" video encoder
 * (CODEC_TYPE_VIDEO / CODEC_ID_H263), wired to the shared
 * MPV_encode_init / MPV_encode_picture / MPV_encode_end entry points.
 * NOTE(review): the initializer is positional, so it relies on the field
 * order of AVCodec (declared outside this file chunk); sizeof(MpegEncContext)
 * is presumably the private-context size -- confirm against avcodec.h.
 */
3337 AVCodec h263_encoder = {
3338     "h263",
3339     CODEC_TYPE_VIDEO,
3340     CODEC_ID_H263,
3341     sizeof(MpegEncContext),
3342     MPV_encode_init,
3343     MPV_encode_picture,
3344     MPV_encode_end,
3345 };
3346
/*
 * AVCodec descriptor for the "h263p" video encoder
 * (CODEC_TYPE_VIDEO / CODEC_ID_H263P), wired to the shared
 * MPV_encode_init / MPV_encode_picture / MPV_encode_end entry points.
 * NOTE(review): the initializer is positional, so it relies on the field
 * order of AVCodec (declared outside this file chunk); sizeof(MpegEncContext)
 * is presumably the private-context size -- confirm against avcodec.h.
 */
3347 AVCodec h263p_encoder = {
3348     "h263p",
3349     CODEC_TYPE_VIDEO,
3350     CODEC_ID_H263P,
3351     sizeof(MpegEncContext),
3352     MPV_encode_init,
3353     MPV_encode_picture,
3354     MPV_encode_end,
3355 };
3356
/*
 * AVCodec descriptor for the "rv10" video encoder
 * (CODEC_TYPE_VIDEO / CODEC_ID_RV10), wired to the shared
 * MPV_encode_init / MPV_encode_picture / MPV_encode_end entry points.
 * NOTE(review): the initializer is positional, so it relies on the field
 * order of AVCodec (declared outside this file chunk); sizeof(MpegEncContext)
 * is presumably the private-context size -- confirm against avcodec.h.
 */
3357 AVCodec rv10_encoder = {
3358     "rv10",
3359     CODEC_TYPE_VIDEO,
3360     CODEC_ID_RV10,
3361     sizeof(MpegEncContext),
3362     MPV_encode_init,
3363     MPV_encode_picture,
3364     MPV_encode_end,
3365 };
3366
/*
 * AVCodec descriptor for the "mjpeg" video encoder
 * (CODEC_TYPE_VIDEO / CODEC_ID_MJPEG), wired to the shared
 * MPV_encode_init / MPV_encode_picture / MPV_encode_end entry points.
 * NOTE(review): the initializer is positional, so it relies on the field
 * order of AVCodec (declared outside this file chunk); sizeof(MpegEncContext)
 * is presumably the private-context size -- confirm against avcodec.h.
 */
3367 AVCodec mjpeg_encoder = {
3368     "mjpeg",
3369     CODEC_TYPE_VIDEO,
3370     CODEC_ID_MJPEG,
3371     sizeof(MpegEncContext),
3372     MPV_encode_init,
3373     MPV_encode_picture,
3374     MPV_encode_end,
3375 };
3376
/*
 * AVCodec descriptor for the "mpeg4" video encoder
 * (CODEC_TYPE_VIDEO / CODEC_ID_MPEG4), wired to the shared
 * MPV_encode_init / MPV_encode_picture / MPV_encode_end entry points.
 * NOTE(review): the initializer is positional, so it relies on the field
 * order of AVCodec (declared outside this file chunk); sizeof(MpegEncContext)
 * is presumably the private-context size -- confirm against avcodec.h.
 */
3377 AVCodec mpeg4_encoder = {
3378     "mpeg4",
3379     CODEC_TYPE_VIDEO,
3380     CODEC_ID_MPEG4,
3381     sizeof(MpegEncContext),
3382     MPV_encode_init,
3383     MPV_encode_picture,
3384     MPV_encode_end,
3385 };
3386
/*
 * AVCodec descriptor for the "msmpeg4v1" video encoder
 * (CODEC_TYPE_VIDEO / CODEC_ID_MSMPEG4V1), wired to the shared
 * MPV_encode_init / MPV_encode_picture / MPV_encode_end entry points.
 * NOTE(review): the initializer is positional, so it relies on the field
 * order of AVCodec (declared outside this file chunk); sizeof(MpegEncContext)
 * is presumably the private-context size -- confirm against avcodec.h.
 */
3387 AVCodec msmpeg4v1_encoder = {
3388     "msmpeg4v1",
3389     CODEC_TYPE_VIDEO,
3390     CODEC_ID_MSMPEG4V1,
3391     sizeof(MpegEncContext),
3392     MPV_encode_init,
3393     MPV_encode_picture,
3394     MPV_encode_end,
3395 };
3396
/*
 * AVCodec descriptor for the "msmpeg4v2" video encoder
 * (CODEC_TYPE_VIDEO / CODEC_ID_MSMPEG4V2), wired to the shared
 * MPV_encode_init / MPV_encode_picture / MPV_encode_end entry points.
 * NOTE(review): the initializer is positional, so it relies on the field
 * order of AVCodec (declared outside this file chunk); sizeof(MpegEncContext)
 * is presumably the private-context size -- confirm against avcodec.h.
 */
3397 AVCodec msmpeg4v2_encoder = {
3398     "msmpeg4v2",
3399     CODEC_TYPE_VIDEO,
3400     CODEC_ID_MSMPEG4V2,
3401     sizeof(MpegEncContext),
3402     MPV_encode_init,
3403     MPV_encode_picture,
3404     MPV_encode_end,
3405 };
3406
/*
 * AVCodec descriptor for the MS-MPEG4 v3 video encoder
 * (CODEC_TYPE_VIDEO / CODEC_ID_MSMPEG4V3), wired to the shared
 * MPV_encode_init / MPV_encode_picture / MPV_encode_end entry points.
 * Unlike the v1/v2 descriptors, the name string here is plain "msmpeg4"
 * (no version suffix) even though the variable and codec id say V3.
 * NOTE(review): the initializer is positional, so it relies on the field
 * order of AVCodec (declared outside this file chunk); sizeof(MpegEncContext)
 * is presumably the private-context size -- confirm against avcodec.h.
 */
3407 AVCodec msmpeg4v3_encoder = {
3408     "msmpeg4",
3409     CODEC_TYPE_VIDEO,
3410     CODEC_ID_MSMPEG4V3,
3411     sizeof(MpegEncContext),
3412     MPV_encode_init,
3413     MPV_encode_picture,
3414     MPV_encode_end,
3415 };
3416
/*
 * AVCodec descriptor for the "wmv1" video encoder
 * (CODEC_TYPE_VIDEO / CODEC_ID_WMV1), wired to the shared
 * MPV_encode_init / MPV_encode_picture / MPV_encode_end entry points.
 * NOTE(review): the initializer is positional, so it relies on the field
 * order of AVCodec (declared outside this file chunk); sizeof(MpegEncContext)
 * is presumably the private-context size -- confirm against avcodec.h.
 */
3417 AVCodec wmv1_encoder = {
3418     "wmv1",
3419     CODEC_TYPE_VIDEO,
3420     CODEC_ID_WMV1,
3421     sizeof(MpegEncContext),
3422     MPV_encode_init,
3423     MPV_encode_picture,
3424     MPV_encode_end,
3425 };
3426
/*
 * AVCodec descriptor for the "wmv2" video encoder
 * (CODEC_TYPE_VIDEO / CODEC_ID_WMV2), wired to the shared
 * MPV_encode_init / MPV_encode_picture / MPV_encode_end entry points.
 * NOTE(review): the initializer is positional, so it relies on the field
 * order of AVCodec (declared outside this file chunk); sizeof(MpegEncContext)
 * is presumably the private-context size -- confirm against avcodec.h.
 */
3427 AVCodec wmv2_encoder = {
3428     "wmv2",
3429     CODEC_TYPE_VIDEO,
3430     CODEC_ID_WMV2,
3431     sizeof(MpegEncContext),
3432     MPV_encode_init,
3433     MPV_encode_picture,
3434     MPV_encode_end,
3435 };