/* libavcodec/mpegvideo.c — "fix a warning" revision (git.sesse.net FFmpeg mirror) */
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  *
20  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
21  */
22
23 /**
24  * @file mpegvideo.c
25  * The simplest mpeg encoder (well, it was the simplest!).
26  */
27
28 #include "avcodec.h"
29 #include "dsputil.h"
30 #include "mpegvideo.h"
31 #include "faandct.h"
32 #include <limits.h>
33
34 #ifdef USE_FASTMEMCPY
35 #include "libvo/fastmemcpy.h"
36 #endif
37
38 //#undef NDEBUG
39 //#include <assert.h>
40
41 #ifdef CONFIG_ENCODERS
42 static int encode_picture(MpegEncContext *s, int picture_number);
43 #endif //CONFIG_ENCODERS
44 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
45                                    DCTELEM *block, int n, int qscale);
46 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
47                                    DCTELEM *block, int n, int qscale);
48 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
49                                    DCTELEM *block, int n, int qscale);
50 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
51                                    DCTELEM *block, int n, int qscale);
52 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
53                                    DCTELEM *block, int n, int qscale);
54 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
55                                   DCTELEM *block, int n, int qscale);
56 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
57                                   DCTELEM *block, int n, int qscale);
58 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
59 #ifdef CONFIG_ENCODERS
60 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
61 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
62 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
63 static int sse_mb(MpegEncContext *s);
64 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block);
65 #endif //CONFIG_ENCODERS
66
67 #ifdef HAVE_XVMC
68 extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
69 extern void XVMC_field_end(MpegEncContext *s);
70 extern void XVMC_decode_mb(MpegEncContext *s);
71 #endif
72
73 void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
74
75
76 /* enable all paranoid tests for rounding, overflows, etc... */
77 //#define PARANOID
78
79 //#define DEBUG
80
81
82 /* for jpeg fast DCT */
83 #define CONST_BITS 14
84
85 static const uint16_t aanscales[64] = {
86     /* precomputed values scaled up by 14 bits */
87     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
88     22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
89     21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
90     19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
91     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
92     12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
93     8867 , 12299, 11585, 10426,  8867,  6967,  4799,  2446,
94     4520 ,  6270,  5906,  5315,  4520,  3552,  2446,  1247
95 };
96
97 static const uint8_t h263_chroma_roundtab[16] = {
98 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
99     0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
100 };
101
102 static const uint8_t ff_default_chroma_qscale_table[32]={
103 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
104     0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
105 };
106
107 #ifdef CONFIG_ENCODERS
108 static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
109 static uint8_t default_fcode_tab[MAX_MV*2+1];
110
111 enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
112
113 static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
114                            const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra)
115 {
116     int qscale;
117     int shift=0;
118
119     for(qscale=qmin; qscale<=qmax; qscale++){
120         int i;
121         if (dsp->fdct == ff_jpeg_fdct_islow
122 #ifdef FAAN_POSTSCALE
123             || dsp->fdct == ff_faandct
124 #endif
125             ) {
126             for(i=0;i<64;i++) {
127                 const int j= dsp->idct_permutation[i];
128                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
129                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
130                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
131                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
132
133                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) /
134                                 (qscale * quant_matrix[j]));
135             }
136         } else if (dsp->fdct == fdct_ifast
137 #ifndef FAAN_POSTSCALE
138                    || dsp->fdct == ff_faandct
139 #endif
140                    ) {
141             for(i=0;i<64;i++) {
142                 const int j= dsp->idct_permutation[i];
143                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
144                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
145                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
146                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
147
148                 qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) /
149                                 (aanscales[i] * qscale * quant_matrix[j]));
150             }
151         } else {
152             for(i=0;i<64;i++) {
153                 const int j= dsp->idct_permutation[i];
154                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
155                    So 16           <= qscale * quant_matrix[i]             <= 7905
156                    so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
157                    so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
158                 */
159                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
160 //                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
161                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
162
163                 if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
164                 qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
165             }
166         }
167
168         for(i=intra; i<64; i++){
169             int64_t max= 8191;
170             if (dsp->fdct == fdct_ifast
171 #ifndef FAAN_POSTSCALE
172                    || dsp->fdct == ff_faandct
173 #endif
174                    ) {
175                 max= (8191LL*aanscales[i]) >> 14;
176             }
177             while(((max * qmat[qscale][i]) >> shift) > INT_MAX){
178                 shift++;
179             }
180         }
181     }
182     if(shift){
183         av_log(NULL, AV_LOG_INFO, "Warning, QMAT_SHIFT is larger then %d, overflows possible\n", QMAT_SHIFT - shift);
184     }
185 }
186
187 static inline void update_qscale(MpegEncContext *s){
188     s->qscale= (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
189     s->qscale= clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
190
191     s->lambda2= (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
192 }
193 #endif //CONFIG_ENCODERS
194
195 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
196     int i;
197     int end;
198
199     st->scantable= src_scantable;
200
201     for(i=0; i<64; i++){
202         int j;
203         j = src_scantable[i];
204         st->permutated[i] = permutation[j];
205 #ifdef ARCH_POWERPC
206         st->inverse[j] = i;
207 #endif
208     }
209
210     end=-1;
211     for(i=0; i<64; i++){
212         int j;
213         j = st->permutated[i];
214         if(j>end) end=j;
215         st->raster_end[i]= end;
216     }
217 }
218
219 #ifdef CONFIG_ENCODERS
220 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix){
221     int i;
222
223     if(matrix){
224         put_bits(pb, 1, 1);
225         for(i=0;i<64;i++) {
226             put_bits(pb, 8, matrix[ ff_zigzag_direct[i] ]);
227         }
228     }else
229         put_bits(pb, 1, 0);
230 }
231 #endif //CONFIG_ENCODERS
232
/**
 * finds the next MPEG startcode prefix (0x000001xx) in [p, end).
 * @param p     current read position in the buffer
 * @param end   one past the last byte of the buffer
 * @param state the last four bytes read, carried across calls so a
 *              startcode split over two buffers is still found;
 *              updated on return
 * @return pointer just past the startcode, or end if none was found
 */
const uint8_t *ff_find_start_code(const uint8_t * restrict p, const uint8_t *end, uint32_t * restrict state){
    int i;

    assert(p<=end);
    if(p>=end)
        return end;

    /* check whether bytes carried in *state already complete a
       startcode together with the first bytes of this buffer */
    for(i=0; i<3; i++){
        uint32_t tmp= *state << 8;
        *state= tmp + *(p++);
        if(tmp == 0x100 || p==end)
            return p;
    }

    /* scan for 00 00 01, skipping up to 3 bytes per iteration based
       on what the trailing bytes can rule out */
    while(p<end){
        if     (p[-1] > 1      ) p+= 3;
        else if(p[-2]          ) p+= 2;
        else if(p[-3]|(p[-1]-1)) p++;
        else{
            p++;
            break;
        }
    }

    /* save the last 4 bytes into *state for the next call */
    p= FFMIN(p, end)-4;
    *state=  be2me_32(unaligned32(p));

    return p+4;
}
262
/**
 * initializes the DCT related parts of MpegEncContext that are common
 * to encoder and decoder: C (un)quantize functions, architecture
 * specific overrides and the scantables.
 * @return always 0
 */
int DCT_common_init(MpegEncContext *s)
{
    /* portable C implementations; the arch specific init calls below
       may replace them with optimized versions */
    s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
    s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
    s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
    s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
    s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
    if(s->flags & CODEC_FLAG_BITEXACT)
        s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_bitexact;
    s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;

#ifdef CONFIG_ENCODERS
    s->dct_quantize= dct_quantize_c;
    s->denoise_dct= denoise_dct_c;
#endif //CONFIG_ENCODERS

    /* per-architecture overrides of the function pointers above */
#ifdef HAVE_MMX
    MPV_common_init_mmx(s);
#endif
#ifdef ARCH_ALPHA
    MPV_common_init_axp(s);
#endif
#ifdef HAVE_MLIB
    MPV_common_init_mlib(s);
#endif
#ifdef HAVE_MMI
    MPV_common_init_mmi(s);
#endif
#ifdef ARCH_ARMV4L
    MPV_common_init_armv4l(s);
#endif
#ifdef ARCH_POWERPC
    MPV_common_init_ppc(s);
#endif

#ifdef CONFIG_ENCODERS
    /* keep the (possibly arch-optimized) quantizer as the fast one,
       even when trellis quantization replaces the default below */
    s->fast_dct_quantize= s->dct_quantize;

    if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
        s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
    }

#endif //CONFIG_ENCODERS

    /* load & permutate scantables
       note: only wmv uses different ones
    */
    if(s->alternate_scan){
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
    }else{
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
    }
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);

    return 0;
}
323
/**
 * makes a shallow copy of src into dst and tags it as a copy so the
 * underlying buffers are not released twice.
 */
static void copy_picture(Picture *dst, Picture *src){
    *dst = *src;
    dst->type= FF_BUFFER_TYPE_COPY;
}
328
329 #ifdef CONFIG_ENCODERS
/**
 * copies frame properties (picture type, quality, timestamps, ...) of
 * src to dst; when motion-estimation thresholding is enabled the
 * macroblock type, motion vector and reference index arrays are
 * copied as well.
 */
static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *src){
    int i;

    dst->pict_type              = src->pict_type;
    dst->quality                = src->quality;
    dst->coded_picture_number   = src->coded_picture_number;
    dst->display_picture_number = src->display_picture_number;
//    dst->reference              = src->reference;
    dst->pts                    = src->pts;
    dst->interlaced_frame       = src->interlaced_frame;
    dst->top_field_first        = src->top_field_first;

    if(s->avctx->me_threshold){
        /* me_threshold needs the caller-supplied motion data; warn
           about anything that is missing or inconsistent */
        if(!src->motion_val[0])
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
        if(!src->mb_type)
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
        if(!src->ref_index[0])
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
        if(src->motion_subsample_log2 != dst->motion_subsample_log2)
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
            src->motion_subsample_log2, dst->motion_subsample_log2);

        memcpy(dst->mb_type, src->mb_type, s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));

        for(i=0; i<2; i++){
            int stride= ((16*s->mb_width )>>src->motion_subsample_log2) + 1;
            int height= ((16*s->mb_height)>>src->motion_subsample_log2);

            /* skip the copy when src and dst share the same arrays */
            if(src->motion_val[i] && src->motion_val[i] != dst->motion_val[i]){
                memcpy(dst->motion_val[i], src->motion_val[i], 2*stride*height*sizeof(int16_t));
            }
            if(src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]){
                memcpy(dst->ref_index[i], src->ref_index[i], s->b8_stride*2*s->mb_height*sizeof(int8_t));
            }
        }
    }
}
368 #endif
369
/**
 * allocates a Picture
 * The pixels are allocated/set by calling get_buffer() if shared=0.
 * The per-macroblock side arrays (qscale table, mb types, motion
 * vectors, ...) are allocated on first use and reused afterwards.
 * @param shared nonzero if pic->data[] is provided externally and must
 *               not be allocated here
 * @return 0 on success, -1 on error (partially allocated side arrays
 *         are released later through free_picture())
 */
static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
    const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesnt sig11
    const int mb_array_size= s->mb_stride*s->mb_height;
    const int b8_array_size= s->b8_stride*s->mb_height*2;
    const int b4_array_size= s->b4_stride*s->mb_height*4;
    int i;

    if(shared){
        assert(pic->data[0]);
        assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
        pic->type= FF_BUFFER_TYPE_SHARED;
    }else{
        int r;

        assert(!pic->data[0]);

        r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);

        /* sanity check the buffer handed back by the application */
        if(r<0 || !pic->age || !pic->type || !pic->data[0]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
            return -1;
        }

        if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
            return -1;
        }

        if(pic->linesize[1] != pic->linesize[2]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
            return -1;
        }

        s->linesize  = pic->linesize[0];
        s->uvlinesize= pic->linesize[1];
    }

    /* qscale_table doubles as the "side arrays already allocated" flag */
    if(pic->qscale_table==NULL){
        if (s->encoding) {
            CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
        }

        CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
        CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
        CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(uint32_t))
        pic->mb_type= pic->mb_type_base + s->mb_stride+1;
        if(s->out_format == FMT_H264){
            /* h264: motion vectors at 4x4 block granularity (log2 = 2) */
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+4)  * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 2;
        }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
            /* everything else: 8x8 block granularity (log2 = 3) */
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 3;
        }
        if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
            CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
        }
        pic->qstride= s->mb_stride;
        CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
    }

    //it might be nicer if the application would keep track of these but it would require a API change
    memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
    s->prev_pict_types[0]= s->pict_type;
    if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
        pic->age= INT_MAX; // skipped MBs in b frames are quite rare in mpeg1/2 and its a bit tricky to skip them anyway

    return 0;
fail: //for the CHECKED_ALLOCZ macro
    return -1;
}
454
/**
 * deallocates a picture
 * Returns the pixel buffer to the application (unless it is shared)
 * and frees all per-macroblock side arrays.
 */
static void free_picture(MpegEncContext *s, Picture *pic){
    int i;

    if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
        s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
    }

    av_freep(&pic->mb_var);
    av_freep(&pic->mc_mb_var);
    av_freep(&pic->mb_mean);
    av_freep(&pic->mbskip_table);
    av_freep(&pic->qscale_table);
    av_freep(&pic->mb_type_base);
    av_freep(&pic->dct_coeff);
    av_freep(&pic->pan_scan);
    pic->mb_type= NULL;
    for(i=0; i<2; i++){
        av_freep(&pic->motion_val_base[i]);
        av_freep(&pic->ref_index[i]);
    }

    /* shared pictures don't own their pixel buffers; just drop the pointers */
    if(pic->type == FF_BUFFER_TYPE_SHARED){
        for(i=0; i<4; i++){
            pic->base[i]=
            pic->data[i]= NULL;
        }
        pic->type= 0;
    }
}
487
/**
 * allocates the per-thread buffers of a MpegEncContext: edge emulation
 * buffer, scratchpads, motion estimation maps and the DCT block array.
 * @param base unused in this function; NOTE(review): presumably kept
 *        for signature symmetry with the callers — confirm
 * @return 0 on success, -1 on failure
 */
static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
    int i;

    // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
    CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*21*2); //(width + edge + align)*interlaced*MBsize*tolerance
    s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*21;

     //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer()
    CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t))
    /* the three scratchpads alias the same allocation */
    s->rd_scratchpad=   s->me.scratchpad;
    s->b_scratchpad=    s->me.scratchpad;
    s->obmc_scratchpad= s->me.scratchpad + 16;
    if (s->encoding) {
        CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
        CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
        if(s->avctx->noise_reduction){
            CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
        }
    }
    CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
    s->block= s->blocks[0];

    /* pblocks[i] points into the block array */
    for(i=0;i<12;i++){
        s->pblocks[i] = (short *)(&s->block[i]);
    }
    return 0;
fail:
    return -1; //free() through MPV_common_end()
}
517
/**
 * frees the per-thread buffers allocated by init_duplicate_context()
 * and clears the pointers that aliased into them.
 */
static void free_duplicate_context(MpegEncContext *s){
    if(s==NULL) return;

    av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
    av_freep(&s->me.scratchpad);
    /* these aliased me.scratchpad, so only the pointers are reset */
    s->rd_scratchpad=
    s->b_scratchpad=
    s->obmc_scratchpad= NULL;

    av_freep(&s->dct_error_sum);
    av_freep(&s->me.map);
    av_freep(&s->me.score_map);
    av_freep(&s->blocks);
    s->block= NULL;
}
533
/**
 * saves the thread-local fields of src into bak.
 * Used by ff_update_duplicate_context() so the per-thread buffers and
 * state survive a whole-struct memcpy from another context.
 */
static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
#define COPY(a) bak->a= src->a
    COPY(allocated_edge_emu_buffer);
    COPY(edge_emu_buffer);
    COPY(me.scratchpad);
    COPY(rd_scratchpad);
    COPY(b_scratchpad);
    COPY(obmc_scratchpad);
    COPY(me.map);
    COPY(me.score_map);
    COPY(blocks);
    COPY(block);
    COPY(start_mb_y);
    COPY(end_mb_y);
    COPY(me.map_generation);
    COPY(pb);
    COPY(dct_error_sum);
    COPY(dct_count[0]);
    COPY(dct_count[1]);
#undef COPY
}
555
/**
 * copies src into dst while preserving dst's thread-local buffers and
 * state (see backup_duplicate_context()), then repoints pblocks into
 * dst's own block array.
 */
void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
    MpegEncContext bak;
    int i;
    //FIXME copy only needed parts
//START_TIMER
    backup_duplicate_context(&bak, dst);
    memcpy(dst, src, sizeof(MpegEncContext));
    backup_duplicate_context(dst, &bak);
    /* the memcpy clobbered pblocks; rebuild them against dst->block */
    for(i=0;i<12;i++){
        dst->pblocks[i] = (short *)(&dst->block[i]);
    }
//STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
}
569
570 #ifdef CONFIG_ENCODERS
/**
 * propagates the fields that motion estimation / header encoding may
 * have changed from the main context to a slice-thread context.
 */
static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){
#define COPY(a) dst->a= src->a
    COPY(pict_type);
    COPY(current_picture);
    COPY(f_code);
    COPY(b_code);
    COPY(qscale);
    COPY(lambda);
    COPY(lambda2);
    COPY(picture_in_gop_number);
    COPY(gop_picture_number);
    COPY(frame_pred_frame_dct); //FIXME don't set in encode_header
    COPY(progressive_frame); //FIXME don't set in encode_header
    COPY(partitioned_frame); //FIXME don't set in encode_header
#undef COPY
}
587 #endif
588
589 /**
590  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
591  * the changed fields will not depend upon the prior state of the MpegEncContext.
592  */
593 static void MPV_common_defaults(MpegEncContext *s){
594     s->y_dc_scale_table=
595     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
596     s->chroma_qscale_table= ff_default_chroma_qscale_table;
597     s->progressive_frame= 1;
598     s->progressive_sequence= 1;
599     s->picture_structure= PICT_FRAME;
600
601     s->coded_picture_number = 0;
602     s->picture_number = 0;
603     s->input_picture_number = 0;
604
605     s->picture_in_gop_number = 0;
606
607     s->f_code = 1;
608     s->b_code = 1;
609 }
610
/**
 * sets the given MpegEncContext to defaults for decoding.
 * the changed fields will not depend upon the prior state of the MpegEncContext.
 */
void MPV_decode_defaults(MpegEncContext *s){
    /* decoding currently needs nothing beyond the common defaults */
    MPV_common_defaults(s);
}
618
619 /**
620  * sets the given MpegEncContext to defaults for encoding.
621  * the changed fields will not depend upon the prior state of the MpegEncContext.
622  */
623
624 #ifdef CONFIG_ENCODERS
static void MPV_encode_defaults(MpegEncContext *s){
    /* guards the one-time initialization of the shared static tables
       below; NOTE(review): not thread-safe if two encoders are opened
       concurrently — confirm callers serialize this */
    static int done=0;

    MPV_common_defaults(s);

    if(!done){
        int i;
        done=1;

        /* NOTE(review): the av_mallocz() result is not checked here;
           a failure would leave default_mv_penalty NULL — confirm
           downstream handling */
        default_mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
        memset(default_fcode_tab , 0, sizeof(uint8_t)*(2*MAX_MV+1));

        /* fcode 1 covers motion vector components in [-16, 15] */
        for(i=-16; i<16; i++){
            default_fcode_tab[i + MAX_MV]= 1;
        }
    }
    s->me.mv_penalty= default_mv_penalty;
    s->fcode_tab= default_fcode_tab;
}
644 #endif //CONFIG_ENCODERS
645
646 /**
647  * init common structure for both encoder and decoder.
648  * this assumes that some variables like width/height are already set
649  */
650 int MPV_common_init(MpegEncContext *s)
651 {
652     int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;
653
654     s->mb_height = (s->height + 15) / 16;
655
656     if(s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height)){
657         av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
658         return -1;
659     }
660
661     if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height))
662         return -1;
663
664     dsputil_init(&s->dsp, s->avctx);
665     DCT_common_init(s);
666
667     s->flags= s->avctx->flags;
668     s->flags2= s->avctx->flags2;
669
670     s->mb_width  = (s->width  + 15) / 16;
671     s->mb_stride = s->mb_width + 1;
672     s->b8_stride = s->mb_width*2 + 1;
673     s->b4_stride = s->mb_width*4 + 1;
674     mb_array_size= s->mb_height * s->mb_stride;
675     mv_table_size= (s->mb_height+2) * s->mb_stride + 1;
676
677     /* set chroma shifts */
678     avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
679                                                     &(s->chroma_y_shift) );
680
681     /* set default edge pos, will be overriden in decode_header if needed */
682     s->h_edge_pos= s->mb_width*16;
683     s->v_edge_pos= s->mb_height*16;
684
685     s->mb_num = s->mb_width * s->mb_height;
686
687     s->block_wrap[0]=
688     s->block_wrap[1]=
689     s->block_wrap[2]=
690     s->block_wrap[3]= s->b8_stride;
691     s->block_wrap[4]=
692     s->block_wrap[5]= s->mb_stride;
693
694     y_size = s->b8_stride * (2 * s->mb_height + 1);
695     c_size = s->mb_stride * (s->mb_height + 1);
696     yc_size = y_size + 2 * c_size;
697
698     /* convert fourcc to upper case */
699     s->avctx->codec_tag=   toupper( s->avctx->codec_tag     &0xFF)
700                         + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
701                         + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16)
702                         + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);
703
704     s->avctx->stream_codec_tag=   toupper( s->avctx->stream_codec_tag     &0xFF)
705                                + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
706                                + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16)
707                                + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);
708
709     s->avctx->coded_frame= (AVFrame*)&s->current_picture;
710
711     CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error ressilience code looks cleaner with this
712     for(y=0; y<s->mb_height; y++){
713         for(x=0; x<s->mb_width; x++){
714             s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
715         }
716     }
717     s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
718
719     if (s->encoding) {
720         /* Allocate MV tables */
721         CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
722         CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
723         CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
724         CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
725         CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
726         CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
727         s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
728         s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
729         s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
730         s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
731         s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
732         s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;
733
734         if(s->msmpeg4_version){
735             CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
736         }
737         CHECKED_ALLOCZ(s->avctx->stats_out, 256);
738
739         /* Allocate MB type table */
740         CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding
741
742         CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))
743
744         CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
745         CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
746         CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
747         CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
748         CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
749         CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
750
751         if(s->avctx->noise_reduction){
752             CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
753         }
754     }
755     CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))
756
757     CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
758
759     if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
760         /* interlaced direct mode decoding tables */
761             for(i=0; i<2; i++){
762                 int j, k;
763                 for(j=0; j<2; j++){
764                     for(k=0; k<2; k++){
765                         CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
766                         s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
767                     }
768                     CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
769                     CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
770                     s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
771                 }
772                 CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
773             }
774     }
775     if (s->out_format == FMT_H263) {
776         /* ac values */
777         CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
778         s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
779         s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
780         s->ac_val[2] = s->ac_val[1] + c_size;
781
782         /* cbp values */
783         CHECKED_ALLOCZ(s->coded_block_base, y_size);
784         s->coded_block= s->coded_block_base + s->b8_stride + 1;
785
786         /* cbp, ac_pred, pred_dir */
787         CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
788         CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
789     }
790
791     if (s->h263_pred || s->h263_plus || !s->encoding) {
792         /* dc values */
793         //MN: we need these for error resilience of intra-frames
794         CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
795         s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
796         s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
797         s->dc_val[2] = s->dc_val[1] + c_size;
798         for(i=0;i<yc_size;i++)
799             s->dc_val_base[i] = 1024;
800     }
801
802     /* which mb is a intra block */
803     CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
804     memset(s->mbintra_table, 1, mb_array_size);
805
806     /* init macroblock skip table */
807     CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
808     //Note the +1 is for a quicker mpeg4 slice_end detection
809     CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
810
811     s->parse_context.state= -1;
812     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
813        s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
814        s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
815        s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
816     }
817
818     s->context_initialized = 1;
819
820     s->thread_context[0]= s;
821     for(i=1; i<s->avctx->thread_count; i++){
822         s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
823         memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
824     }
825
826     for(i=0; i<s->avctx->thread_count; i++){
827         if(init_duplicate_context(s->thread_context[i], s) < 0)
828            goto fail;
829         s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
830         s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
831     }
832
833     return 0;
834  fail:
835     MPV_common_end(s);
836     return -1;
837 }
838
/* free everything allocated by MPV_common_init(), common to encoder and decoder */
void MPV_common_end(MpegEncContext *s)
{
    int i, j, k;

    /* Tear down the per-thread duplicate contexts first. Slot 0 aliases s
       itself, so only slots >= 1 are freed as separate allocations. */
    for(i=0; i<s->avctx->thread_count; i++){
        free_duplicate_context(s->thread_context[i]);
    }
    for(i=1; i<s->avctx->thread_count; i++){
        av_freep(&s->thread_context[i]);
    }

    av_freep(&s->parse_context.buffer);
    s->parse_context.buffer_size=0;

    /* Motion vector tables: the *_mv_table pointers point into the
       *_mv_table_base allocations (offset by mb_stride+1 at init time),
       so only the bases are freed and the derived pointers are NULLed. */
    av_freep(&s->mb_type);
    av_freep(&s->p_mv_table_base);
    av_freep(&s->b_forw_mv_table_base);
    av_freep(&s->b_back_mv_table_base);
    av_freep(&s->b_bidir_forw_mv_table_base);
    av_freep(&s->b_bidir_back_mv_table_base);
    av_freep(&s->b_direct_mv_table_base);
    s->p_mv_table= NULL;
    s->b_forw_mv_table= NULL;
    s->b_back_mv_table= NULL;
    s->b_bidir_forw_mv_table= NULL;
    s->b_bidir_back_mv_table= NULL;
    s->b_direct_mv_table= NULL;
    /* field-prediction MV tables (interlaced ME / mpeg4 direct mode) */
    for(i=0; i<2; i++){
        for(j=0; j<2; j++){
            for(k=0; k<2; k++){
                av_freep(&s->b_field_mv_table_base[i][j][k]);
                s->b_field_mv_table[i][j][k]=NULL;
            }
            av_freep(&s->b_field_select_table[i][j]);
            av_freep(&s->p_field_mv_table_base[i][j]);
            s->p_field_mv_table[i][j]=NULL;
        }
        av_freep(&s->p_field_select_table[i]);
    }

    /* prediction state (dc/ac values, coded-block pattern tables) */
    av_freep(&s->dc_val_base);
    av_freep(&s->ac_val_base);
    av_freep(&s->coded_block_base);
    av_freep(&s->mbintra_table);
    av_freep(&s->cbp_table);
    av_freep(&s->pred_dir_table);

    av_freep(&s->mbskip_table);
    av_freep(&s->prev_pict_types);
    av_freep(&s->bitstream_buffer);
    s->allocated_bitstream_buffer_size=0;

    /* encoder-only state; av_freep() is a no-op on NULL, so this is safe
       for decoder contexts too */
    av_freep(&s->avctx->stats_out);
    av_freep(&s->ac_stats);
    av_freep(&s->error_status_table);
    av_freep(&s->mb_index2xy);
    av_freep(&s->lambda_table);
    av_freep(&s->q_intra_matrix);
    av_freep(&s->q_inter_matrix);
    av_freep(&s->q_intra_matrix16);
    av_freep(&s->q_inter_matrix16);
    av_freep(&s->input_picture);
    av_freep(&s->reordered_input_picture);
    av_freep(&s->dct_offset);

    /* free the picture pool; each picture's buffers first, then the array */
    if(s->picture){
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            free_picture(s, &s->picture[i]);
        }
    }
    av_freep(&s->picture);
    s->context_initialized = 0;
    /* reference pointers pointed into s->picture[], which is gone now */
    s->last_picture_ptr=
    s->next_picture_ptr=
    s->current_picture_ptr= NULL;
    s->linesize= s->uvlinesize= 0;

    for(i=0; i<3; i++)
        av_freep(&s->visualization_buffer[i]);

    avcodec_default_free_buffers(s->avctx);
}
922
923 #ifdef CONFIG_ENCODERS
924
925 /* init video encoder */
926 int MPV_encode_init(AVCodecContext *avctx)
927 {
928     MpegEncContext *s = avctx->priv_data;
929     int i;
930     int chroma_h_shift, chroma_v_shift;
931
932     MPV_encode_defaults(s);
933
934     switch (avctx->codec_id) {
935     case CODEC_ID_MPEG2VIDEO:
936         if(avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUV422P){
937             av_log(avctx, AV_LOG_ERROR, "only YUV420 and YUV422 are supported\n");
938             return -1;
939         }
940         break;
941     case CODEC_ID_LJPEG:
942     case CODEC_ID_MJPEG:
943         if(avctx->pix_fmt != PIX_FMT_YUVJ420P && avctx->pix_fmt != PIX_FMT_YUVJ422P &&
944            ((avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUV422P) || avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL)){
945             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
946             return -1;
947         }
948         break;
949     default:
950         if(avctx->pix_fmt != PIX_FMT_YUV420P){
951             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
952             return -1;
953         }
954     }
955
956     switch (avctx->pix_fmt) {
957     case PIX_FMT_YUVJ422P:
958     case PIX_FMT_YUV422P:
959         s->chroma_format = CHROMA_422;
960         break;
961     case PIX_FMT_YUVJ420P:
962     case PIX_FMT_YUV420P:
963     default:
964         s->chroma_format = CHROMA_420;
965         break;
966     }
967
968     s->bit_rate = avctx->bit_rate;
969     s->width = avctx->width;
970     s->height = avctx->height;
971     if(avctx->gop_size > 600 && avctx->strict_std_compliance>FF_COMPLIANCE_EXPERIMENTAL){
972         av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
973         avctx->gop_size=600;
974     }
975     s->gop_size = avctx->gop_size;
976     s->avctx = avctx;
977     s->flags= avctx->flags;
978     s->flags2= avctx->flags2;
979     s->max_b_frames= avctx->max_b_frames;
980     s->codec_id= avctx->codec->id;
981     s->luma_elim_threshold  = avctx->luma_elim_threshold;
982     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
983     s->strict_std_compliance= avctx->strict_std_compliance;
984     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
985     s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
986     s->mpeg_quant= avctx->mpeg_quant;
987     s->rtp_mode= !!avctx->rtp_payload_size;
988     s->intra_dc_precision= avctx->intra_dc_precision;
989     s->user_specified_pts = AV_NOPTS_VALUE;
990
991     if (s->gop_size <= 1) {
992         s->intra_only = 1;
993         s->gop_size = 12;
994     } else {
995         s->intra_only = 0;
996     }
997
998     s->me_method = avctx->me_method;
999
1000     /* Fixed QSCALE */
1001     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
1002
1003     s->adaptive_quant= (   s->avctx->lumi_masking
1004                         || s->avctx->dark_masking
1005                         || s->avctx->temporal_cplx_masking
1006                         || s->avctx->spatial_cplx_masking
1007                         || s->avctx->p_masking
1008                         || s->avctx->border_masking
1009                         || (s->flags&CODEC_FLAG_QP_RD))
1010                        && !s->fixed_qscale;
1011
1012     s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
1013     s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
1014     s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
1015     s->intra_vlc_format= !!(s->flags2 & CODEC_FLAG2_INTRA_VLC);
1016
1017     if(avctx->rc_max_rate && !avctx->rc_buffer_size){
1018         av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed, for encoding with a maximum bitrate\n");
1019         return -1;
1020     }
1021
1022     if(avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate){
1023         av_log(avctx, AV_LOG_INFO, "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
1024     }
1025
1026     if(avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate){
1027         av_log(avctx, AV_LOG_INFO, "bitrate below min bitrate\n");
1028         return -1;
1029     }
1030
1031     if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){
1032         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
1033         return -1;
1034     }
1035
1036     if(   s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate
1037        && (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO)
1038        && 90000LL * (avctx->rc_buffer_size-1) > s->avctx->rc_max_rate*0xFFFFLL){
1039
1040         av_log(avctx, AV_LOG_INFO, "Warning vbv_delay will be set to 0xFFFF (=VBR) as the specified vbv buffer is too large for the given bitrate!\n");
1041     }
1042
1043     if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4
1044        && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P && s->codec_id != CODEC_ID_FLV1){
1045         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
1046         return -1;
1047     }
1048
1049     if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
1050         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decision\n");
1051         return -1;
1052     }
1053
1054     if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
1055         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
1056         return -1;
1057     }
1058
1059     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
1060         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
1061         return -1;
1062     }
1063
1064     if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
1065         av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
1066         return -1;
1067     }
1068
1069     if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
1070         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
1071         return -1;
1072     }
1073
1074     if((s->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN))
1075        && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO){
1076         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
1077         return -1;
1078     }
1079
1080     if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
1081         av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supported by codec\n");
1082         return -1;
1083     }
1084
1085     if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
1086         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
1087         return -1;
1088     }
1089
1090     if((s->flags & CODEC_FLAG_QP_RD) && s->avctx->mb_decision != FF_MB_DECISION_RD){
1091         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
1092         return -1;
1093     }
1094
1095     if(s->avctx->scenechange_threshold < 1000000000 && (s->flags & CODEC_FLAG_CLOSED_GOP)){
1096         av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection arent supported yet\n");
1097         return -1;
1098     }
1099
1100     if((s->flags2 & CODEC_FLAG2_INTRA_VLC) && s->codec_id != CODEC_ID_MPEG2VIDEO){
1101         av_log(avctx, AV_LOG_ERROR, "intra vlc table not supported by codec\n");
1102         return -1;
1103     }
1104
1105     if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4
1106        && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO
1107        && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){
1108         av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n");
1109         return -1;
1110     }
1111
1112     if(s->avctx->thread_count > 1)
1113         s->rtp_mode= 1;
1114
1115     if(!avctx->time_base.den || !avctx->time_base.num){
1116         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
1117         return -1;
1118     }
1119
1120     i= (INT_MAX/2+128)>>8;
1121     if(avctx->me_threshold >= i){
1122         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n", i - 1);
1123         return -1;
1124     }
1125     if(avctx->mb_threshold >= i){
1126         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n", i - 1);
1127         return -1;
1128     }
1129
1130     if(avctx->b_frame_strategy && (avctx->flags&CODEC_FLAG_PASS2)){
1131         av_log(avctx, AV_LOG_INFO, "notice: b_frame_strategy only affects the first pass\n");
1132         avctx->b_frame_strategy = 0;
1133     }
1134
1135     i= ff_gcd(avctx->time_base.den, avctx->time_base.num);
1136     if(i > 1){
1137         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
1138         avctx->time_base.den /= i;
1139         avctx->time_base.num /= i;
1140 //        return -1;
1141     }
1142
1143     if(s->codec_id==CODEC_ID_MJPEG){
1144         s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
1145         s->inter_quant_bias= 0;
1146     }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
1147         s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
1148         s->inter_quant_bias= 0;
1149     }else{
1150         s->intra_quant_bias=0;
1151         s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
1152     }
1153
1154     if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
1155         s->intra_quant_bias= avctx->intra_quant_bias;
1156     if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
1157         s->inter_quant_bias= avctx->inter_quant_bias;
1158
1159     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
1160
1161     if(avctx->codec_id == CODEC_ID_MPEG4 && s->avctx->time_base.den > (1<<16)-1){
1162         av_log(avctx, AV_LOG_ERROR, "timebase not supported by mpeg 4 standard\n");
1163         return -1;
1164     }
1165     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
1166
1167     switch(avctx->codec->id) {
1168     case CODEC_ID_MPEG1VIDEO:
1169         s->out_format = FMT_MPEG1;
1170         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1171         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1172         break;
1173     case CODEC_ID_MPEG2VIDEO:
1174         s->out_format = FMT_MPEG1;
1175         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1176         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1177         s->rtp_mode= 1;
1178         break;
1179     case CODEC_ID_LJPEG:
1180     case CODEC_ID_JPEGLS:
1181     case CODEC_ID_MJPEG:
1182         s->out_format = FMT_MJPEG;
1183         s->intra_only = 1; /* force intra only for jpeg */
1184         s->mjpeg_write_tables = avctx->codec->id != CODEC_ID_JPEGLS;
1185         s->mjpeg_data_only_frames = 0; /* write all the needed headers */
1186         s->mjpeg_vsample[0] = 2;
1187         s->mjpeg_vsample[1] = 2>>chroma_v_shift;
1188         s->mjpeg_vsample[2] = 2>>chroma_v_shift;
1189         s->mjpeg_hsample[0] = 2;
1190         s->mjpeg_hsample[1] = 2>>chroma_h_shift;
1191         s->mjpeg_hsample[2] = 2>>chroma_h_shift;
1192         if (mjpeg_init(s) < 0)
1193             return -1;
1194         avctx->delay=0;
1195         s->low_delay=1;
1196         break;
1197     case CODEC_ID_H261:
1198         s->out_format = FMT_H261;
1199         avctx->delay=0;
1200         s->low_delay=1;
1201         break;
1202     case CODEC_ID_H263:
1203         if (h263_get_picture_format(s->width, s->height) == 7) {
1204             av_log(avctx, AV_LOG_INFO, "The specified picture size of %dx%d is not valid for the H.263 codec.\nValid sizes are 128x96, 176x144, 352x288, 704x576, and 1408x1152. Try H.263+.\n", s->width, s->height);
1205             return -1;
1206         }
1207         s->out_format = FMT_H263;
1208         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1209         avctx->delay=0;
1210         s->low_delay=1;
1211         break;
1212     case CODEC_ID_H263P:
1213         s->out_format = FMT_H263;
1214         s->h263_plus = 1;
1215         /* Fx */
1216         s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
1217         s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
1218         s->modified_quant= s->h263_aic;
1219         s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
1220         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1221         s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
1222         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1223         s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
1224
1225         /* /Fx */
1226         /* These are just to be sure */
1227         avctx->delay=0;
1228         s->low_delay=1;
1229         break;
1230     case CODEC_ID_FLV1:
1231         s->out_format = FMT_H263;
1232         s->h263_flv = 2; /* format = 1; 11-bit codes */
1233         s->unrestricted_mv = 1;
1234         s->rtp_mode=0; /* don't allow GOB */
1235         avctx->delay=0;
1236         s->low_delay=1;
1237         break;
1238     case CODEC_ID_RV10:
1239         s->out_format = FMT_H263;
1240         avctx->delay=0;
1241         s->low_delay=1;
1242         break;
1243     case CODEC_ID_RV20:
1244         s->out_format = FMT_H263;
1245         avctx->delay=0;
1246         s->low_delay=1;
1247         s->modified_quant=1;
1248         s->h263_aic=1;
1249         s->h263_plus=1;
1250         s->loop_filter=1;
1251         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1252         break;
1253     case CODEC_ID_MPEG4:
1254         s->out_format = FMT_H263;
1255         s->h263_pred = 1;
1256         s->unrestricted_mv = 1;
1257         s->low_delay= s->max_b_frames ? 0 : 1;
1258         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1259         break;
1260     case CODEC_ID_MSMPEG4V1:
1261         s->out_format = FMT_H263;
1262         s->h263_msmpeg4 = 1;
1263         s->h263_pred = 1;
1264         s->unrestricted_mv = 1;
1265         s->msmpeg4_version= 1;
1266         avctx->delay=0;
1267         s->low_delay=1;
1268         break;
1269     case CODEC_ID_MSMPEG4V2:
1270         s->out_format = FMT_H263;
1271         s->h263_msmpeg4 = 1;
1272         s->h263_pred = 1;
1273         s->unrestricted_mv = 1;
1274         s->msmpeg4_version= 2;
1275         avctx->delay=0;
1276         s->low_delay=1;
1277         break;
1278     case CODEC_ID_MSMPEG4V3:
1279         s->out_format = FMT_H263;
1280         s->h263_msmpeg4 = 1;
1281         s->h263_pred = 1;
1282         s->unrestricted_mv = 1;
1283         s->msmpeg4_version= 3;
1284         s->flipflop_rounding=1;
1285         avctx->delay=0;
1286         s->low_delay=1;
1287         break;
1288     case CODEC_ID_WMV1:
1289         s->out_format = FMT_H263;
1290         s->h263_msmpeg4 = 1;
1291         s->h263_pred = 1;
1292         s->unrestricted_mv = 1;
1293         s->msmpeg4_version= 4;
1294         s->flipflop_rounding=1;
1295         avctx->delay=0;
1296         s->low_delay=1;
1297         break;
1298     case CODEC_ID_WMV2:
1299         s->out_format = FMT_H263;
1300         s->h263_msmpeg4 = 1;
1301         s->h263_pred = 1;
1302         s->unrestricted_mv = 1;
1303         s->msmpeg4_version= 5;
1304         s->flipflop_rounding=1;
1305         avctx->delay=0;
1306         s->low_delay=1;
1307         break;
1308     default:
1309         return -1;
1310     }
1311
1312     avctx->has_b_frames= !s->low_delay;
1313
1314     s->encoding = 1;
1315
1316     /* init */
1317     if (MPV_common_init(s) < 0)
1318         return -1;
1319
1320     if(s->modified_quant)
1321         s->chroma_qscale_table= ff_h263_chroma_qscale_table;
1322     s->progressive_frame=
1323     s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME));
1324     s->quant_precision=5;
1325
1326     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
1327     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
1328
1329 #ifdef CONFIG_H261_ENCODER
1330     if (s->out_format == FMT_H261)
1331         ff_h261_encode_init(s);
1332 #endif
1333     if (s->out_format == FMT_H263)
1334         h263_encode_init(s);
1335     if(s->msmpeg4_version)
1336         ff_msmpeg4_encode_init(s);
1337     if (s->out_format == FMT_MPEG1)
1338         ff_mpeg1_encode_init(s);
1339
1340     /* init q matrix */
1341     for(i=0;i<64;i++) {
1342         int j= s->dsp.idct_permutation[i];
1343         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
1344             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
1345             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
1346         }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1347             s->intra_matrix[j] =
1348             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1349         }else
1350         { /* mpeg1/2 */
1351             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
1352             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1353         }
1354         if(s->avctx->intra_matrix)
1355             s->intra_matrix[j] = s->avctx->intra_matrix[i];
1356         if(s->avctx->inter_matrix)
1357             s->inter_matrix[j] = s->avctx->inter_matrix[i];
1358     }
1359
1360     /* precompute matrix */
1361     /* for mjpeg, we do include qscale in the matrix */
1362     if (s->out_format != FMT_MJPEG) {
1363         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
1364                        s->intra_matrix, s->intra_quant_bias, avctx->qmin, 31, 1);
1365         convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
1366                        s->inter_matrix, s->inter_quant_bias, avctx->qmin, 31, 0);
1367     }
1368
1369     if(ff_rate_control_init(s) < 0)
1370         return -1;
1371
1372     return 0;
1373 }
1374
/* Tear down the encoder: counterpart of MPV_encode_init(). Always returns 0. */
int MPV_encode_end(AVCodecContext *avctx)
{
    MpegEncContext *s = avctx->priv_data;

    /* rate control first, then the common context, then format-specific state */
    ff_rate_control_uninit(s);

    MPV_common_end(s);
    if (s->out_format == FMT_MJPEG)
        mjpeg_close(s);

    av_freep(&avctx->extradata);

    return 0;
}
1389
1390 #endif //CONFIG_ENCODERS
1391
1392 void init_rl(RLTable *rl, int use_static)
1393 {
1394     int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
1395     uint8_t index_run[MAX_RUN+1];
1396     int last, run, level, start, end, i;
1397
1398     /* If table is static, we can quit if rl->max_level[0] is not NULL */
1399     if(use_static && rl->max_level[0])
1400         return;
1401
1402     /* compute max_level[], max_run[] and index_run[] */
1403     for(last=0;last<2;last++) {
1404         if (last == 0) {
1405             start = 0;
1406             end = rl->last;
1407         } else {
1408             start = rl->last;
1409             end = rl->n;
1410         }
1411
1412         memset(max_level, 0, MAX_RUN + 1);
1413         memset(max_run, 0, MAX_LEVEL + 1);
1414         memset(index_run, rl->n, MAX_RUN + 1);
1415         for(i=start;i<end;i++) {
1416             run = rl->table_run[i];
1417             level = rl->table_level[i];
1418             if (index_run[run] == rl->n)
1419                 index_run[run] = i;
1420             if (level > max_level[run])
1421                 max_level[run] = level;
1422             if (run > max_run[level])
1423                 max_run[level] = run;
1424         }
1425         if(use_static)
1426             rl->max_level[last] = av_mallocz_static(MAX_RUN + 1);
1427         else
1428             rl->max_level[last] = av_malloc(MAX_RUN + 1);
1429         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
1430         if(use_static)
1431             rl->max_run[last] = av_mallocz_static(MAX_LEVEL + 1);
1432         else
1433             rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
1434         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
1435         if(use_static)
1436             rl->index_run[last] = av_mallocz_static(MAX_RUN + 1);
1437         else
1438             rl->index_run[last] = av_malloc(MAX_RUN + 1);
1439         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
1440     }
1441 }
1442
/* Replicate the border pixels of a width x height image into a 'w'-pixel
 * wide padding frame on all four sides (so motion compensation may read
 * slightly outside the picture). The buffer must have room for the padding. */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *bottom_row = buf + (height - 1) * wrap;
    uint8_t *row;
    int i;

    /* replicate the first row upward and the last row downward */
    for (i = 1; i <= w; i++) {
        memcpy(buf - i * wrap, buf, width);
        memcpy(bottom_row + i * wrap, bottom_row, width);
    }

    /* extend the first and last pixel of every row sideways */
    row = buf;
    for (i = 0; i < height; i++) {
        memset(row - w, row[0], w);
        memset(row + width, row[width - 1], w);
        row += wrap;
    }

    /* fill the four corner areas with the corresponding corner pixel */
    for (i = 1; i <= w; i++) {
        memset(buf - i * wrap - w, buf[0], w);                           /* top left */
        memset(buf - i * wrap + width, buf[width - 1], w);               /* top right */
        memset(bottom_row + i * wrap - w, bottom_row[0], w);             /* bottom left */
        memset(bottom_row + i * wrap + width, bottom_row[width - 1], w); /* bottom right */
    }
}
1471
1472 int ff_find_unused_picture(MpegEncContext *s, int shared){
1473     int i;
1474
1475     if(shared){
1476         for(i=0; i<MAX_PICTURE_COUNT; i++){
1477             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
1478         }
1479     }else{
1480         for(i=0; i<MAX_PICTURE_COUNT; i++){
1481             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
1482         }
1483         for(i=0; i<MAX_PICTURE_COUNT; i++){
1484             if(s->picture[i].data[0]==NULL) return i;
1485         }
1486     }
1487
1488     assert(0);
1489     return -1;
1490 }
1491
1492 static void update_noise_reduction(MpegEncContext *s){
1493     int intra, i;
1494
1495     for(intra=0; intra<2; intra++){
1496         if(s->dct_count[intra] > (1<<16)){
1497             for(i=0; i<64; i++){
1498                 s->dct_error_sum[intra][i] >>=1;
1499             }
1500             s->dct_count[intra] >>= 1;
1501         }
1502
1503         for(i=0; i<64; i++){
1504             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
1505         }
1506     }
1507 }
1508
1509 /**
1510  * generic function for encode/decode called after coding/decoding the header and before a frame is coded/decoded
1511  */
1512 int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
1513 {
1514     int i;
1515     AVFrame *pic;
1516     s->mb_skipped = 0;
1517
1518     assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
1519
1520     /* mark&release old frames */
1521     if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
1522         avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);
1523
1524         /* release forgotten pictures */
1525         /* if(mpeg124/h263) */
1526         if(!s->encoding){
1527             for(i=0; i<MAX_PICTURE_COUNT; i++){
1528                 if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
1529                     av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
1530                     avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
1531                 }
1532             }
1533         }
1534     }
1535 alloc:
1536     if(!s->encoding){
1537         /* release non reference frames */
1538         for(i=0; i<MAX_PICTURE_COUNT; i++){
1539             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1540                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1541             }
1542         }
1543
1544         if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
1545             pic= (AVFrame*)s->current_picture_ptr; //we allready have a unused image (maybe it was set before reading the header)
1546         else{
1547             i= ff_find_unused_picture(s, 0);
1548             pic= (AVFrame*)&s->picture[i];
1549         }
1550
1551         pic->reference= (s->pict_type != B_TYPE || s->codec_id == CODEC_ID_H264)
1552                         && !s->dropable ? 3 : 0;
1553
1554         pic->coded_picture_number= s->coded_picture_number++;
1555
1556         if( alloc_picture(s, (Picture*)pic, 0) < 0)
1557             return -1;
1558
1559         s->current_picture_ptr= (Picture*)pic;
1560         s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
1561         s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
1562     }
1563
1564     s->current_picture_ptr->pict_type= s->pict_type;
1565 //    if(s->flags && CODEC_FLAG_QSCALE)
1566   //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
1567     s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;
1568
1569     copy_picture(&s->current_picture, s->current_picture_ptr);
1570
1571   if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
1572     if (s->pict_type != B_TYPE) {
1573         s->last_picture_ptr= s->next_picture_ptr;
1574         if(!s->dropable)
1575             s->next_picture_ptr= s->current_picture_ptr;
1576     }
1577 /*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
1578         s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL,
1579         s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL,
1580         s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
1581         s->pict_type, s->dropable);*/
1582
1583     if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
1584     if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);
1585
1586     if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL)){
1587         av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
1588         assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
1589         goto alloc;
1590     }
1591
1592     assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));
1593
1594     if(s->picture_structure!=PICT_FRAME){
1595         int i;
1596         for(i=0; i<4; i++){
1597             if(s->picture_structure == PICT_BOTTOM_FIELD){
1598                  s->current_picture.data[i] += s->current_picture.linesize[i];
1599             }
1600             s->current_picture.linesize[i] *= 2;
1601             s->last_picture.linesize[i] *=2;
1602             s->next_picture.linesize[i] *=2;
1603         }
1604     }
1605   }
1606
1607     s->hurry_up= s->avctx->hurry_up;
1608     s->error_resilience= avctx->error_resilience;
1609
1610     /* set dequantizer, we can't do it during init as it might change for mpeg4
1611        and we can't do it in the header decode as init isnt called for mpeg4 there yet */
1612     if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
1613         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1614         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1615     }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1616         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1617         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1618     }else{
1619         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1620         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1621     }
1622
1623     if(s->dct_error_sum){
1624         assert(s->avctx->noise_reduction && s->encoding);
1625
1626         update_noise_reduction(s);
1627     }
1628
1629 #ifdef HAVE_XVMC
1630     if(s->avctx->xvmc_acceleration)
1631         return XVMC_field_start(s, avctx);
1632 #endif
1633     return 0;
1634 }
1635
/* generic function for encode/decode called after a frame has been coded/decoded */
void MPV_frame_end(MpegEncContext *s)
{
    int i;
    /* draw edge for correct motion prediction if outside */
#ifdef HAVE_XVMC
//just to make sure that all data is rendered.
    if(s->avctx->xvmc_acceleration){
        XVMC_field_end(s);
    }else
#endif
    /* pad the reference frame's borders so the motion search/compensation
       may read outside the picture (skipped with CODEC_FLAG_EMU_EDGE) */
    if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
            draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
            draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
            draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
    }
    emms_c();

    /* remember per-type state for the next frame's decisions */
    s->last_pict_type    = s->pict_type;
    s->last_lambda_for[s->pict_type]= s->current_picture_ptr->quality;
    if(s->pict_type!=B_TYPE){
        s->last_non_b_pict_type= s->pict_type;
    }
#if 0
        /* copy back current_picture variables */
    for(i=0; i<MAX_PICTURE_COUNT; i++){
        if(s->picture[i].data[0] == s->current_picture.data[0]){
            s->picture[i]= s->current_picture;
            break;
        }
    }
    assert(i<MAX_PICTURE_COUNT);
#endif

    if(s->encoding){
        /* release non-reference frames */
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
            }
        }
    }
    // clear copies, to avoid confusion
#if 0
    memset(&s->last_picture, 0, sizeof(Picture));
    memset(&s->next_picture, 0, sizeof(Picture));
    memset(&s->current_picture, 0, sizeof(Picture));
#endif
    s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
}
1686
1687 /**
1688  * draws an line from (ex, ey) -> (sx, sy).
1689  * @param w width of the image
1690  * @param h height of the image
1691  * @param stride stride/linesize of the image
1692  * @param color color of the arrow
1693  */
1694 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1695     int x, y, fr, f;
1696
1697     sx= clip(sx, 0, w-1);
1698     sy= clip(sy, 0, h-1);
1699     ex= clip(ex, 0, w-1);
1700     ey= clip(ey, 0, h-1);
1701
1702     buf[sy*stride + sx]+= color;
1703
1704     if(ABS(ex - sx) > ABS(ey - sy)){
1705         if(sx > ex){
1706             SWAP(int, sx, ex);
1707             SWAP(int, sy, ey);
1708         }
1709         buf+= sx + sy*stride;
1710         ex-= sx;
1711         f= ((ey-sy)<<16)/ex;
1712         for(x= 0; x <= ex; x++){
1713             y = (x*f)>>16;
1714             fr= (x*f)&0xFFFF;
1715             buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
1716             buf[(y+1)*stride + x]+= (color*         fr )>>16;
1717         }
1718     }else{
1719         if(sy > ey){
1720             SWAP(int, sx, ex);
1721             SWAP(int, sy, ey);
1722         }
1723         buf+= sx + sy*stride;
1724         ey-= sy;
1725         if(ey) f= ((ex-sx)<<16)/ey;
1726         else   f= 0;
1727         for(y= 0; y <= ey; y++){
1728             x = (y*f)>>16;
1729             fr= (y*f)&0xFFFF;
1730             buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;;
1731             buf[y*stride + x+1]+= (color*         fr )>>16;;
1732         }
1733     }
1734 }
1735
/**
 * draws an arrow from (sx, sy) -> (ex, ey); a two-stroke head is added at
 * the start point when the vector is long enough to be visible.
 * @param w width of the image
 * @param h height of the image
 * @param stride stride/linesize of the image
 * @param color color of the arrow
 */
static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey,
                       int w, int h, int stride, int color){
    int vx, vy;

    /* keep endpoints near the frame; draw_line clips exactly */
    sx = clip(sx, -100, w+100);
    ex = clip(ex, -100, w+100);
    sy = clip(sy, -100, h+100);
    ey = clip(ey, -100, h+100);

    vx = ex - sx;
    vy = ey - sy;

    if (vx*vx + vy*vy > 3*3) {
        /* head: the vector rotated by +-45 degrees, scaled to fixed length */
        int rx = vx + vy;
        int ry = vy - vx;
        int length = ff_sqrt((rx*rx + ry*ry) << 8);

        //FIXME subpixel accuracy
        rx = ROUNDED_DIV(rx*3<<4, length);
        ry = ROUNDED_DIV(ry*3<<4, length);

        draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
        draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
    }

    /* shaft */
    draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
}
1768
/**
 * prints debugging info for the given picture.
 * Depending on s->avctx->debug / debug_mv this logs a per-macroblock text
 * map (skip count, qscale, MB type) and/or draws motion vectors, QP and
 * MB-type overlays directly into a copy of the picture planes.
 */
void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){

    if(!pict || !pict->mb_type) return;

    /* textual per-macroblock dump to the log */
    if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
        int x,y;

        av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
        switch (pict->pict_type) {
            case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
            case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
            case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
            case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
            case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
            case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
        }
        for(y=0; y<s->mb_height; y++){
            for(x=0; x<s->mb_width; x++){
                if(s->avctx->debug&FF_DEBUG_SKIP){
                    int count= s->mbskip_table[x + y*s->mb_stride];
                    if(count>9) count=9; //single digit per MB
                    av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
                }
                if(s->avctx->debug&FF_DEBUG_QP){
                    av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
                }
                if(s->avctx->debug&FF_DEBUG_MB_TYPE){
                    int mb_type= pict->mb_type[x + y*s->mb_stride];
                    //Type & MV direction
                    if(IS_PCM(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "P");
                    else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "A");
                    else if(IS_INTRA4x4(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "i");
                    else if(IS_INTRA16x16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "I");
                    else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "d");
                    else if(IS_DIRECT(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "D");
                    else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "g");
                    else if(IS_GMC(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "G");
                    else if(IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "S");
                    else if(!USES_LIST(mb_type, 1))
                        av_log(s->avctx, AV_LOG_DEBUG, ">");
                    else if(!USES_LIST(mb_type, 0))
                        av_log(s->avctx, AV_LOG_DEBUG, "<");
                    else{
                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
                        av_log(s->avctx, AV_LOG_DEBUG, "X");
                    }

                    //segmentation
                    if(IS_8X8(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "+");
                    else if(IS_16X8(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "-");
                    else if(IS_8X16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "|");
                    else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, " ");
                    else
                        av_log(s->avctx, AV_LOG_DEBUG, "?");


                    //interlacing marker (H.264 only)
                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
                        av_log(s->avctx, AV_LOG_DEBUG, "=");
                    else
                        av_log(s->avctx, AV_LOG_DEBUG, " ");
                }
//                av_log(s->avctx, AV_LOG_DEBUG, " ");
            }
            av_log(s->avctx, AV_LOG_DEBUG, "\n");
        }
    }

    /* visual overlays: drawn into a private copy so the reference/display
       buffers themselves are not trashed */
    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
        const int shift= 1 + s->quarter_sample; //MV fractional-pel shift
        int mb_y;
        uint8_t *ptr;
        int i;
        int h_chroma_shift, v_chroma_shift;
        const int width = s->avctx->width;
        const int height= s->avctx->height;
        const int mv_sample_log2= 4 - pict->motion_subsample_log2;
        const int mv_stride= (s->mb_width << mv_sample_log2) + (s->codec_id == CODEC_ID_H264 ? 0 : 1);
        s->low_delay=0; //needed to see the vectors without trashing the buffers

        avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
        /* redirect pict->data[] to the visualization copies */
        for(i=0; i<3; i++){
            memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
            pict->data[i]= s->visualization_buffer[i];
        }
        pict->type= FF_BUFFER_TYPE_COPY;
        ptr= pict->data[0];

        for(mb_y=0; mb_y<s->mb_height; mb_y++){
            int mb_x;
            for(mb_x=0; mb_x<s->mb_width; mb_x++){
                const int mb_index= mb_x + mb_y*s->mb_stride;
                if((s->avctx->debug_mv) && pict->motion_val){
                  int type;
                  /* type: 0 = P forward, 1 = B forward, 2 = B backward */
                  for(type=0; type<3; type++){
                    int direction = 0;
                    switch (type) {
                      case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
                                continue;
                              direction = 0;
                              break;
                      case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
                                continue;
                              direction = 0;
                              break;
                      case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
                                continue;
                              direction = 1;
                              break;
                    }
                    if(!USES_LIST(pict->mb_type[mb_index], direction))
                        continue;

                    /* one arrow per motion partition of the macroblock */
                    if(IS_8X8(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<4; i++){
                        int sx= mb_x*16 + 4 + 8*(i&1);
                        int sy= mb_y*16 + 4 + 8*(i>>1);
                        int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
                        int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
                        int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
                        draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
                      }
                    }else if(IS_16X8(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<2; i++){
                        int sx=mb_x*16 + 8;
                        int sy=mb_y*16 + 4 + 8*i;
                        int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
                        int mx=(pict->motion_val[direction][xy][0]>>shift);
                        int my=(pict->motion_val[direction][xy][1]>>shift);

                        if(IS_INTERLACED(pict->mb_type[mb_index]))
                            my*=2;

                        draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
                      }
                    }else if(IS_8X16(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<2; i++){
                        int sx=mb_x*16 + 4 + 8*i;
                        int sy=mb_y*16 + 8;
                        int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
                        int mx=(pict->motion_val[direction][xy][0]>>shift);
                        int my=(pict->motion_val[direction][xy][1]>>shift);

                        if(IS_INTERLACED(pict->mb_type[mb_index]))
                            my*=2;

                        draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
                      }
                    }else{
                      int sx= mb_x*16 + 8;
                      int sy= mb_y*16 + 8;
                      int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
                      int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
                      int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
                      draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
                    }
                  }
                }
                /* QP overlay: fill the chroma of each MB with a value
                   proportional to its qscale */
                if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
                    uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
                    int y;
                    for(y=0; y<8; y++){
                        *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
                        *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
                    }
                }
                /* MB-type overlay: color-code the chroma by MB type */
                if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
                    int mb_type= pict->mb_type[mb_index];
                    uint64_t u,v;
                    int y;
/* map a hue angle (degrees) and saturation radius to U/V values */
#define COLOR(theta, r)\
u= (int)(128 + r*cos(theta*3.141592/180));\
v= (int)(128 + r*sin(theta*3.141592/180));


                    u=v=128; //neutral gray by default
                    if(IS_PCM(mb_type)){
                        COLOR(120,48)
                    }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
                        COLOR(30,48)
                    }else if(IS_INTRA4x4(mb_type)){
                        COLOR(90,48)
                    }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
//                        COLOR(120,48)
                    }else if(IS_DIRECT(mb_type)){
                        COLOR(150,48)
                    }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
                        COLOR(170,48)
                    }else if(IS_GMC(mb_type)){
                        COLOR(190,48)
                    }else if(IS_SKIP(mb_type)){
//                        COLOR(180,48)
                    }else if(!USES_LIST(mb_type, 1)){
                        COLOR(240,48)
                    }else if(!USES_LIST(mb_type, 0)){
                        COLOR(0,48)
                    }else{
                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
                        COLOR(300,48)
                    }

                    u*= 0x0101010101010101ULL; //replicate byte across 8 pixels
                    v*= 0x0101010101010101ULL;
                    for(y=0; y<8; y++){
                        *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
                        *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
                    }

                    //segmentation
                    if(IS_8X8(mb_type) || IS_16X8(mb_type)){
                        *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
                        *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
                    }
                    if(IS_8X8(mb_type) || IS_8X16(mb_type)){
                        for(y=0; y<16; y++)
                            pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
                    }
                    if(IS_8X8(mb_type) && mv_sample_log2 >= 2){
                        int dm= 1 << (mv_sample_log2-2);
                        for(i=0; i<4; i++){
                            int sx= mb_x*16 + 8*(i&1);
                            int sy= mb_y*16 + 8*(i>>1);
                            int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
                            //FIXME bidir
                            int32_t *mv = (int32_t*)&pict->motion_val[0][xy];
                            //mark sub-partition boundaries only where MVs differ
                            if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)])
                                for(y=0; y<8; y++)
                                    pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80;
                            if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)])
                                *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
                        }
                    }

                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
                        // hmm
                    }
                }
                s->mbskip_table[mb_index]=0;
            }
        }
    }
}
2029
2030 #ifdef CONFIG_ENCODERS
2031
/**
 * Sum of absolute differences between a 16x16 block and a constant value.
 * @param src top-left pixel of the block
 * @param ref constant value compared against every pixel
 * @param stride linesize of src
 * @return the accumulated absolute error
 */
static int get_sae(uint8_t *src, int ref, int stride){
    int row, col;
    int sum= 0;

    for(row= 0; row<16; row++){
        const uint8_t *line= src + row*stride;
        for(col= 0; col<16; col++)
            sum+= ABS(line[col] - ref);
    }

    return sum;
}
2044
2045 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
2046     int x, y, w, h;
2047     int acc=0;
2048
2049     w= s->width &~15;
2050     h= s->height&~15;
2051
2052     for(y=0; y<h; y+=16){
2053         for(x=0; x<w; x+=16){
2054             int offset= x + y*stride;
2055             int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
2056             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
2057             int sae = get_sae(src + offset, mean, stride);
2058
2059             acc+= sae + 500 < sad;
2060         }
2061     }
2062     return acc;
2063 }
2064
2065
/**
 * Queues one user frame into s->input_picture[], assigning it a pts and a
 * display_picture_number. The frame is either referenced directly (when the
 * strides match and input preservation allows it) or copied into an internal
 * buffer. pic_arg==NULL flushes: the queue is shifted and NULL is appended.
 * @return 0 on success, -1 on a non-monotone timestamp
 */
static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
    AVFrame *pic=NULL;
    int64_t pts;
    int i;
    const int encoding_delay= s->max_b_frames;
    int direct=1;  // whether the user buffer can be used without a copy

    if(pic_arg){
        pts= pic_arg->pts;
        pic_arg->display_picture_number= s->input_picture_number++;

        if(pts != AV_NOPTS_VALUE){
            // timestamps must be strictly increasing
            if(s->user_specified_pts != AV_NOPTS_VALUE){
                int64_t time= pts;
                int64_t last= s->user_specified_pts;

                if(time <= last){
                    av_log(s->avctx, AV_LOG_ERROR, "Error, Invalid timestamp=%"PRId64", last=%"PRId64"\n", pts, s->user_specified_pts);
                    return -1;
                }
            }
            s->user_specified_pts= pts;
        }else{
            // no pts supplied: extrapolate from the previous one, or fall
            // back to the display order index
            if(s->user_specified_pts != AV_NOPTS_VALUE){
                s->user_specified_pts=
                pts= s->user_specified_pts + 1;
                av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n", pts);
            }else{
                pts= pic_arg->display_picture_number;
            }
        }
    }

  if(pic_arg){
    // direct mode requires the caller to preserve the buffer for the whole
    // encoding delay, and the strides to match ours exactly
    if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
    if(pic_arg->linesize[0] != s->linesize) direct=0;
    if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
    if(pic_arg->linesize[2] != s->uvlinesize) direct=0;

//    av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);

    if(direct){
        i= ff_find_unused_picture(s, 1);

        pic= (AVFrame*)&s->picture[i];
        pic->reference= 3;

        // alias the user's planes; alloc_picture(…, 1) only fills in the
        // side data for a shared buffer
        for(i=0; i<4; i++){
            pic->data[i]= pic_arg->data[i];
            pic->linesize[i]= pic_arg->linesize[i];
        }
        alloc_picture(s, (Picture*)pic, 1);
    }else{
        i= ff_find_unused_picture(s, 0);

        pic= (AVFrame*)&s->picture[i];
        pic->reference= 3;

        alloc_picture(s, (Picture*)pic, 0);

        // skip the copy when the user data already lives at the expected
        // offset inside our own buffer
        if(   pic->data[0] + INPLACE_OFFSET == pic_arg->data[0]
           && pic->data[1] + INPLACE_OFFSET == pic_arg->data[1]
           && pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]){
       // empty
        }else{
            int h_chroma_shift, v_chroma_shift;
            avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);

            // plane-by-plane copy, honoring chroma subsampling
            for(i=0; i<3; i++){
                int src_stride= pic_arg->linesize[i];
                int dst_stride= i ? s->uvlinesize : s->linesize;
                int h_shift= i ? h_chroma_shift : 0;
                int v_shift= i ? v_chroma_shift : 0;
                int w= s->width >>h_shift;
                int h= s->height>>v_shift;
                uint8_t *src= pic_arg->data[i];
                uint8_t *dst= pic->data[i];

                if(!s->avctx->rc_buffer_size)
                    dst +=INPLACE_OFFSET;

                if(src_stride==dst_stride)
                    memcpy(dst, src, src_stride*h);
                else{
                    while(h--){
                        memcpy(dst, src, w);
                        dst += dst_stride;
                        src += src_stride;
                    }
                }
            }
        }
    }
    copy_picture_attributes(s, pic, pic_arg);
    pic->pts= pts; //we set this here to avoid modifying pic_arg
  }

    /* shift buffer entries */
    for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
        s->input_picture[i-1]= s->input_picture[i];

    s->input_picture[encoding_delay]= (Picture*)pic;

    return 0;
}
2171
2172 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref){
2173     int x, y, plane;
2174     int score=0;
2175     int64_t score64=0;
2176
2177     for(plane=0; plane<3; plane++){
2178         const int stride= p->linesize[plane];
2179         const int bw= plane ? 1 : 2;
2180         for(y=0; y<s->mb_height*bw; y++){
2181             for(x=0; x<s->mb_width*bw; x++){
2182                 int off= p->type == FF_BUFFER_TYPE_SHARED ? 0: 16;
2183                 int v= s->dsp.frame_skip_cmp[1](s, p->data[plane] + 8*(x + y*stride)+off, ref->data[plane] + 8*(x + y*stride), stride, 8);
2184
2185                 switch(s->avctx->frame_skip_exp){
2186                     case 0: score= FFMAX(score, v); break;
2187                     case 1: score+= ABS(v);break;
2188                     case 2: score+= v*v;break;
2189                     case 3: score64+= ABS(v*v*(int64_t)v);break;
2190                     case 4: score64+= v*v*(int64_t)(v*v);break;
2191                 }
2192             }
2193         }
2194     }
2195
2196     if(score) score64= score;
2197
2198     if(score64 < s->avctx->frame_skip_threshold)
2199         return 1;
2200     if(score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda)>>8))
2201         return 1;
2202     return 0;
2203 }
2204
2205 static int estimate_best_b_count(MpegEncContext *s){
2206     AVCodec *codec= avcodec_find_encoder(s->avctx->codec_id);
2207     AVCodecContext *c= avcodec_alloc_context();
2208     AVFrame input[FF_MAX_B_FRAMES+2];
2209     const int scale= s->avctx->brd_scale;
2210     int i, j, out_size, p_lambda, b_lambda, lambda2;
2211     int outbuf_size= s->width * s->height; //FIXME
2212     uint8_t *outbuf= av_malloc(outbuf_size);
2213     int64_t best_rd= INT64_MAX;
2214     int best_b_count= -1;
2215
2216     assert(scale>=0 && scale <=3);
2217
2218 //    emms_c();
2219     p_lambda= s->last_lambda_for[P_TYPE]; //s->next_picture_ptr->quality;
2220     b_lambda= s->last_lambda_for[B_TYPE]; //p_lambda *ABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
2221     if(!b_lambda) b_lambda= p_lambda; //FIXME we should do this somewhere else
2222     lambda2= (b_lambda*b_lambda + (1<<FF_LAMBDA_SHIFT)/2 ) >> FF_LAMBDA_SHIFT;
2223
2224     c->width = s->width >> scale;
2225     c->height= s->height>> scale;
2226     c->flags= CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR | CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
2227     c->flags|= s->avctx->flags & CODEC_FLAG_QPEL;
2228     c->mb_decision= s->avctx->mb_decision;
2229     c->me_cmp= s->avctx->me_cmp;
2230     c->mb_cmp= s->avctx->mb_cmp;
2231     c->me_sub_cmp= s->avctx->me_sub_cmp;
2232     c->pix_fmt = PIX_FMT_YUV420P;
2233     c->time_base= s->avctx->time_base;
2234     c->max_b_frames= s->max_b_frames;
2235
2236     if (avcodec_open(c, codec) < 0)
2237         return -1;
2238
2239     for(i=0; i<s->max_b_frames+2; i++){
2240         int ysize= c->width*c->height;
2241         int csize= (c->width/2)*(c->height/2);
2242         Picture pre_input, *pre_input_ptr= i ? s->input_picture[i-1] : s->next_picture_ptr;
2243
2244         if(pre_input_ptr)
2245             pre_input= *pre_input_ptr;
2246
2247         if(pre_input.type != FF_BUFFER_TYPE_SHARED && i){
2248             pre_input.data[0]+=INPLACE_OFFSET;
2249             pre_input.data[1]+=INPLACE_OFFSET;
2250             pre_input.data[2]+=INPLACE_OFFSET;
2251         }
2252
2253         avcodec_get_frame_defaults(&input[i]);
2254         input[i].data[0]= av_malloc(ysize + 2*csize);
2255         input[i].data[1]= input[i].data[0] + ysize;
2256         input[i].data[2]= input[i].data[1] + csize;
2257         input[i].linesize[0]= c->width;
2258         input[i].linesize[1]=
2259         input[i].linesize[2]= c->width/2;
2260
2261         if(!i || s->input_picture[i-1]){
2262             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0], pre_input.data[0], pre_input.linesize[0], c->width, c->height);
2263             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1], pre_input.data[1], pre_input.linesize[1], c->width>>1, c->height>>1);
2264             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2], pre_input.data[2], pre_input.linesize[2], c->width>>1, c->height>>1);
2265         }
2266     }
2267
2268     for(j=0; j<s->max_b_frames+1; j++){
2269         int64_t rd=0;
2270
2271         if(!s->input_picture[j])
2272             break;
2273
2274         c->error[0]= c->error[1]= c->error[2]= 0;
2275
2276         input[0].pict_type= I_TYPE;
2277         input[0].quality= 1 * FF_QP2LAMBDA;
2278         out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[0]);
2279 //        rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
2280
2281         for(i=0; i<s->max_b_frames+1; i++){
2282             int is_p= i % (j+1) == j || i==s->max_b_frames;
2283
2284             input[i+1].pict_type= is_p ? P_TYPE : B_TYPE;
2285             input[i+1].quality= is_p ? p_lambda : b_lambda;
2286             out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[i+1]);
2287             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
2288         }
2289
2290         /* get the delayed frames */
2291         while(out_size){
2292             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
2293             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
2294         }
2295
2296         rd += c->error[0] + c->error[1] + c->error[2];
2297
2298         if(rd < best_rd){
2299             best_rd= rd;
2300             best_b_count= j;
2301         }
2302     }
2303
2304     av_freep(&outbuf);
2305     avcodec_close(c);
2306     av_freep(&c);
2307
2308     for(i=0; i<s->max_b_frames+2; i++){
2309         av_freep(&input[i].data[0]);
2310     }
2311
2312     return best_b_count;
2313 }
2314
/**
 * Chooses the next picture to be coded and sets up the frame reordering.
 * Decides the picture type (I/P/B) and the number of B frames, consuming
 * s->input_picture[] (display order) and filling
 * s->reordered_input_picture[] / s->new_picture (coded order).
 */
static void select_input_picture(MpegEncContext *s){
    int i;

    /* shift the reordering queue by one picture */
    for(i=1; i<MAX_PICTURE_COUNT; i++)
        s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
    s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;

    /* set next picture type & ordering */
    if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
        if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
            /* no reference picture available (or intra only coding) -> I frame */
            s->reordered_input_picture[0]= s->input_picture[0];
            s->reordered_input_picture[0]->pict_type= I_TYPE;
            s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
        }else{
            int b_frames;

            /* optional frame skipping: drop the input picture if it is close
               enough to the last reference picture */
            if(s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor){
                if(s->picture_in_gop_number < s->gop_size && skip_check(s, s->input_picture[0], s->next_picture_ptr)){
                //FIXME check that the gop check above is +-1 correct
//av_log(NULL, AV_LOG_DEBUG, "skip %p %Ld\n", s->input_picture[0]->data[0], s->input_picture[0]->pts);

                    /* release / unreference the skipped input picture */
                    if(s->input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
                        for(i=0; i<4; i++)
                            s->input_picture[0]->data[i]= NULL;
                        s->input_picture[0]->type= 0;
                    }else{
                        assert(   s->input_picture[0]->type==FF_BUFFER_TYPE_USER
                               || s->input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);

                        s->avctx->release_buffer(s->avctx, (AVFrame*)s->input_picture[0]);
                    }

                    emms_c();
                    ff_vbv_update(s, 0); // tell rate control that no bits were output

                    goto no_output_pic;
                }
            }

            /* 2nd pass: take the picture types from the 1st pass statistics */
            if(s->flags&CODEC_FLAG_PASS2){
                for(i=0; i<s->max_b_frames+1; i++){
                    int pict_num= s->input_picture[0]->display_picture_number + i;

                    if(pict_num >= s->rc_context.num_entries)
                        break;
                    if(!s->input_picture[i]){
                        /* end of stream: the last available picture must be a reference */
                        s->rc_context.entry[pict_num-1].new_pict_type = P_TYPE;
                        break;
                    }

                    s->input_picture[i]->pict_type=
                        s->rc_context.entry[pict_num].new_pict_type;
                }
            }

            /* decide how many B frames to code before the next reference frame */
            if(s->avctx->b_frame_strategy==0){
                /* strategy 0: always use the maximum available number */
                b_frames= s->max_b_frames;
                while(b_frames && !s->input_picture[b_frames]) b_frames--;
            }else if(s->avctx->b_frame_strategy==1){
                /* strategy 1: stop before the first picture with too many intra
                   blocks relative to its predecessor (scores are cached +1 so
                   that 0 means "not yet computed") */
                for(i=1; i<s->max_b_frames+1; i++){
                    if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
                        s->input_picture[i]->b_frame_score=
                            get_intra_count(s, s->input_picture[i  ]->data[0],
                                               s->input_picture[i-1]->data[0], s->linesize) + 1;
                    }
                }
                for(i=0; i<s->max_b_frames+1; i++){
                    if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/s->avctx->b_sensitivity) break;
                }

                b_frames= FFMAX(0, i-1);

                /* reset scores */
                for(i=0; i<b_frames+1; i++){
                    s->input_picture[i]->b_frame_score=0;
                }
            }else if(s->avctx->b_frame_strategy==2){
                /* strategy 2: try all candidate counts and pick the cheapest */
                b_frames= estimate_best_b_count(s);
            }else{
                av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
                b_frames=0;
            }

            emms_c();
//static int b_count=0;
//b_count+= b_frames;
//av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);

            /* honor picture types forced from outside (e.g. by the 2nd pass):
               a forced non-B picture terminates the B frame run */
            for(i= b_frames - 1; i>=0; i--){
                int type= s->input_picture[i]->pict_type;
                if(type && type != B_TYPE)
                    b_frames= i;
            }
            if(s->input_picture[b_frames]->pict_type == B_TYPE && b_frames == s->max_b_frames){
                av_log(s->avctx, AV_LOG_ERROR, "warning, too many b frames in a row\n");
            }

            /* start a new GOP when the configured size is reached */
            if(s->picture_in_gop_number + b_frames >= s->gop_size){
              if((s->flags2 & CODEC_FLAG2_STRICT_GOP) && s->gop_size > s->picture_in_gop_number){
                    b_frames= s->gop_size - s->picture_in_gop_number - 1;
              }else{
                if(s->flags & CODEC_FLAG_CLOSED_GOP)
                    b_frames=0;
                s->input_picture[b_frames]->pict_type= I_TYPE;
              }
            }

            /* a closed GOP must not begin with B frames that would reference
               the previous GOP */
            if(   (s->flags & CODEC_FLAG_CLOSED_GOP)
               && b_frames
               && s->input_picture[b_frames]->pict_type== I_TYPE)
                b_frames--;

            /* emit the reference frame first, then its B frames (coded order) */
            s->reordered_input_picture[0]= s->input_picture[b_frames];
            if(s->reordered_input_picture[0]->pict_type != I_TYPE)
                s->reordered_input_picture[0]->pict_type= P_TYPE;
            s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
            for(i=0; i<b_frames; i++){
                s->reordered_input_picture[i+1]= s->input_picture[i];
                s->reordered_input_picture[i+1]->pict_type= B_TYPE;
                s->reordered_input_picture[i+1]->coded_picture_number= s->coded_picture_number++;
            }
        }
    }
no_output_pic:
    if(s->reordered_input_picture[0]){
        /* B frames are never used as references */
        s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;

        copy_picture(&s->new_picture, s->reordered_input_picture[0]);

        if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED || s->avctx->rc_buffer_size){
            // input is a shared pix, so we can't modifiy it -> alloc a new one & ensure that the shared one is reuseable

            int i= ff_find_unused_picture(s, 0);
            Picture *pic= &s->picture[i];

            pic->reference              = s->reordered_input_picture[0]->reference;
            alloc_picture(s, pic, 0);

            /* mark us unused / free shared pic */
            if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_INTERNAL)
                s->avctx->release_buffer(s->avctx, (AVFrame*)s->reordered_input_picture[0]);
            for(i=0; i<4; i++)
                s->reordered_input_picture[0]->data[i]= NULL;
            s->reordered_input_picture[0]->type= 0;

            copy_picture_attributes(s, (AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);

            s->current_picture_ptr= pic;
        }else{
            // input is not a shared pix -> reuse buffer for current_pix

            assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER
                   || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);

            s->current_picture_ptr= s->reordered_input_picture[0];
            for(i=0; i<4; i++){
                s->new_picture.data[i]+= INPLACE_OFFSET;
            }
        }
        copy_picture(&s->current_picture, s->current_picture_ptr);

        s->picture_number= s->new_picture.display_picture_number;
//printf("dpn:%d\n", s->picture_number);
    }else{
       /* nothing to encode this call (e.g. queue still filling) */
       memset(&s->new_picture, 0, sizeof(Picture));
    }
}
2482
/**
 * Encodes one video frame.
 * @param avctx codec context; its priv_data must be an MpegEncContext
 * @param buf output buffer for the coded bitstream
 * @param buf_size size of buf in bytes
 * @param data input AVFrame to encode (passed to load_input_picture())
 * @return number of bytes written to buf (0 when no picture was output,
 *         e.g. while the B frame queue is filling), or -1 on error
 */
int MPV_encode_picture(AVCodecContext *avctx,
                       unsigned char *buf, int buf_size, void *data)
{
    MpegEncContext *s = avctx->priv_data;
    AVFrame *pic_arg = data;
    int i, stuffing_count;

    /* give each slice thread a share of the output buffer proportional to
       the number of macroblock rows it encodes */
    for(i=0; i<avctx->thread_count; i++){
        int start_y= s->thread_context[i]->start_mb_y;
        int   end_y= s->thread_context[i]->  end_mb_y;
        int h= s->mb_height;
        uint8_t *start= buf + (size_t)(((int64_t) buf_size)*start_y/h);
        uint8_t *end  = buf + (size_t)(((int64_t) buf_size)*  end_y/h);

        init_put_bits(&s->thread_context[i]->pb, start, end - start);
    }

    s->picture_in_gop_number++;

    if(load_input_picture(s, pic_arg) < 0)
        return -1;

    select_input_picture(s);

    /* output? */
    if(s->new_picture.data[0]){
        s->pict_type= s->new_picture.pict_type;
//emms_c();
//printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
        MPV_frame_start(s, avctx);
vbv_retry:
        if (encode_picture(s, s->picture_number) < 0)
            return -1;

        /* export per-frame encoding statistics */
        avctx->real_pict_num  = s->picture_number;
        avctx->header_bits = s->header_bits;
        avctx->mv_bits     = s->mv_bits;
        avctx->misc_bits   = s->misc_bits;
        avctx->i_tex_bits  = s->i_tex_bits;
        avctx->p_tex_bits  = s->p_tex_bits;
        avctx->i_count     = s->i_count;
        avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
        avctx->skip_count  = s->skip_count;

        MPV_frame_end(s);

        if (s->out_format == FMT_MJPEG)
            mjpeg_picture_trailer(s);

        /* if the frame got too large for the VBV buffer, re-encode it with a
           higher qscale (undoing the per-frame state changes first) */
        if(avctx->rc_buffer_size){
            RateControlContext *rcc= &s->rc_context;
            int max_size= rcc->buffer_index/3;

            if(put_bits_count(&s->pb) > max_size && s->qscale < s->avctx->qmax){
                s->next_lambda= s->lambda*(s->qscale+1) / s->qscale;
                s->mb_skipped = 0;        //done in MPV_frame_start()
                if(s->pict_type==P_TYPE){ //done in encode_picture() so we must undo it
                    if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
                        s->no_rounding ^= 1;
                }
//                av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
                /* rewind all thread bitstream writers */
                for(i=0; i<avctx->thread_count; i++){
                    PutBitContext *pb= &s->thread_context[i]->pb;
                    init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
                }
                goto vbv_retry;
            }

            assert(s->avctx->rc_max_rate);
        }

        if(s->flags&CODEC_FLAG_PASS1)
            ff_write_pass1_stats(s);

        for(i=0; i<4; i++){
            s->current_picture_ptr->error[i]= s->current_picture.error[i];
            avctx->error[i] += s->current_picture_ptr->error[i];
        }

        if(s->flags&CODEC_FLAG_PASS1)
            assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits + avctx->i_tex_bits + avctx->p_tex_bits == put_bits_count(&s->pb));
        flush_put_bits(&s->pb);
        s->frame_bits  = put_bits_count(&s->pb);

        /* append stuffing bytes if rate control requests padding */
        stuffing_count= ff_vbv_update(s, s->frame_bits);
        if(stuffing_count){
            if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < stuffing_count + 50){
                av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
                return -1;
            }

            switch(s->codec_id){
            case CODEC_ID_MPEG1VIDEO:
            case CODEC_ID_MPEG2VIDEO:
                /* MPEG-1/2: zero stuffing bytes */
                while(stuffing_count--){
                    put_bits(&s->pb, 8, 0);
                }
            break;
            case CODEC_ID_MPEG4:
                /* MPEG-4: 4 byte stuffing start code followed by 0xFF bytes */
                put_bits(&s->pb, 16, 0);
                put_bits(&s->pb, 16, 0x1C3);
                stuffing_count -= 4;
                while(stuffing_count--){
                    put_bits(&s->pb, 8, 0xFF);
                }
            break;
            default:
                av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
            }
            flush_put_bits(&s->pb);
            s->frame_bits  = put_bits_count(&s->pb);
        }

        /* update mpeg1/2 vbv_delay for CBR */
        if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && s->out_format == FMT_MPEG1
           && 90000LL * (avctx->rc_buffer_size-1) <= s->avctx->rc_max_rate*0xFFFFLL){
            int vbv_delay;

            assert(s->repeat_first_field==0);

            vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate);
            assert(vbv_delay < 0xFFFF);

            /* patch the 16 bit vbv_delay field in the already written
               picture header (it straddles byte boundaries) */
            s->vbv_delay_ptr[0] &= 0xF8;
            s->vbv_delay_ptr[0] |= vbv_delay>>13;
            s->vbv_delay_ptr[1]  = vbv_delay>>5;
            s->vbv_delay_ptr[2] &= 0x07;
            s->vbv_delay_ptr[2] |= vbv_delay<<3;
        }
        s->total_bits += s->frame_bits;
        avctx->frame_bits  = s->frame_bits;
    }else{
        /* no picture was output this call */
        assert((pbBufPtr(&s->pb) == s->pb.buf));
        s->frame_bits=0;
    }
    assert((s->frame_bits&7)==0);

    return s->frame_bits/8;
}
2622
2623 #endif //CONFIG_ENCODERS
2624
/**
 * Global motion compensation with a single motion vector ("GMC1" case).
 * Predicts one 16x16 luma block and the two corresponding 8x8 chroma
 * blocks from ref_picture using s->sprite_offset.
 */
static inline void gmc1_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               uint8_t **ref_picture)
{
    uint8_t *ptr;
    int offset, src_x, src_y, linesize, uvlinesize;
    int motion_x, motion_y;
    int emu=0;   // set when the cb source was copied into the edge buffer

    motion_x= s->sprite_offset[0][0];
    motion_y= s->sprite_offset[0][1];
    /* integer part of the luma source position */
    src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
    src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
    /* rescale the vector so its low bits are in the units used by dsp.gmc1() */
    motion_x<<=(3-s->sprite_warping_accuracy);
    motion_y<<=(3-s->sprite_warping_accuracy);
    /* clip the source position; at the clip limit the fractional part is
       dropped so no samples beyond the limit are read */
    src_x = clip(src_x, -16, s->width);
    if (src_x == s->width)
        motion_x =0;
    src_y = clip(src_y, -16, s->height);
    if (src_y == s->height)
        motion_y =0;

    linesize = s->linesize;
    uvlinesize = s->uvlinesize;

    ptr = ref_picture[0] + (src_y * linesize) + src_x;

    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x >= s->h_edge_pos - 17
           || (unsigned)src_y >= s->v_edge_pos - 17){
            /* 17x17 source area sticks out of the picture -> replicate edges */
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
            ptr= s->edge_emu_buffer;
        }
    }

    if((motion_x|motion_y)&7){
        /* sub-pel position -> interpolate with the gmc1 kernel */
        s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
        s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
    }else{
        int dxy;

        /* half-pel aligned -> plain (no-)rounding halfpel copy is enough */
        dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
        if (s->no_rounding){
            s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
        }else{
            s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
        }
    }

    if(s->flags&CODEC_FLAG_GRAY) return;

    /* same procedure for chroma, at half resolution */
    motion_x= s->sprite_offset[1][0];
    motion_y= s->sprite_offset[1][1];
    src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
    src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
    motion_x<<=(3-s->sprite_warping_accuracy);
    motion_y<<=(3-s->sprite_warping_accuracy);
    src_x = clip(src_x, -8, s->width>>1);
    if (src_x == s->width>>1)
        motion_x =0;
    src_y = clip(src_y, -8, s->height>>1);
    if (src_y == s->height>>1)
        motion_y =0;

    offset = (src_y * uvlinesize) + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x >= (s->h_edge_pos>>1) - 9
           || (unsigned)src_y >= (s->v_edge_pos>>1) - 9){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr= s->edge_emu_buffer;
            emu=1;
        }
    }
    s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);

    ptr = ref_picture[2] + offset;
    if(emu){
        /* cr needs edge emulation iff cb did (same position) */
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
        ptr= s->edge_emu_buffer;
    }
    s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);

    return;
}
2710
/**
 * General global motion compensation using the full affine transform
 * (s->sprite_offset translation plus the s->sprite_delta matrix).
 * Predicts the luma and both chroma blocks of one macroblock.
 */
static inline void gmc_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               uint8_t **ref_picture)
{
    uint8_t *ptr;
    int linesize, uvlinesize;
    const int a= s->sprite_warping_accuracy;
    int ox, oy;

    linesize = s->linesize;
    uvlinesize = s->uvlinesize;

    ptr = ref_picture[0];

    /* transformed position of the macroblock's top-left luma sample */
    ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
    oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;

    /* luma is done as two 8x16 halves; the second starts 8 columns further
       along the transform */
    s->dsp.gmc(dest_y, ptr, linesize, 16,
           ox,
           oy,
           s->sprite_delta[0][0], s->sprite_delta[0][1],
           s->sprite_delta[1][0], s->sprite_delta[1][1],
           a+1, (1<<(2*a+1)) - s->no_rounding,
           s->h_edge_pos, s->v_edge_pos);
    s->dsp.gmc(dest_y+8, ptr, linesize, 16,
           ox + s->sprite_delta[0][0]*8,
           oy + s->sprite_delta[1][0]*8,
           s->sprite_delta[0][0], s->sprite_delta[0][1],
           s->sprite_delta[1][0], s->sprite_delta[1][1],
           a+1, (1<<(2*a+1)) - s->no_rounding,
           s->h_edge_pos, s->v_edge_pos);

    if(s->flags&CODEC_FLAG_GRAY) return;

    /* chroma: same transform at half resolution with the chroma offset */
    ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
    oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;

    ptr = ref_picture[1];
    s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
           ox,
           oy,
           s->sprite_delta[0][0], s->sprite_delta[0][1],
           s->sprite_delta[1][0], s->sprite_delta[1][1],
           a+1, (1<<(2*a+1)) - s->no_rounding,
           s->h_edge_pos>>1, s->v_edge_pos>>1);

    ptr = ref_picture[2];
    s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
           ox,
           oy,
           s->sprite_delta[0][0], s->sprite_delta[0][1],
           s->sprite_delta[1][0], s->sprite_delta[1][1],
           a+1, (1<<(2*a+1)) - s->no_rounding,
           s->h_edge_pos>>1, s->v_edge_pos>>1);
}
2766
/**
 * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
 * @param buf destination buffer
 * @param src source buffer, already pointing at position (src_x, src_y)
 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
 * @param block_w width of block
 * @param block_h height of block
 * @param src_x x coordinate of the top left sample of the block in the source buffer
 * @param src_y y coordinate of the top left sample of the block in the source buffer
 * @param w width of the source buffer
 * @param h height of the source buffer
 */
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
                                    int src_x, int src_y, int w, int h){
    int x, y;
    int top, bottom, left, right;

    /* move a block lying completely outside the source back so that it
       overlaps the source by at least one row / column */
    if (src_y >= h) {
        src  += (h - 1 - src_y) * linesize;
        src_y = h - 1;
    } else if (src_y <= -block_h) {
        src  += (1 - block_h - src_y) * linesize;
        src_y = 1 - block_h;
    }
    if (src_x >= w) {
        src  += w - 1 - src_x;
        src_x = w - 1;
    } else if (src_x <= -block_w) {
        src  += 1 - block_w - src_x;
        src_x = 1 - block_w;
    }

    /* bounds of the part of the block covered by real source samples */
    top    = src_y < 0 ? -src_y : 0;
    left   = src_x < 0 ? -src_x : 0;
    bottom = h - src_y < block_h ? h - src_y : block_h;
    right  = w - src_x < block_w ? w - src_x : block_w;

    /* copy the available samples */
    for (y = top; y < bottom; y++)
        for (x = left; x < right; x++)
            buf[y * linesize + x] = src[y * linesize + x];

    /* replicate the first available row upwards */
    for (y = 0; y < top; y++)
        for (x = left; x < right; x++)
            buf[y * linesize + x] = buf[top * linesize + x];

    /* replicate the last available row downwards */
    for (y = bottom; y < block_h; y++)
        for (x = left; x < right; x++)
            buf[y * linesize + x] = buf[(bottom - 1) * linesize + x];

    /* replicate the outermost available columns sideways */
    for (y = 0; y < block_h; y++) {
        for (x = 0; x < left; x++)
            buf[y * linesize + x] = buf[y * linesize + left];
        for (x = right; x < block_w; x++)
            buf[y * linesize + x] = buf[y * linesize + right - 1];
    }
}
2837
/**
 * Half-pel motion compensation of a single w x h block of one plane.
 * @param dest destination pointer
 * @param src source plane base (offset by the vector inside this function)
 * @param field_based nonzero when operating on fields rather than frames
 * @param field_select which source field to predict from
 * @param src_x x position of the block before applying the vector
 * @param src_y y position of the block before applying the vector
 * @param motion_x horizontal vector in half-pel units
 * @param motion_y vertical vector in half-pel units
 * @return 1 if the edge emulation buffer had to be used, 0 otherwise
 */
static inline int hpel_motion(MpegEncContext *s,
                                  uint8_t *dest, uint8_t *src,
                                  int field_based, int field_select,
                                  int src_x, int src_y,
                                  int width, int height, int stride,
                                  int h_edge_pos, int v_edge_pos,
                                  int w, int h, op_pixels_func *pix_op,
                                  int motion_x, int motion_y)
{
    int dxy;
    int emu=0;

    /* dxy selects the pix_op variant: bit 0 = horizontal half-pel,
       bit 1 = vertical half-pel */
    dxy = ((motion_y & 1) << 1) | (motion_x & 1);
    src_x += motion_x >> 1;
    src_y += motion_y >> 1;

    /* WARNING: do not forget half pels */
    src_x = clip(src_x, -16, width); //FIXME unneeded for emu?
    if (src_x == width)
        dxy &= ~1;   // no half-pel neighbour available at the clip limit
    src_y = clip(src_y, -16, height);
    if (src_y == height)
        dxy &= ~2;
    src += src_y * stride + src_x;

    if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
        if(   (unsigned)src_x > h_edge_pos - (motion_x&1) - w
           || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){
            /* read area extends past the decoded picture -> replicate edges */
            ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
                             src_x, src_y<<field_based, h_edge_pos, s->v_edge_pos);
            src= s->edge_emu_buffer;
            emu=1;
        }
    }
    if(field_select)
        src += s->linesize;
    pix_op[dxy](dest, src, stride, h);
    return emu;
}
2877
/**
 * Motion compensation of a single block for lowres (1/2^lowres resolution)
 * decoding; sub-pel interpolation is done with the h264 chroma MC functions.
 * @return 1 if the edge emulation buffer had to be used, 0 otherwise
 */
static inline int hpel_motion_lowres(MpegEncContext *s,
                                  uint8_t *dest, uint8_t *src,
                                  int field_based, int field_select,
                                  int src_x, int src_y,
                                  int width, int height, int stride,
                                  int h_edge_pos, int v_edge_pos,
                                  int w, int h, h264_chroma_mc_func *pix_op,
                                  int motion_x, int motion_y)
{
    const int lowres= s->avctx->lowres;
    const int s_mask= (2<<lowres)-1;   // mask for the sub-pel part of the vector
    int emu=0;
    int sx, sy;

    /* qpel vectors are approximated by half-pel in lowres mode */
    if(s->quarter_sample){
        motion_x/=2;
        motion_y/=2;
    }

    /* split the vector into sub-pel and integer parts */
    sx= motion_x & s_mask;
    sy= motion_y & s_mask;
    src_x += motion_x >> (lowres+1);
    src_y += motion_y >> (lowres+1);

    src += src_y * stride + src_x;

    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - w
       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
        /* read area extends past the decoded picture -> replicate edges */
        ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
                            src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
        src= s->edge_emu_buffer;
        emu=1;
    }

    /* scale the sub-pel position to the fixed range expected by pix_op */
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    if(field_select)
        src += s->linesize;
    pix_op[lowres](dest, src, stride, h, sx, sy);
    return emu;
}
2919
/**
 * Applies one MPEG style (half-pel) motion vector to all three components
 * (luma + both chroma planes) of the current macroblock.
 * @param field_based nonzero for field based motion compensation
 * @param bottom_field nonzero when the destination is the bottom field
 * @param field_select which source field to predict from
 * @param pix_op put/avg pixel function table, indexed by chroma shift and
 *               half-pel position
 * @param motion_x horizontal vector in half-pel units
 * @param motion_y vertical vector in half-pel units
 * @param h height of the predicted luma block
 */
static always_inline void mpeg_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, uvlinesize, linesize;

#if 0
if(s->quarter_sample)
{
    motion_x>>=1;
    motion_y>>=1;
}
#endif

    v_edge_pos = s->v_edge_pos >> field_based;
    linesize   = s->current_picture.linesize[0] << field_based;
    uvlinesize = s->current_picture.linesize[1] << field_based;

    /* luma source position; dxy selects the half-pel pix_op variant */
    dxy = ((motion_y & 1) << 1) | (motion_x & 1);
    src_x = s->mb_x* 16               + (motion_x >> 1);
    src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1);

    /* derive the chroma position/sub-pel index per output format */
    if (s->out_format == FMT_H263) {
        if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){
            mx = (motion_x>>1)|(motion_x&1);
            my = motion_y >>1;
            uvdxy = ((my & 1) << 1) | (mx & 1);
            uvsrc_x = s->mb_x* 8               + (mx >> 1);
            uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
        }else{
            uvdxy = dxy | (motion_y & 2) | ((motion_x & 2) >> 1);
            uvsrc_x = src_x>>1;
            uvsrc_y = src_y>>1;
        }
    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
        mx = motion_x / 4;
        my = motion_y / 4;
        uvdxy = 0;
        uvsrc_x = s->mb_x*8 + mx;
        uvsrc_y = s->mb_y*8 + my;
    } else {
        if(s->chroma_y_shift){
            //Chroma420
            mx = motion_x / 2;
            my = motion_y / 2;
            uvdxy = ((my & 1) << 1) | (mx & 1);
            uvsrc_x = s->mb_x* 8               + (mx >> 1);
            uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
        } else {
            if(s->chroma_x_shift){
            //Chroma422
                mx = motion_x / 2;
                uvdxy = ((motion_y & 1) << 1) | (mx & 1);
                uvsrc_x = s->mb_x* 8           + (mx >> 1);
                uvsrc_y = src_y;
            } else {
            //Chroma444
                uvdxy = dxy;
                uvsrc_x = src_x;
                uvsrc_y = src_y;
            }
        }
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    /* prediction reaches outside the padded picture: invalid for MPEG-1/2,
       otherwise handled by edge replication into the emu buffer */
    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
       || (unsigned)src_y >    v_edge_pos - (motion_y&1) - h){
            if(s->codec_id == CODEC_ID_MPEG2VIDEO ||
               s->codec_id == CODEC_ID_MPEG1VIDEO){
                av_log(s->avctx,AV_LOG_DEBUG,"MPEG motion vector out of boundary\n");
                return ;
            }
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
            ptr_y = s->edge_emu_buffer;
            if(!(s->flags&CODEC_FLAG_GRAY)){
                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
                ptr_cb= uvbuf;
                ptr_cr= uvbuf+16;
            }
    }

    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
        dest_y += s->linesize;
        dest_cb+= s->uvlinesize;
        dest_cr+= s->uvlinesize;
    }

    if(field_select){
        ptr_y += s->linesize;
        ptr_cb+= s->uvlinesize;
        ptr_cr+= s->uvlinesize;
    }

    pix_op[0][dxy](dest_y, ptr_y, linesize, h);

    if(!(s->flags&CODEC_FLAG_GRAY)){
        pix_op[s->chroma_x_shift][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
        pix_op[s->chroma_x_shift][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
    }
#if defined(CONFIG_H261_ENCODER) || defined(CONFIG_H261_DECODER)
    if(s->out_format == FMT_H261){
        ff_h261_loop_filter(s);
    }
#endif
}
3036
3037 /* apply one mpeg motion vector to the three components */
3038 static always_inline void mpeg_motion_lowres(MpegEncContext *s,
3039                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3040                                int field_based, int bottom_field, int field_select,
3041                                uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
3042                                int motion_x, int motion_y, int h)
3043 {
3044     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
3045     int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
3046     const int lowres= s->avctx->lowres;
3047     const int block_s= 8>>lowres;
3048     const int s_mask= (2<<lowres)-1;
3049     const int h_edge_pos = s->h_edge_pos >> lowres;
3050     const int v_edge_pos = s->v_edge_pos >> lowres;
3051     linesize   = s->current_picture.linesize[0] << field_based;
3052     uvlinesize = s->current_picture.linesize[1] << field_based;
3053
3054     if(s->quarter_sample){ //FIXME obviously not perfect but qpel wont work in lowres anyway
3055         motion_x/=2;
3056         motion_y/=2;
3057     }
3058
3059     if(field_based){
3060         motion_y += (bottom_field - field_select)*((1<<lowres)-1);
3061     }
3062
3063     sx= motion_x & s_mask;
3064     sy= motion_y & s_mask;
3065     src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
3066     src_y =(s->mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));
3067
3068     if (s->out_format == FMT_H263) {
3069         uvsx = ((motion_x>>1) & s_mask) | (sx&1);
3070         uvsy = ((motion_y>>1) & s_mask) | (sy&1);
3071         uvsrc_x = src_x>>1;
3072         uvsrc_y = src_y>>1;
3073     }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
3074         mx = motion_x / 4;
3075         my = motion_y / 4;
3076         uvsx = (2*mx) & s_mask;
3077         uvsy = (2*my) & s_mask;
3078         uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
3079         uvsrc_y = s->mb_y*block_s               + (my >> lowres);
3080     } else {
3081         mx = motion_x / 2;
3082         my = motion_y / 2;
3083         uvsx = mx & s_mask;
3084         uvsy = my & s_mask;
3085         uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
3086         uvsrc_y =(s->mb_y*block_s>>field_based) + (my >> (lowres+1));
3087     }
3088
3089     ptr_y  = ref_picture[0] + src_y * linesize + src_x;
3090     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
3091     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
3092
3093     if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
3094        || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
3095             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
3096                              src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
3097             ptr_y = s->edge_emu_buffer;
3098             if(!(s->flags&CODEC_FLAG_GRAY)){
3099                 uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
3100                 ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
3101                                  uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
3102                 ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
3103                                  uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
3104                 ptr_cb= uvbuf;
3105                 ptr_cr= uvbuf+16;
3106             }
3107     }
3108
3109     if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
3110         dest_y += s->linesize;
3111         dest_cb+= s->uvlinesize;
3112         dest_cr+= s->uvlinesize;
3113     }
3114
3115     if(field_select){
3116         ptr_y += s->linesize;
3117         ptr_cb+= s->uvlinesize;
3118         ptr_cr+= s->uvlinesize;
3119     }
3120
3121     sx <<= 2 - lowres;
3122     sy <<= 2 - lowres;
3123     pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);
3124
3125     if(!(s->flags&CODEC_FLAG_GRAY)){
3126         uvsx <<= 2 - lowres;
3127         uvsy <<= 2 - lowres;
3128         pix_op[lowres](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
3129         pix_op[lowres](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
3130     }
3131     //FIXME h261 lowres loop filter
3132 }
3133
3134 //FIXME move to dsputil, avg variant, 16x16 version
/**
 * Blend the five 8x8 predictions of overlapped block motion compensation
 * (H.263 Annex F style) into dst.  src[] holds the predictions made with the
 * current block's MV (mid) and with the top/left/right/bottom neighbour MVs.
 * Every output pixel is a fixed-point weighted sum; the five weights of each
 * OBMC_FILTER invocation add up to 8, hence the +4 rounding bias and >>3.
 */
3135 static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
3136     int x;
3137     uint8_t * const top   = src[1];
3138     uint8_t * const left  = src[2];
3139     uint8_t * const mid   = src[0];
3140     uint8_t * const right = src[3];
3141     uint8_t * const bottom= src[4];
     /* weighted blend of one pixel; t/l/m/r/b are the 3-bit weights */
3142 #define OBMC_FILTER(x, t, l, m, r, b)\
3143     dst[x]= (t*top[x] + l*left[x] + m*mid[x] + r*right[x] + b*bottom[x] + 4)>>3
     /* same weights applied to a 2x2 group of pixels */
3144 #define OBMC_FILTER4(x, t, l, m, r, b)\
3145     OBMC_FILTER(x         , t, l, m, r, b);\
3146     OBMC_FILTER(x+1       , t, l, m, r, b);\
3147     OBMC_FILTER(x  +stride, t, l, m, r, b);\
3148     OBMC_FILTER(x+1+stride, t, l, m, r, b);
3149
     /* rows 0-1: top neighbour contributes, bottom does not */
3150     x=0;
3151     OBMC_FILTER (x  , 2, 2, 4, 0, 0);
3152     OBMC_FILTER (x+1, 2, 1, 5, 0, 0);
3153     OBMC_FILTER4(x+2, 2, 1, 5, 0, 0);
3154     OBMC_FILTER4(x+4, 2, 0, 5, 1, 0);
3155     OBMC_FILTER (x+6, 2, 0, 5, 1, 0);
3156     OBMC_FILTER (x+7, 2, 0, 4, 2, 0);
3157     x+= stride;
3158     OBMC_FILTER (x  , 1, 2, 5, 0, 0);
3159     OBMC_FILTER (x+1, 1, 2, 5, 0, 0);
3160     OBMC_FILTER (x+6, 1, 0, 5, 2, 0);
3161     OBMC_FILTER (x+7, 1, 0, 5, 2, 0);
3162     x+= stride;
     /* rows 2-3 */
3163     OBMC_FILTER4(x  , 1, 2, 5, 0, 0);
3164     OBMC_FILTER4(x+2, 1, 1, 6, 0, 0);
3165     OBMC_FILTER4(x+4, 1, 0, 6, 1, 0);
3166     OBMC_FILTER4(x+6, 1, 0, 5, 2, 0);
3167     x+= 2*stride;
     /* rows 4-5: bottom neighbour starts contributing */
3168     OBMC_FILTER4(x  , 0, 2, 5, 0, 1);
3169     OBMC_FILTER4(x+2, 0, 1, 6, 0, 1);
3170     OBMC_FILTER4(x+4, 0, 0, 6, 1, 1);
3171     OBMC_FILTER4(x+6, 0, 0, 5, 2, 1);
3172     x+= 2*stride;
     /* rows 6-7 */
3173     OBMC_FILTER (x  , 0, 2, 5, 0, 1);
3174     OBMC_FILTER (x+1, 0, 2, 5, 0, 1);
3175     OBMC_FILTER4(x+2, 0, 1, 5, 0, 2);
3176     OBMC_FILTER4(x+4, 0, 0, 5, 1, 2);
3177     OBMC_FILTER (x+6, 0, 0, 5, 2, 1);
3178     OBMC_FILTER (x+7, 0, 0, 5, 2, 1);
3179     x+= stride;
3180     OBMC_FILTER (x  , 0, 2, 4, 0, 2);
3181     OBMC_FILTER (x+1, 0, 1, 5, 0, 2);
3182     OBMC_FILTER (x+6, 0, 0, 5, 1, 2);
3183     OBMC_FILTER (x+7, 0, 0, 4, 2, 2);
3184 }
3185
3186 /* obmc for 1 8x8 luma block */
/**
 * Build the five half-pel predictions needed for OBMC of one 8x8 luma block
 * and blend them with put_obmc().
 * mv[5][2] carries the MVs in the order: mid, top, left, right, bottom.
 * Neighbours whose MV equals the mid MV reuse the mid prediction instead of
 * recomputing it.  Scratch predictions live in s->obmc_scratchpad.
 */
3187 static inline void obmc_motion(MpegEncContext *s,
3188                                uint8_t *dest, uint8_t *src,
3189                                int src_x, int src_y,
3190                                op_pixels_func *pix_op,
3191                                int16_t mv[5][2]/* mid top left right bottom*/)
3192 #define MID    0
3193 {
3194     int i;
3195     uint8_t *ptr[5];
3196
     /* OBMC is only used with half-pel MVs (H.263-style codecs) */
3197     assert(s->quarter_sample==0);
3198
3199     for(i=0; i<5; i++){
3200         if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){
             /* same MV as the current block: reuse its prediction */
3201             ptr[i]= ptr[MID];
3202         }else{
             /* each prediction gets its own 8x8 slot in the scratchpad */
3203             ptr[i]= s->obmc_scratchpad + 8*(i&1) + s->linesize*8*(i>>1);
3204             hpel_motion(s, ptr[i], src, 0, 0,
3205                         src_x, src_y,
3206                         s->width, s->height, s->linesize,
3207                         s->h_edge_pos, s->v_edge_pos,
3208                         8, 8, pix_op,
3209                         mv[i][0], mv[i][1]);
3210         }
3211     }
3212
3213     put_obmc(dest, ptr, s->linesize);
3214 }
3215
/**
 * Quarter-pel motion compensation of one macroblock (luma) with half-pel
 * chroma derived from the luma MV.
 *
 * @param field_based   1 for field prediction (halves the vertical extent)
 * @param bottom_field  write the bottom field of the destination
 * @param field_select  read from the bottom field of the reference
 * @param motion_x/y    luma MV in quarter-pel units
 * @param h             height of the predicted area in luma lines
 */
3216 static inline void qpel_motion(MpegEncContext *s,
3217                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3218                                int field_based, int bottom_field, int field_select,
3219                                uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
3220                                qpel_mc_func (*qpix_op)[16],
3221                                int motion_x, int motion_y, int h)
3222 {
3223     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
3224     int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, linesize, uvlinesize;
3225
     /* dxy encodes the quarter-pel sub-position, selecting the qpel function */
3226     dxy = ((motion_y & 3) << 2) | (motion_x & 3);
3227     src_x = s->mb_x *  16                 + (motion_x >> 2);
3228     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);
3229
3230     v_edge_pos = s->v_edge_pos >> field_based;
3231     linesize = s->linesize << field_based;
3232     uvlinesize = s->uvlinesize << field_based;
3233
     /* derive the half-pel chroma MV; several encoder bugs require
      * alternative roundings selected via s->workaround_bugs */
3234     if(field_based){
3235         mx= motion_x/2;
3236         my= motion_y>>1;
3237     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
3238         static const int rtab[8]= {0,0,1,1,0,0,0,1};
3239         mx= (motion_x>>1) + rtab[motion_x&7];
3240         my= (motion_y>>1) + rtab[motion_y&7];
3241     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
3242         mx= (motion_x>>1)|(motion_x&1);
3243         my= (motion_y>>1)|(motion_y&1);
3244     }else{
3245         mx= motion_x/2;
3246         my= motion_y/2;
3247     }
     /* round to half-pel, keeping the sub-pel flag in the low bit */
3248     mx= (mx>>1)|(mx&1);
3249     my= (my>>1)|(my&1);
3250
3251     uvdxy= (mx&1) | ((my&1)<<1);
3252     mx>>=1;
3253     my>>=1;
3254
3255     uvsrc_x = s->mb_x *  8                 + mx;
3256     uvsrc_y = s->mb_y * (8 >> field_based) + my;
3257
3258     ptr_y  = ref_picture[0] +   src_y *   linesize +   src_x;
3259     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
3260     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
3261
     /* if the filter taps would read outside the padded picture, build a
      * copy with replicated edges in edge_emu_buffer and read from that */
3262     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16
3263        || (unsigned)src_y >    v_edge_pos - (motion_y&3) - h  ){
3264         ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
3265                          src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
3266         ptr_y= s->edge_emu_buffer;
3267         if(!(s->flags&CODEC_FLAG_GRAY)){
3268             uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize;
3269             ff_emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize, 9, 9 + field_based,
3270                              uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3271             ff_emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize, 9, 9 + field_based,
3272                              uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3273             ptr_cb= uvbuf;
3274             ptr_cr= uvbuf + 16;
3275         }
3276     }
3277
3278     if(!field_based)
3279         qpix_op[0][dxy](dest_y, ptr_y, linesize);
3280     else{
3281         if(bottom_field){
3282             dest_y += s->linesize;
3283             dest_cb+= s->uvlinesize;
3284             dest_cr+= s->uvlinesize;
3285         }
3286
3287         if(field_select){
3288             ptr_y  += s->linesize;
3289             ptr_cb += s->uvlinesize;
3290             ptr_cr += s->uvlinesize;
3291         }
3292         //damn interlaced mode
3293         //FIXME boundary mirroring is not exactly correct here
         /* fields are 16 wide but only 8x8 qpel functions exist: do two */
3294         qpix_op[1][dxy](dest_y  , ptr_y  , linesize);
3295         qpix_op[1][dxy](dest_y+8, ptr_y+8, linesize);
3296     }
3297     if(!(s->flags&CODEC_FLAG_GRAY)){
3298         pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1);
3299         pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1);
3300     }
3301 }
3302
3303 inline int ff_h263_round_chroma(int x){
3304     if (x >= 0)
3305         return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3306     else {
3307         x = -x;
3308         return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3309     }
3310 }
3311
3312 /**
3313  * h263 chroma 4mv motion compensation.
3314  */
/**
 * Chroma motion compensation for H.263-style 4MV macroblocks.
 * mx/my are the SUMS of the four luma MVs; ff_h263_round_chroma() turns the
 * sum into a single half-pel chroma MV with the spec's special rounding.
 */
3315 static inline void chroma_4mv_motion(MpegEncContext *s,
3316                                      uint8_t *dest_cb, uint8_t *dest_cr,
3317                                      uint8_t **ref_picture,
3318                                      op_pixels_func *pix_op,
3319                                      int mx, int my){
3320     int dxy, emu=0, src_x, src_y, offset;
3321     uint8_t *ptr;
3322
3323     /* In case of 8X8, we construct a single chroma motion vector
3324        with a special rounding */
3325     mx= ff_h263_round_chroma(mx);
3326     my= ff_h263_round_chroma(my);
3327
3328     dxy = ((my & 1) << 1) | (mx & 1);
3329     mx >>= 1;
3330     my >>= 1;
3331
3332     src_x = s->mb_x * 8 + mx;
3333     src_y = s->mb_y * 8 + my;
     /* clip to the picture and drop the half-pel flag on the border so no
      * sample beyond the last column/row is read */
3334     src_x = clip(src_x, -8, s->width/2);
3335     if (src_x == s->width/2)
3336         dxy &= ~1;
3337     src_y = clip(src_y, -8, s->height/2);
3338     if (src_y == s->height/2)
3339         dxy &= ~2;
3340
3341     offset = (src_y * (s->uvlinesize)) + src_x;
3342     ptr = ref_picture[1] + offset;
3343     if(s->flags&CODEC_FLAG_EMU_EDGE){
3344         if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
3345            || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
3346             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
3347             ptr= s->edge_emu_buffer;
             /* remember that Cr needs the same edge emulation below */
3348             emu=1;
3349         }
3350     }
3351     pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);
3352
3353     ptr = ref_picture[2] + offset;
3354     if(emu){
3355         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
3356         ptr= s->edge_emu_buffer;
3357     }
3358     pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
3359 }
3360
/**
 * Low-resolution variant of chroma_4mv_motion(): chroma motion compensation
 * for 4MV macroblocks when decoding at 1/2^lowres size, using the h264
 * bilinear chroma MC functions with sub-pel weights sx/sy.
 */
3361 static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
3362                                      uint8_t *dest_cb, uint8_t *dest_cr,
3363                                      uint8_t **ref_picture,
3364                                      h264_chroma_mc_func *pix_op,
3365                                      int mx, int my){
3366     const int lowres= s->avctx->lowres;
3367     const int block_s= 8>>lowres;
     /* mask extracting the sub-pel fraction at this lowres level */
3368     const int s_mask= (2<<lowres)-1;
3369     const int h_edge_pos = s->h_edge_pos >> (lowres+1);
3370     const int v_edge_pos = s->v_edge_pos >> (lowres+1);
3371     int emu=0, src_x, src_y, offset, sx, sy;
3372     uint8_t *ptr;
3373
3374     if(s->quarter_sample){
3375         mx/=2;
3376         my/=2;
3377     }
3378
3379     /* In case of 8X8, we construct a single chroma motion vector
3380        with a special rounding */
3381     mx= ff_h263_round_chroma(mx);
3382     my= ff_h263_round_chroma(my);
3383
3384     sx= mx & s_mask;
3385     sy= my & s_mask;
3386     src_x = s->mb_x*block_s + (mx >> (lowres+1));
3387     src_y = s->mb_y*block_s + (my >> (lowres+1));
3388
3389     offset = src_y * s->uvlinesize + src_x;
3390     ptr = ref_picture[1] + offset;
3391     if(s->flags&CODEC_FLAG_EMU_EDGE){
3392         if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
3393            || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
3394             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
3395             ptr= s->edge_emu_buffer;
             /* Cr below needs the same edge emulation */
3396             emu=1;
3397         }
3398     }
     /* scale the sub-pel fraction to the 1/8-pel units of the h264 chroma MC */
3399     sx <<= 2 - lowres;
3400     sy <<= 2 - lowres;
3401     pix_op[lowres](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);
3402
3403     ptr = ref_picture[2] + offset;
3404     if(emu){
3405         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
3406         ptr= s->edge_emu_buffer;
3407     }
3408     pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
3409 }
3410
/**
 * Issue cache prefetches for the reference area that the MV of the current
 * macroblock suggests will be needed ~4 macroblocks ahead.
 * @param dir 0 = forward reference, 1 = backward reference
 * NOTE(review): the +64/stagger constants are cache-line tuning
 * (64-byte lines per the comment below) — do not change without profiling.
 */
3411 static inline void prefetch_motion(MpegEncContext *s, uint8_t **pix, int dir){
3412     /* fetch pixels for estimated mv 4 macroblocks ahead
3413      * optimized for 64byte cache lines */
3414     const int shift = s->quarter_sample ? 2 : 1;
3415     const int mx= (s->mv[dir][0][0]>>shift) + 16*s->mb_x + 8;
3416     const int my= (s->mv[dir][0][1]>>shift) + 16*s->mb_y;
3417     int off= mx + (my + (s->mb_x&3)*4)*s->linesize + 64;
3418     s->dsp.prefetch(pix[0]+off, s->linesize, 4);
     /* chroma: prefetch both planes in one call via their pointer distance */
3419     off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
3420     s->dsp.prefetch(pix[1]+off, pix[2]-pix[1], 2);
3421 }
3422
3423 /**
3424  * motion compensation of a single macroblock
3425  * @param s context
3426  * @param dest_y luma destination pointer
3427  * @param dest_cb chroma cb/u destination pointer
3428  * @param dest_cr chroma cr/v destination pointer
3429  * @param dir direction (0->forward, 1->backward)
3430  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
3431  * @param pix_op halfpel motion compensation function (average or put normally)
3432  * @param qpix_op qpel motion compensation function (average or put normally)
3433  * the motion vectors are taken from s->mv and the MV type from s->mv_type
3434  */
3435 static inline void MPV_motion(MpegEncContext *s,
3436                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3437                               int dir, uint8_t **ref_picture,
3438                               op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
3439 {
3440     int dxy, mx, my, src_x, src_y, motion_x, motion_y;
3441     int mb_x, mb_y, i;
3442     uint8_t *ptr, *dest;
3443
3444     mb_x = s->mb_x;
3445     mb_y = s->mb_y;
3446
3447     prefetch_motion(s, ref_picture, dir);
3448
     /* OBMC path (H.263 advanced prediction): gather the MVs of the current
      * and neighbouring 8x8 blocks into mv_cache[row][col] with a 1-element
      * border, then blend per-block predictions */
3449     if(s->obmc && s->pict_type != B_TYPE){
3450         int16_t mv_cache[4][4][2];
3451         const int xy= s->mb_x + s->mb_y*s->mb_stride;
3452         const int mot_stride= s->b8_stride;
3453         const int mot_xy= mb_x*2 + mb_y*2*mot_stride;
3454
3455         assert(!s->mb_skipped);
3456
         /* rows 1-2: the current MB's own 8x8 MVs; row 3 duplicates row 2
          * because the MB below is not decoded yet */
3457         memcpy(mv_cache[1][1], s->current_picture.motion_val[0][mot_xy           ], sizeof(int16_t)*4);
3458         memcpy(mv_cache[2][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
3459         memcpy(mv_cache[3][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
3460
         /* top border: clone own MVs at the picture edge or next to intra MBs */
3461         if(mb_y==0 || IS_INTRA(s->current_picture.mb_type[xy-s->mb_stride])){
3462             memcpy(mv_cache[0][1], mv_cache[1][1], sizeof(int16_t)*4);
3463         }else{
3464             memcpy(mv_cache[0][1], s->current_picture.motion_val[0][mot_xy-mot_stride], sizeof(int16_t)*4);
3465         }
3466
         /* left border (an MV pair is copied as one 32-bit word) */
3467         if(mb_x==0 || IS_INTRA(s->current_picture.mb_type[xy-1])){
3468             *(int32_t*)mv_cache[1][0]= *(int32_t*)mv_cache[1][1];
3469             *(int32_t*)mv_cache[2][0]= *(int32_t*)mv_cache[2][1];
3470         }else{
3471             *(int32_t*)mv_cache[1][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1];
3472             *(int32_t*)mv_cache[2][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1+mot_stride];
3473         }
3474
         /* right border */
3475         if(mb_x+1>=s->mb_width || IS_INTRA(s->current_picture.mb_type[xy+1])){
3476             *(int32_t*)mv_cache[1][3]= *(int32_t*)mv_cache[1][2];
3477             *(int32_t*)mv_cache[2][3]= *(int32_t*)mv_cache[2][2];
3478         }else{
3479             *(int32_t*)mv_cache[1][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2];
3480             *(int32_t*)mv_cache[2][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2+mot_stride];
3481         }
3482
         /* mx/my accumulate the four luma MVs for the chroma MV derivation */
3483         mx = 0;
3484         my = 0;
3485         for(i=0;i<4;i++) {
3486             const int x= (i&1)+1;
3487             const int y= (i>>1)+1;
3488             int16_t mv[5][2]= {
3489                 {mv_cache[y][x  ][0], mv_cache[y][x  ][1]},
3490                 {mv_cache[y-1][x][0], mv_cache[y-1][x][1]},
3491                 {mv_cache[y][x-1][0], mv_cache[y][x-1][1]},
3492                 {mv_cache[y][x+1][0], mv_cache[y][x+1][1]},
3493                 {mv_cache[y+1][x][0], mv_cache[y+1][x][1]}};
3494             //FIXME cleanup
3495             obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
3496                         ref_picture[0],
3497                         mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
3498                         pix_op[1],
3499                         mv);
3500
3501             mx += mv[0][0];
3502             my += mv[0][1];
3503         }
3504         if(!(s->flags&CODEC_FLAG_GRAY))
3505             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
3506
3507         return;
3508     }
3509
     /* non-OBMC: dispatch on the macroblock's motion-vector type */
3510     switch(s->mv_type) {
3511     case MV_TYPE_16X16:
3512         if(s->mcsel){
             /* global motion compensation (MPEG-4 GMC) */
3513             if(s->real_sprite_warping_points==1){
3514                 gmc1_motion(s, dest_y, dest_cb, dest_cr,
3515                             ref_picture);
3516             }else{
3517                 gmc_motion(s, dest_y, dest_cb, dest_cr,
3518                             ref_picture);
3519             }
3520         }else if(s->quarter_sample){
3521             qpel_motion(s, dest_y, dest_cb, dest_cr,
3522                         0, 0, 0,
3523                         ref_picture, pix_op, qpix_op,
3524                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3525         }else if(s->mspel){
             /* WMV2-style motion compensation */
3526             ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
3527                         ref_picture, pix_op,
3528                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3529         }else
3530         {
3531             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3532                         0, 0, 0,
3533                         ref_picture, pix_op,
3534                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3535         }
3536         break;
3537     case MV_TYPE_8X8:
         /* mx/my collect the MV sums for the single chroma MV */
3538         mx = 0;
3539         my = 0;
3540         if(s->quarter_sample){
3541             for(i=0;i<4;i++) {
3542                 motion_x = s->mv[dir][i][0];
3543                 motion_y = s->mv[dir][i][1];
3544
3545                 dxy = ((motion_y & 3) << 2) | (motion_x & 3);
3546                 src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
3547                 src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;
3548
3549                 /* WARNING: do not forget half pels */
3550                 src_x = clip(src_x, -16, s->width);
3551                 if (src_x == s->width)
3552                     dxy &= ~3;
3553                 src_y = clip(src_y, -16, s->height);
3554                 if (src_y == s->height)
3555                     dxy &= ~12;
3556
3557                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
3558                 if(s->flags&CODEC_FLAG_EMU_EDGE){
3559                     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8
3560                        || (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 8 ){
3561                         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
3562                         ptr= s->edge_emu_buffer;
3563                     }
3564                 }
3565                 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
3566                 qpix_op[1][dxy](dest, ptr, s->linesize);
3567
                 /* chroma MV is derived from half-pel units, hence /2 */
3568                 mx += s->mv[dir][i][0]/2;
3569                 my += s->mv[dir][i][1]/2;
3570             }
3571         }else{
3572             for(i=0;i<4;i++) {
3573                 hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
3574                             ref_picture[0], 0, 0,
3575                             mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
3576                             s->width, s->height, s->linesize,
3577                             s->h_edge_pos, s->v_edge_pos,
3578                             8, 8, pix_op[1],
3579                             s->mv[dir][i][0], s->mv[dir][i][1]);
3580
3581                 mx += s->mv[dir][i][0];
3582                 my += s->mv[dir][i][1];
3583             }
3584         }
3585
3586         if(!(s->flags&CODEC_FLAG_GRAY))
3587             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
3588         break;
3589     case MV_TYPE_FIELD:
3590         if (s->picture_structure == PICT_FRAME) {
             /* frame picture with two field MVs: predict each field separately */
3591             if(s->quarter_sample){
3592                 for(i=0; i<2; i++){
3593                     qpel_motion(s, dest_y, dest_cb, dest_cr,
3594                                 1, i, s->field_select[dir][i],
3595                                 ref_picture, pix_op, qpix_op,
3596                                 s->mv[dir][i][0], s->mv[dir][i][1], 8);
3597                 }
3598             }else{
3599                 /* top field */
3600                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3601                             1, 0, s->field_select[dir][0],
3602                             ref_picture, pix_op,
3603                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
3604                 /* bottom field */
3605                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3606                             1, 1, s->field_select[dir][1],
3607                             ref_picture, pix_op,
3608                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
3609             }
3610         } else {
             /* field picture: same-parity reference of the second field is the
              * current picture itself */
3611             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
3612                 ref_picture= s->current_picture_ptr->data;
3613             }
3614
3615             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3616                         0, 0, s->field_select[dir][0],
3617                         ref_picture, pix_op,
3618                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3619         }
3620         break;
3621     case MV_TYPE_16X8:
3622         for(i=0; i<2; i++){
3623             uint8_t ** ref2picture;
3624
3625             if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
3626                 ref2picture= ref_picture;
3627             }else{
3628                 ref2picture= s->current_picture_ptr->data;
3629             }
3630
3631             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3632                         0, 0, s->field_select[dir][i],
3633                         ref2picture, pix_op,
3634                         s->mv[dir][i][0], s->mv[dir][i][1] + 16*i, 8);
3635
             /* advance to the lower 16x8 half */
3636             dest_y += 16*s->linesize;
3637             dest_cb+= (16>>s->chroma_y_shift)*s->uvlinesize;
3638             dest_cr+= (16>>s->chroma_y_shift)*s->uvlinesize;
3639         }
3640         break;
3641     case MV_TYPE_DMV:
         /* dual prime: put the first prediction, then average the opposite
          * parity prediction on top of it */
3642         if(s->picture_structure == PICT_FRAME){
3643             for(i=0; i<2; i++){
3644                 int j;
3645                 for(j=0; j<2; j++){
3646                     mpeg_motion(s, dest_y, dest_cb, dest_cr,
3647                                 1, j, j^i,
3648                                 ref_picture, pix_op,
3649                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], 8);
3650                 }
3651                 pix_op = s->dsp.avg_pixels_tab;
3652             }
3653         }else{
3654             for(i=0; i<2; i++){
3655                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3656                             0, 0, s->picture_structure != i+1,
3657                             ref_picture, pix_op,
3658                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],16);
3659
3660                 // after put we make avg of the same block
3661                 pix_op=s->dsp.avg_pixels_tab;
3662
3663                 //opposite parity is always in the same frame if this is second field
3664                 if(!s->first_field){
3665                     ref_picture = s->current_picture_ptr->data;
3666                 }
3667             }
3668         }
3669     break;
3670     default: assert(0);
3671     }
3672 }
3673
3674 /**
3675  * motion compensation of a single macroblock
3676  * @param s context
3677  * @param dest_y luma destination pointer
3678  * @param dest_cb chroma cb/u destination pointer
3679  * @param dest_cr chroma cr/v destination pointer
3680  * @param dir direction (0->forward, 1->backward)
3681  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
3682  * @param pix_op halfpel motion compensation function (average or put normally)
3683  * the motion vectors are taken from s->mv and the MV type from s->mv_type
3684  */
3685 static inline void MPV_motion_lowres(MpegEncContext *s,
3686                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3687                               int dir, uint8_t **ref_picture,
3688                               h264_chroma_mc_func *pix_op)
3689 {
3690     int mx, my;
3691     int mb_x, mb_y, i;
3692     const int lowres= s->avctx->lowres;
     /* luma block size at this downscale level (8, 4 or 2) */
3693     const int block_s= 8>>lowres;
3694
3695     mb_x = s->mb_x;
3696     mb_y = s->mb_y;
3697
     /* lowres counterpart of MPV_motion(): no OBMC/GMC/qpel/mspel paths */
3698     switch(s->mv_type) {
3699     case MV_TYPE_16X16:
3700         mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3701                     0, 0, 0,
3702                     ref_picture, pix_op,
3703                     s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
3704         break;
3705     case MV_TYPE_8X8:
         /* mx/my accumulate the four luma MVs for the chroma MV */
3706         mx = 0;
3707         my = 0;
3708             for(i=0;i<4;i++) {
3709                 hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
3710                             ref_picture[0], 0, 0,
3711                             (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
3712                             s->width, s->height, s->linesize,
3713                             s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
3714                             block_s, block_s, pix_op,
3715                             s->mv[dir][i][0], s->mv[dir][i][1]);
3716
3717                 mx += s->mv[dir][i][0];
3718                 my += s->mv[dir][i][1];
3719             }
3720
3721         if(!(s->flags&CODEC_FLAG_GRAY))
3722             chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
3723         break;
3724     case MV_TYPE_FIELD:
3725         if (s->picture_structure == PICT_FRAME) {
3726             /* top field */
3727             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3728                         1, 0, s->field_select[dir][0],
3729                         ref_picture, pix_op,
3730                         s->mv[dir][0][0], s->mv[dir][0][1], block_s);
3731             /* bottom field */
3732             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3733                         1, 1, s->field_select[dir][1],
3734                         ref_picture, pix_op,
3735                         s->mv[dir][1][0], s->mv[dir][1][1], block_s);
3736         } else {
             /* field picture: the same-parity reference of the second field is
              * the current picture itself */
3737             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
3738                 ref_picture= s->current_picture_ptr->data;
3739             }
3740
3741             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3742                         0, 0, s->field_select[dir][0],
3743                         ref_picture, pix_op,
3744                         s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
3745         }
3746         break;
3747     case MV_TYPE_16X8:
3748         for(i=0; i<2; i++){
3749             uint8_t ** ref2picture;
3750
3751             if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
3752                 ref2picture= ref_picture;
3753             }else{
3754                 ref2picture= s->current_picture_ptr->data;
3755             }
3756
3757             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3758                         0, 0, s->field_select[dir][i],
3759                         ref2picture, pix_op,
3760                         s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s);
3761
             /* advance to the lower half of the macroblock */
3762             dest_y += 2*block_s*s->linesize;
3763             dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
3764             dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
3765         }
3766         break;
3767     case MV_TYPE_DMV:
         /* dual prime: put first prediction, then average opposite parity */
3768         if(s->picture_structure == PICT_FRAME){
3769             for(i=0; i<2; i++){
3770                 int j;
3771                 for(j=0; j<2; j++){
3772                     mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3773                                 1, j, j^i,
3774                                 ref_picture, pix_op,
3775                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s);
3776                 }
3777                 pix_op = s->dsp.avg_h264_chroma_pixels_tab;
3778             }
3779         }else{
3780             for(i=0; i<2; i++){
3781                 mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3782                             0, 0, s->picture_structure != i+1,
3783                             ref_picture, pix_op,
3784                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s);
3785
3786                 // after put we make avg of the same block
3787                 pix_op = s->dsp.avg_h264_chroma_pixels_tab;
3788
3789                 //opposite parity is always in the same frame if this is second field
3790                 if(!s->first_field){
3791                     ref_picture = s->current_picture_ptr->data;
3792                 }
3793             }
3794         }
3795     break;
3796     default: assert(0);
3797     }
3798 }
3799
3800 /* put block[] to dest[] */
/* Dequantize intra block i with the given qscale, then inverse-transform it
 * and STORE (overwrite) the result into dest. */
3801 static inline void put_dct(MpegEncContext *s,
3802                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3803 {
3804     s->dct_unquantize_intra(s, block, i, qscale);
3805     s->dsp.idct_put (dest, line_size, block);
3806 }
3807
3808 /* add block[] to dest[] */
3809 static inline void add_dct(MpegEncContext *s,
3810                            DCTELEM *block, int i, uint8_t *dest, int line_size)
3811 {
3812     if (s->block_last_index[i] >= 0) {
3813         s->dsp.idct_add (dest, line_size, block);
3814     }
3815 }
3816
3817 static inline void add_dequant_dct(MpegEncContext *s,
3818                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3819 {
3820     if (s->block_last_index[i] >= 0) {
3821         s->dct_unquantize_inter(s, block, i, qscale);
3822
3823         s->dsp.idct_add (dest, line_size, block);
3824     }
3825 }
3826
3827 /**
3828  * cleans dc, ac, coded_block for the current non intra MB
3829  */
3830 void ff_clean_intra_table_entries(MpegEncContext *s)
3831 {
3832     int wrap = s->b8_stride;
3833     int xy = s->block_index[0];
3834
3835     s->dc_val[0][xy           ] =
3836     s->dc_val[0][xy + 1       ] =
3837     s->dc_val[0][xy     + wrap] =
3838     s->dc_val[0][xy + 1 + wrap] = 1024;
3839     /* ac pred */
3840     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
3841     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
3842     if (s->msmpeg4_version>=3) {
3843         s->coded_block[xy           ] =
3844         s->coded_block[xy + 1       ] =
3845         s->coded_block[xy     + wrap] =
3846         s->coded_block[xy + 1 + wrap] = 0;
3847     }
3848     /* chroma */
3849     wrap = s->mb_stride;
3850     xy = s->mb_x + s->mb_y * wrap;
3851     s->dc_val[1][xy] =
3852     s->dc_val[2][xy] = 1024;
3853     /* ac pred */
3854     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
3855     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
3856
3857     s->mbintra_table[xy]= 0;
3858 }
3859
/* generic function called after a macroblock has been parsed by the
   decoder or after it has been encoded by the encoder.

   Important variables used:
   s->mb_intra : true if intra macroblock
   s->mv_dir   : motion vector direction
   s->mv_type  : motion vector type
   s->mv       : motion vector
   s->interlaced_dct : true if interlaced dct used (mpeg2)

   lowres_flag selects the low-resolution decode path; being a compile-time
   constant at the call sites, always_inline lets the compiler specialize
   both variants. */
static always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
{
    int mb_x, mb_y;
    const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
#ifdef HAVE_XVMC
    if(s->avctx->xvmc_acceleration){
        XVMC_decode_mb(s);//xvmc uses pblocks
        return;
    }
#endif

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
       /* save DCT coefficients (in raster order via idct_permutation) for debugging */
       int i,j;
       DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
       for(i=0; i<6; i++)
           for(j=0; j<64; j++)
               *dct++ = block[i][s->dsp.idct_permutation[j]];
    }

    s->current_picture.qscale_table[mb_xy]= s->qscale;

    /* update DC predictors for P macroblocks */
    if (!s->mb_intra) {
        if (s->h263_pred || s->h263_aic) {
            if(s->mbintra_table[mb_xy])
                ff_clean_intra_table_entries(s);
        } else {
            s->last_dc[0] =
            s->last_dc[1] =
            s->last_dc[2] = 128 << s->intra_dc_precision;
        }
    }
    else if (s->h263_pred || s->h263_aic)
        s->mbintra_table[mb_xy]=1;

    /* reconstruct the macroblock unless we are encoding and the pixels are
       not needed (no PSNR requested, intra-only or B frame) */
    if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { //FIXME precalc
        uint8_t *dest_y, *dest_cb, *dest_cr;
        int dct_linesize, dct_offset;
        op_pixels_func (*op_pix)[4];
        qpel_mc_func (*op_qpix)[16];
        const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
        const int uvlinesize= s->current_picture.linesize[1];
        const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
        const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;

        /* avoid copy if macroblock skipped in last frame too */
        /* skip only during decoding as we might trash the buffers during encoding a bit */
        if(!s->encoding){
            uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
            const int age= s->current_picture.age;

            assert(age);

            if (s->mb_skipped) {
                s->mb_skipped= 0;
                assert(s->pict_type!=I_TYPE);

                (*mbskip_ptr) ++; /* indicate that this time we skipped it */
                if(*mbskip_ptr >99) *mbskip_ptr= 99;

                /* if previous was skipped too, then nothing to do !  */
                if (*mbskip_ptr >= age && s->current_picture.reference){
                    return;
                }
            } else if(!s->current_picture.reference){
                (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
                if(*mbskip_ptr >99) *mbskip_ptr= 99;
            } else{
                *mbskip_ptr = 0; /* not skipped */
            }
        }

        /* for interlaced DCT the two fields are interleaved: double the
           stride and start the bottom blocks one line down */
        dct_linesize = linesize << s->interlaced_dct;
        dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;

        if(readable){
            dest_y=  s->dest[0];
            dest_cb= s->dest[1];
            dest_cr= s->dest[2];
        }else{
            /* reconstruct into a scratch buffer, copied out after skip_idct */
            dest_y = s->b_scratchpad;
            dest_cb= s->b_scratchpad+16*linesize;
            dest_cr= s->b_scratchpad+32*linesize;
        }

        if (!s->mb_intra) {
            /* motion handling */
            /* decoding or more than one mb_type (MC was already done otherwise) */
            if(!s->encoding){
                if(lowres_flag){
                    h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;

                    if (s->mv_dir & MV_DIR_FORWARD) {
                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
                        op_pix = s->dsp.avg_h264_chroma_pixels_tab;
                    }
                    if (s->mv_dir & MV_DIR_BACKWARD) {
                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
                    }
                }else{
                    op_qpix= s->me.qpel_put;
                    if ((!s->no_rounding) || s->pict_type==B_TYPE){
                        op_pix = s->dsp.put_pixels_tab;
                    }else{
                        op_pix = s->dsp.put_no_rnd_pixels_tab;
                    }
                    /* after forward MC, backward MC averages into the same block */
                    if (s->mv_dir & MV_DIR_FORWARD) {
                        MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
                        op_pix = s->dsp.avg_pixels_tab;
                        op_qpix= s->me.qpel_avg;
                    }
                    if (s->mv_dir & MV_DIR_BACKWARD) {
                        MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
                    }
                }
            }

            /* skip dequant / idct if we are really late ;) */
            if(s->hurry_up>1) goto skip_idct;
            if(s->avctx->skip_idct){
                if(  (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == B_TYPE)
                   ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != I_TYPE)
                   || s->avctx->skip_idct >= AVDISCARD_ALL)
                    goto skip_idct;
            }

            /* add dct residue */
            /* codecs whose dequant is not folded into the bitstream reader
               go through add_dequant_dct, the others through add_dct */
            if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
                                || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
                add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
                add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
                add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    if (s->chroma_y_shift){
                        add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
                        add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
                    }else{
                        dct_linesize >>= 1;
                        dct_offset >>=1;
                        add_dequant_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
                        add_dequant_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
                        add_dequant_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
                        add_dequant_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
                    }
                }
            } else if(s->codec_id != CODEC_ID_WMV2){
                add_dct(s, block[0], 0, dest_y                          , dct_linesize);
                add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
                add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
                add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){//Chroma420
                        add_dct(s, block[4], 4, dest_cb, uvlinesize);
                        add_dct(s, block[5], 5, dest_cr, uvlinesize);
                    }else{
                        //chroma422
                        dct_linesize = uvlinesize << s->interlaced_dct;
                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;

                        add_dct(s, block[4], 4, dest_cb, dct_linesize);
                        add_dct(s, block[5], 5, dest_cr, dct_linesize);
                        add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
                        add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
                        if(!s->chroma_x_shift){//Chroma444
                            add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
                            add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
                            add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
                            add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
                        }
                    }
                }//fi gray
            }
            else{
                ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
            }
        } else {
            /* dct only in intra block */
            if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
                put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
                put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
                put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){
                        put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
                        put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
                    }else{
                        dct_offset >>=1;
                        dct_linesize >>=1;
                        put_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
                        put_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
                        put_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
                        put_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
                    }
                }
            }else{
                /* MPEG-1/2 blocks are already dequantized: run the IDCT directly */
                s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
                s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
                s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
                s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){
                        s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
                        s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
                    }else{

                        dct_linesize = uvlinesize << s->interlaced_dct;
                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;

                        s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
                        s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
                        s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
                        s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
                        if(!s->chroma_x_shift){//Chroma444
                            s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
                            s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
                            s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
                            s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
                        }
                    }
                }//gray
            }
        }
skip_idct:
        /* if we rendered into the scratchpad, copy the result out now */
        if(!readable){
            s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
            s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
            s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
        }
    }
}
4110
4111 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
4112     if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1);
4113     else                  MPV_decode_mb_internal(s, block, 0);
4114 }
4115
4116 #ifdef CONFIG_ENCODERS
4117
4118 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
4119 {
4120     static const char tab[64]=
4121         {3,2,2,1,1,1,1,1,
4122          1,1,1,1,1,1,1,1,
4123          1,1,1,1,1,1,1,1,
4124          0,0,0,0,0,0,0,0,
4125          0,0,0,0,0,0,0,0,
4126          0,0,0,0,0,0,0,0,
4127          0,0,0,0,0,0,0,0,
4128          0,0,0,0,0,0,0,0};
4129     int score=0;
4130     int run=0;
4131     int i;
4132     DCTELEM *block= s->block[n];
4133     const int last_index= s->block_last_index[n];
4134     int skip_dc;
4135
4136     if(threshold<0){
4137         skip_dc=0;
4138         threshold= -threshold;
4139     }else
4140         skip_dc=1;
4141
4142     /* are all which we could set to zero are allready zero? */
4143     if(last_index<=skip_dc - 1) return;
4144
4145     for(i=0; i<=last_index; i++){
4146         const int j = s->intra_scantable.permutated[i];
4147         const int level = ABS(block[j]);
4148         if(level==1){
4149             if(skip_dc && i==0) continue;
4150             score+= tab[run];
4151             run=0;
4152         }else if(level>1){
4153             return;
4154         }else{
4155             run++;
4156         }
4157     }
4158     if(score >= threshold) return;
4159     for(i=skip_dc; i<=last_index; i++){
4160         const int j = s->intra_scantable.permutated[i];
4161         block[j]=0;
4162     }
4163     if(block[0]) s->block_last_index[n]= 0;
4164     else         s->block_last_index[n]= -1;
4165 }
4166
4167 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
4168 {
4169     int i;
4170     const int maxlevel= s->max_qcoeff;
4171     const int minlevel= s->min_qcoeff;
4172     int overflow=0;
4173
4174     if(s->mb_intra){
4175         i=1; //skip clipping of intra dc
4176     }else
4177         i=0;
4178
4179     for(;i<=last_index; i++){
4180         const int j= s->intra_scantable.permutated[i];
4181         int level = block[j];
4182
4183         if     (level>maxlevel){
4184             level=maxlevel;
4185             overflow++;
4186         }else if(level<minlevel){
4187             level=minlevel;
4188             overflow++;
4189         }
4190
4191         block[j]= level;
4192     }
4193
4194     if(overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
4195         av_log(s->avctx, AV_LOG_INFO, "warning, clipping %d dct coefficients to %d..%d\n", overflow, minlevel, maxlevel);
4196 }
4197
4198 #endif //CONFIG_ENCODERS
4199
4200 /**
4201  *
4202  * @param h is the normal height, this will be reduced automatically if needed for the last row
4203  */
4204 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
4205     if (s->avctx->draw_horiz_band) {
4206         AVFrame *src;
4207         int offset[4];
4208
4209         if(s->picture_structure != PICT_FRAME){
4210             h <<= 1;
4211             y <<= 1;
4212             if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
4213         }
4214
4215         h= FFMIN(h, s->avctx->height - y);
4216
4217         if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
4218             src= (AVFrame*)s->current_picture_ptr;
4219         else if(s->last_picture_ptr)
4220             src= (AVFrame*)s->last_picture_ptr;
4221         else
4222             return;
4223
4224         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
4225             offset[0]=
4226             offset[1]=
4227             offset[2]=
4228             offset[3]= 0;
4229         }else{
4230             offset[0]= y * s->linesize;;
4231             offset[1]=
4232             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
4233             offset[3]= 0;
4234         }
4235
4236         emms_c();
4237
4238         s->avctx->draw_horiz_band(s->avctx, src, offset,
4239                                   y, s->picture_structure, h);
4240     }
4241 }
4242
4243 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
4244     const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
4245     const int uvlinesize= s->current_picture.linesize[1];
4246     const int mb_size= 4 - s->avctx->lowres;
4247
4248     s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
4249     s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
4250     s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
4251     s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
4252     s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
4253     s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
4254     //block_index is not used by mpeg2, so it is not affected by chroma_format
4255
4256     s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
4257     s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
4258     s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
4259
4260     if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
4261     {
4262         s->dest[0] += s->mb_y *   linesize << mb_size;
4263         s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
4264         s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
4265     }
4266 }
4267
4268 #ifdef CONFIG_ENCODERS
4269
/* Fills weight[64] with a local-variance based visual weight for each pixel
   of an 8x8 block: 36 * sqrt(count*sum(v^2) - sum(v)^2) / count over the
   3x3 neighbourhood clipped to the block. */
static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){
    int x, y;
//FIXME optimize
    for(y=0; y<8; y++){
        for(x=0; x<8; x++){
            const int y0 = FFMAX(y-1, 0), y1 = FFMIN(8, y+2);
            const int x0 = FFMAX(x-1, 0), x1 = FFMIN(8, x+2);
            int sum = 0, sqr = 0, count = 0;
            int xx, yy;

            for (yy = y0; yy < y1; yy++) {
                for (xx = x0; xx < x1; xx++) {
                    const int v = ptr[xx + yy*stride];
                    sum += v;
                    sqr += v*v;
                    count++;
                }
            }
            weight[x + 8*y] = (36*ff_sqrt(count*sqr - sum*sum)) / count;
        }
    }
}
4292
4293 static always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, int motion_y, int mb_block_height, int mb_block_count)
4294 {
4295     int16_t weight[8][64];
4296     DCTELEM orig[8][64];
4297     const int mb_x= s->mb_x;
4298     const int mb_y= s->mb_y;
4299     int i;
4300     int skip_dct[8];
4301     int dct_offset   = s->linesize*8; //default for progressive frames
4302     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
4303     int wrap_y, wrap_c;
4304
4305     for(i=0; i<mb_block_count; i++) skip_dct[i]=0;
4306
4307     if(s->adaptive_quant){
4308         const int last_qp= s->qscale;
4309         const int mb_xy= mb_x + mb_y*s->mb_stride;
4310
4311         s->lambda= s->lambda_table[mb_xy];
4312         update_qscale(s);
4313
4314         if(!(s->flags&CODEC_FLAG_QP_RD)){
4315             s->dquant= s->qscale - last_qp;
4316
4317             if(s->out_format==FMT_H263){
4318                 s->dquant= clip(s->dquant, -2, 2); //FIXME RD
4319
4320                 if(s->codec_id==CODEC_ID_MPEG4){
4321                     if(!s->mb_intra){
4322                         if(s->pict_type == B_TYPE){
4323                             if(s->dquant&1)
4324                                 s->dquant= (s->dquant/2)*2;
4325                             if(s->mv_dir&MV_DIRECT)
4326                                 s->dquant= 0;
4327                         }
4328                         if(s->mv_type==MV_TYPE_8X8)
4329                             s->dquant=0;
4330                     }
4331                 }
4332             }
4333         }
4334         ff_set_qscale(s, last_qp + s->dquant);
4335     }else if(s->flags&CODEC_FLAG_QP_RD)
4336         ff_set_qscale(s, s->qscale + s->dquant);
4337
4338     wrap_y = s->linesize;
4339     wrap_c = s->uvlinesize;
4340     ptr_y = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
4341     ptr_cb = s->new_picture.data[1] + (mb_y * mb_block_height * wrap_c) + mb_x * 8;
4342     ptr_cr = s->new_picture.data[2] + (mb_y * mb_block_height * wrap_c) + mb_x * 8;
4343
4344     if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
4345         uint8_t *ebuf= s->edge_emu_buffer + 32;
4346         ff_emulated_edge_mc(ebuf            , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width   , s->height);
4347         ptr_y= ebuf;
4348         ff_emulated_edge_mc(ebuf+18*wrap_y  , ptr_cb, wrap_c, 8, mb_block_height, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
4349         ptr_cb= ebuf+18*wrap_y;
4350         ff_emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, mb_block_height, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
4351         ptr_cr= ebuf+18*wrap_y+8;
4352     }
4353
4354     if (s->mb_intra) {
4355         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
4356             int progressive_score, interlaced_score;
4357
4358             s->interlaced_dct=0;
4359             progressive_score= s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y, 8)
4360                               +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400;
4361
4362             if(progressive_score > 0){
4363                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y*2, 8)
4364                                   +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y  , NULL, wrap_y*2, 8);
4365                 if(progressive_score > interlaced_score){
4366                     s->interlaced_dct=1;
4367
4368                     dct_offset= wrap_y;
4369                     wrap_y<<=1;
4370                     if (s->chroma_format == CHROMA_422)
4371                         wrap_c<<=1;
4372                 }
4373             }
4374         }
4375
4376         s->dsp.get_pixels(s->block[0], ptr_y                 , wrap_y);
4377         s->dsp.get_pixels(s->block[1], ptr_y              + 8, wrap_y);
4378         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset    , wrap_y);
4379         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
4380
4381         if(s->flags&CODEC_FLAG_GRAY){
4382             skip_dct[4]= 1;
4383             skip_dct[5]= 1;
4384         }else{
4385             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
4386             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
4387             if(!s->chroma_y_shift){ /* 422 */
4388                 s->dsp.get_pixels(s->block[6], ptr_cb + (dct_offset>>1), wrap_c);
4389                 s->dsp.get_pixels(s->block[7], ptr_cr + (dct_offset>>1), wrap_c);
4390             }
4391         }
4392     }else{
4393         op_pixels_func (*op_pix)[4];
4394         qpel_mc_func (*op_qpix)[16];
4395         uint8_t *dest_y, *dest_cb, *dest_cr;
4396
4397         dest_y  = s->dest[0];
4398         dest_cb = s->dest[1];
4399         dest_cr = s->dest[2];
4400
4401         if ((!s->no_rounding) || s->pict_type==B_TYPE){
4402             op_pix = s->dsp.put_pixels_tab;
4403             op_qpix= s->dsp.put_qpel_pixels_tab;
4404         }else{
4405             op_pix = s->dsp.put_no_rnd_pixels_tab;
4406             op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
4407         }
4408
4409         if (s->mv_dir & MV_DIR_FORWARD) {
4410             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
4411             op_pix = s->dsp.avg_pixels_tab;
4412             op_qpix= s->dsp.avg_qpel_pixels_tab;
4413         }
4414         if (s->mv_dir & MV_DIR_BACKWARD) {
4415             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
4416         }
4417
4418         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
4419             int progressive_score, interlaced_score;
4420
4421             s->interlaced_dct=0;
4422             progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8)
4423                               +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;
4424
4425             if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;
4426
4427             if(progressive_score>0){
4428                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8)
4429                                   +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);
4430
4431                 if(progressive_score > interlaced_score){
4432                     s->interlaced_dct=1;
4433
4434                     dct_offset= wrap_y;
4435                     wrap_y<<=1;
4436                     if (s->chroma_format == CHROMA_422)
4437                         wrap_c<<=1;
4438                 }
4439             }
4440         }
4441
4442         s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
4443         s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
4444         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
4445         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
4446
4447         if(s->flags&CODEC_FLAG_GRAY){
4448             skip_dct[4]= 1;
4449             skip_dct[5]= 1;
4450         }else{
4451             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
4452             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
4453             if(!s->chroma_y_shift){ /* 422 */
4454                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset>>1), dest_cb + (dct_offset>>1), wrap_c);
4455                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset>>1), dest_cr + (dct_offset>>1), wrap_c);
4456             }
4457         }
4458         /* pre quantization */
4459         if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
4460             //FIXME optimize
4461             if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
4462             if(s->dsp.sad[1](NULL, ptr_y            + 8, dest_y            + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
4463             if(s->dsp.sad[1](NULL, ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
4464             if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
4465             if(s->dsp.sad[1](NULL, ptr_cb              , dest_cb              , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
4466             if(s->dsp.sad[1](NULL, ptr_cr              , dest_cr              , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
4467             if(!s->chroma_y_shift){ /* 422 */
4468                 if(s->dsp.sad[1](NULL, ptr_cb +(dct_offset>>1), dest_cb +(dct_offset>>1), wrap_c, 8) < 20*s->qscale) skip_dct[6]= 1;
4469                 if(s->dsp.sad[1](NULL, ptr_cr +(dct_offset>>1), dest_cr +(dct_offset>>1), wrap_c, 8) < 20*s->qscale) skip_dct[7]= 1;
4470             }
4471         }
4472     }
4473
4474     if(s->avctx->quantizer_noise_shaping){
4475         if(!skip_dct[0]) get_vissual_weight(weight[0], ptr_y                 , wrap_y);
4476         if(!skip_dct[1]) get_vissual_weight(weight[1], ptr_y              + 8, wrap_y);
4477         if(!skip_dct[2]) get_vissual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
4478         if(!skip_dct[3]) get_vissual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
4479         if(!skip_dct[4]) get_vissual_weight(weight[4], ptr_cb                , wrap_c);
4480         if(!skip_dct[5]) get_vissual_weight(weight[5], ptr_cr                , wrap_c);
4481         if(!s->chroma_y_shift){ /* 422 */
4482             if(!skip_dct[6]) get_vissual_weight(weight[6], ptr_cb + (dct_offset>>1), wrap_c);
4483             if(!skip_dct[7]) get_vissual_weight(weight[7], ptr_cr + (dct_offset>>1), wrap_c);
4484         }
4485         memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*mb_block_count);
4486     }
4487
4488     /* DCT & quantize */
4489     assert(s->out_format!=FMT_MJPEG || s->qscale==8);
4490     {
4491         for(i=0;i<mb_block_count;i++) {
4492             if(!skip_dct[i]){
4493                 int overflow;
4494                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
4495             // FIXME we could decide to change to quantizer instead of clipping
4496             // JS: I don't think that would be a good idea it could lower quality instead
4497             //     of improve it. Just INTRADC clipping deserves changes in quantizer
4498                 if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
4499             }else
4500                 s->block_last_index[i]= -1;
4501         }
4502         if(s->avctx->quantizer_noise_shaping){
4503             for(i=0;i<mb_block_count;i++) {
4504                 if(!skip_dct[i]){
4505                     s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale);
4506                 }
4507             }
4508         }
4509
4510         if(s->luma_elim_threshold && !s->mb_intra)
4511             for(i=0; i<4; i++)
4512                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
4513         if(s->chroma_elim_threshold && !s->mb_intra)
4514             for(i=4; i<mb_block_count; i++)
4515                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
4516
4517         if(s->flags & CODEC_FLAG_CBP_RD){
4518             for(i=0;i<mb_block_count;i++) {
4519                 if(s->block_last_index[i] == -1)
4520                     s->coded_score[i]= INT_MAX/256;
4521             }
4522         }
4523     }
4524
4525     if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
4526         s->block_last_index[4]=
4527         s->block_last_index[5]= 0;
4528         s->block[4][0]=
4529         s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
4530     }
4531
4532     //non c quantize code returns incorrect block_last_index FIXME
4533     if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
4534         for(i=0; i<mb_block_count; i++){
4535             int j;
4536             if(s->block_last_index[i]>0){
4537                 for(j=63; j>0; j--){
4538                     if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
4539                 }
4540                 s->block_last_index[i]= j;
4541             }
4542         }
4543     }
4544
4545     /* huffman encode */
4546     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
4547     case CODEC_ID_MPEG1VIDEO:
4548     case CODEC_ID_MPEG2VIDEO:
4549         mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
4550     case CODEC_ID_MPEG4:
4551         mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
4552     case CODEC_ID_MSMPEG4V2:
4553     case CODEC_ID_MSMPEG4V3:
4554     case CODEC_ID_WMV1:
4555         msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
4556     case CODEC_ID_WMV2:
4557          ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
4558 #ifdef CONFIG_H261_ENCODER
4559     case CODEC_ID_H261:
4560         ff_h261_encode_mb(s, s->block, motion_x, motion_y); break;
4561 #endif
4562     case CODEC_ID_H263:
4563     case CODEC_ID_H263P:
4564     case CODEC_ID_FLV1:
4565     case CODEC_ID_RV10:
4566     case CODEC_ID_RV20:
4567         h263_encode_mb(s, s->block, motion_x, motion_y); break;
4568     case CODEC_ID_MJPEG:
4569         mjpeg_encode_mb(s, s->block); break;
4570     default:
4571         assert(0);
4572     }
4573 }
4574
4575 static always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
4576 {
4577     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
4578     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
4579 }
4580
4581 #endif //CONFIG_ENCODERS
4582
4583 void ff_mpeg_flush(AVCodecContext *avctx){
4584     int i;
4585     MpegEncContext *s = avctx->priv_data;
4586
4587     if(s==NULL || s->picture==NULL)
4588         return;
4589
4590     for(i=0; i<MAX_PICTURE_COUNT; i++){
4591        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
4592                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
4593         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
4594     }
4595     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
4596
4597     s->mb_x= s->mb_y= 0;
4598
4599     s->parse_context.state= -1;
4600     s->parse_context.frame_start_found= 0;
4601     s->parse_context.overread= 0;
4602     s->parse_context.overread_index= 0;
4603     s->parse_context.index= 0;
4604     s->parse_context.last_index= 0;
4605     s->bitstream_buffer_size=0;
4606 }
4607
4608 #ifdef CONFIG_ENCODERS
4609 void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
4610 {
4611     const uint16_t *srcw= (uint16_t*)src;
4612     int words= length>>4;
4613     int bits= length&15;
4614     int i;
4615
4616     if(length==0) return;
4617
4618     if(words < 16){
4619         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4620     }else if(put_bits_count(pb)&7){
4621         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4622     }else{
4623         for(i=0; put_bits_count(pb)&31; i++)
4624             put_bits(pb, 8, src[i]);
4625         flush_put_bits(pb);
4626         memcpy(pbBufPtr(pb), src+i, 2*words-i);
4627         skip_put_bytes(pb, 2*words-i);
4628     }
4629
4630     put_bits(pb, bits, be2me_16(srcw[words])>>(16-bits));
4631 }
4632
4633 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
4634     int i;
4635
4636     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
4637
4638     /* mpeg1 */
4639     d->mb_skip_run= s->mb_skip_run;
4640     for(i=0; i<3; i++)
4641         d->last_dc[i]= s->last_dc[i];
4642
4643     /* statistics */
4644     d->mv_bits= s->mv_bits;
4645     d->i_tex_bits= s->i_tex_bits;
4646     d->p_tex_bits= s->p_tex_bits;
4647     d->i_count= s->i_count;
4648     d->f_count= s->f_count;
4649     d->b_count= s->b_count;
4650     d->skip_count= s->skip_count;
4651     d->misc_bits= s->misc_bits;
4652     d->last_bits= 0;
4653
4654     d->mb_skipped= 0;
4655     d->qscale= s->qscale;
4656     d->dquant= s->dquant;
4657 }
4658
4659 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
4660     int i;
4661
4662     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
4663     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
4664
4665     /* mpeg1 */
4666     d->mb_skip_run= s->mb_skip_run;
4667     for(i=0; i<3; i++)
4668         d->last_dc[i]= s->last_dc[i];
4669
4670     /* statistics */
4671     d->mv_bits= s->mv_bits;
4672     d->i_tex_bits= s->i_tex_bits;
4673     d->p_tex_bits= s->p_tex_bits;
4674     d->i_count= s->i_count;
4675     d->f_count= s->f_count;
4676     d->b_count= s->b_count;
4677     d->skip_count= s->skip_count;
4678     d->misc_bits= s->misc_bits;
4679
4680     d->mb_intra= s->mb_intra;
4681     d->mb_skipped= s->mb_skipped;
4682     d->mv_type= s->mv_type;
4683     d->mv_dir= s->mv_dir;
4684     d->pb= s->pb;
4685     if(s->data_partitioning){
4686         d->pb2= s->pb2;
4687         d->tex_pb= s->tex_pb;
4688     }
4689     d->block= s->block;
4690     for(i=0; i<8; i++)
4691         d->block_last_index[i]= s->block_last_index[i];
4692     d->interlaced_dct= s->interlaced_dct;
4693     d->qscale= s->qscale;
4694 }
4695
/**
 * Trial-encode the current macroblock with one candidate coding mode and
 * keep the result only if its score beats the best found so far.
 *
 * The function double-buffers both the bitstream writers (pb/pb2/tex_pb,
 * indexed by *next_block) and the reconstruction destination, so the best
 * trial survives while the other buffer is reused for the next attempt.
 *
 * @param s          working context, restored from @p backup before encoding
 * @param backup     state snapshot taken before any trial of this MB
 * @param best       receives the encoder state of the winning trial
 * @param type       candidate MB type (forwarded to the context copies)
 * @param pb,pb2,tex_pb  double-buffered bitstream writers
 * @param dmin       in/out: best score so far
 * @param next_block in/out: which buffer set the next trial should use
 * @param motion_x,motion_y  motion vector for this trial
 */
static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
                           PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
                           int *dmin, int *next_block, int motion_x, int motion_y)
{
    int score;
    uint8_t *dest_backup[3];

    /* restore pre-trial state, then select this trial's buffer set */
    copy_context_before_encode(s, backup, type);

    s->block= s->blocks[*next_block];
    s->pb= pb[*next_block];
    if(s->data_partitioning){
        s->pb2   = pb2   [*next_block];
        s->tex_pb= tex_pb[*next_block];
    }

    /* buffer set 1 must not overwrite the best trial's reconstruction,
       so redirect the output to the scratchpad */
    if(*next_block){
        memcpy(dest_backup, s->dest, sizeof(s->dest));
        s->dest[0] = s->rd_scratchpad;
        s->dest[1] = s->rd_scratchpad + 16*s->linesize;
        s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
        assert(s->linesize >= 32); //FIXME
    }

    encode_mb(s, motion_x, motion_y);

    /* rate: bits written by this trial across all partitions */
    score= put_bits_count(&s->pb);
    if(s->data_partitioning){
        score+= put_bits_count(&s->pb2);
        score+= put_bits_count(&s->tex_pb);
    }

    /* full RD mode: decode the MB and add the distortion term */
    if(s->avctx->mb_decision == FF_MB_DECISION_RD){
        MPV_decode_mb(s, s->block);

        score *= s->lambda2;
        score += sse_mb(s) << FF_LAMBDA_SHIFT;
    }

    if(*next_block){
        memcpy(s->dest, dest_backup, sizeof(s->dest));
    }

    /* new winner: record its state and flip to the other buffer set */
    if(score<*dmin){
        *dmin= score;
        *next_block^=1;

        copy_context_after_encode(best, s, type);
    }
}
4746
4747 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
4748     uint32_t *sq = squareTbl + 256;
4749     int acc=0;
4750     int x,y;
4751
4752     if(w==16 && h==16)
4753         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
4754     else if(w==8 && h==8)
4755         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
4756
4757     for(y=0; y<h; y++){
4758         for(x=0; x<w; x++){
4759             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
4760         }
4761     }
4762
4763     assert(acc>=0);
4764
4765     return acc;
4766 }
4767
4768 static int sse_mb(MpegEncContext *s){
4769     int w= 16;
4770     int h= 16;
4771
4772     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
4773     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
4774
4775     if(w==16 && h==16)
4776       if(s->avctx->mb_cmp == FF_CMP_NSSE){
4777         return  s->dsp.nsse[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4778                +s->dsp.nsse[1](s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4779                +s->dsp.nsse[1](s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4780       }else{
4781         return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4782                +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4783                +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4784       }
4785     else
4786         return  sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
4787                +sse(s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
4788                +sse(s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
4789 }
4790
4791 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
4792     MpegEncContext *s= arg;
4793
4794
4795     s->me.pre_pass=1;
4796     s->me.dia_size= s->avctx->pre_dia_size;
4797     s->first_slice_line=1;
4798     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
4799         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
4800             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4801         }
4802         s->first_slice_line=0;
4803     }
4804
4805     s->me.pre_pass=0;
4806
4807     return 0;
4808 }
4809
4810 static int estimate_motion_thread(AVCodecContext *c, void *arg){
4811     MpegEncContext *s= arg;
4812
4813     s->me.dia_size= s->avctx->dia_size;
4814     s->first_slice_line=1;
4815     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
4816         s->mb_x=0; //for block init below
4817         ff_init_block_index(s);
4818         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
4819             s->block_index[0]+=2;
4820             s->block_index[1]+=2;
4821             s->block_index[2]+=2;
4822             s->block_index[3]+=2;
4823
4824             /* compute motion vector & mb_type and store in context */
4825             if(s->pict_type==B_TYPE)
4826                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
4827             else
4828                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4829         }
4830         s->first_slice_line=0;
4831     }
4832     return 0;
4833 }
4834
4835 static int mb_var_thread(AVCodecContext *c, void *arg){
4836     MpegEncContext *s= arg;
4837     int mb_x, mb_y;
4838
4839     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4840         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4841             int xx = mb_x * 16;
4842             int yy = mb_y * 16;
4843             uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
4844             int varc;
4845             int sum = s->dsp.pix_sum(pix, s->linesize);
4846
4847             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
4848
4849             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
4850             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
4851             s->me.mb_var_sum_temp    += varc;
4852         }
4853     }
4854     return 0;
4855 }
4856
4857 static void write_slice_end(MpegEncContext *s){
4858     if(s->codec_id==CODEC_ID_MPEG4){
4859         if(s->partitioned_frame){
4860             ff_mpeg4_merge_partitions(s);
4861         }
4862
4863         ff_mpeg4_stuffing(&s->pb);
4864     }else if(s->out_format == FMT_MJPEG){
4865         ff_mjpeg_stuffing(&s->pb);
4866     }
4867
4868     align_put_bits(&s->pb);
4869     flush_put_bits(&s->pb);
4870
4871     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
4872         s->misc_bits+= get_bits_diff(s);
4873 }
4874
4875 static int encode_thread(AVCodecContext *c, void *arg){
4876     MpegEncContext *s= arg;
4877     int mb_x, mb_y, pdif = 0;
4878     int i, j;
4879     MpegEncContext best_s, backup_s;
4880     uint8_t bit_buf[2][MAX_MB_BYTES];
4881     uint8_t bit_buf2[2][MAX_MB_BYTES];
4882     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
4883     PutBitContext pb[2], pb2[2], tex_pb[2];
4884 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
4885
4886     for(i=0; i<2; i++){
4887         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
4888         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
4889         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
4890     }
4891
4892     s->last_bits= put_bits_count(&s->pb);
4893     s->mv_bits=0;
4894     s->misc_bits=0;
4895     s->i_tex_bits=0;
4896     s->p_tex_bits=0;
4897     s->i_count=0;
4898     s->f_count=0;
4899     s->b_count=0;
4900     s->skip_count=0;
4901
4902     for(i=0; i<3; i++){
4903         /* init last dc values */
4904         /* note: quant matrix value (8) is implied here */
4905         s->last_dc[i] = 128 << s->intra_dc_precision;
4906
4907         s->current_picture.error[i] = 0;
4908     }
4909     s->mb_skip_run = 0;
4910     memset(s->last_mv, 0, sizeof(s->last_mv));
4911
4912     s->last_mv_dir = 0;
4913
4914     switch(s->codec_id){
4915     case CODEC_ID_H263:
4916     case CODEC_ID_H263P:
4917     case CODEC_ID_FLV1:
4918         s->gob_index = ff_h263_get_gob_height(s);
4919         break;
4920     case CODEC_ID_MPEG4:
4921         if(s->partitioned_frame)
4922             ff_mpeg4_init_partitions(s);
4923         break;
4924     }
4925
4926     s->resync_mb_x=0;
4927     s->resync_mb_y=0;
4928     s->first_slice_line = 1;
4929     s->ptr_lastgob = s->pb.buf;
4930     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4931 //    printf("row %d at %X\n", s->mb_y, (int)s);
4932         s->mb_x=0;
4933         s->mb_y= mb_y;
4934
4935         ff_set_qscale(s, s->qscale);
4936         ff_init_block_index(s);
4937
4938         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4939             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
4940             int mb_type= s->mb_type[xy];
4941 //            int d;
4942             int dmin= INT_MAX;
4943             int dir;
4944
4945             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
4946                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4947                 return -1;
4948             }
4949             if(s->data_partitioning){
4950                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
4951                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
4952                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4953                     return -1;
4954                 }
4955             }
4956
4957             s->mb_x = mb_x;
4958             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
4959             ff_update_block_index(s);
4960
4961 #ifdef CONFIG_H261_ENCODER
4962             if(s->codec_id == CODEC_ID_H261){
4963                 ff_h261_reorder_mb_index(s);
4964                 xy= s->mb_y*s->mb_stride + s->mb_x;
4965                 mb_type= s->mb_type[xy];
4966             }
4967 #endif
4968
4969             /* write gob / video packet header  */
4970             if(s->rtp_mode){
4971                 int current_packet_size, is_gob_start;
4972
4973                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
4974
4975                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
4976
4977                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
4978
4979                 switch(s->codec_id){
4980                 case CODEC_ID_H263:
4981                 case CODEC_ID_H263P:
4982                     if(!s->h263_slice_structured)
4983                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
4984                     break;
4985                 case CODEC_ID_MPEG2VIDEO:
4986                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
4987                 case CODEC_ID_MPEG1VIDEO:
4988                     if(s->mb_skip_run) is_gob_start=0;
4989                     break;
4990                 }
4991
4992                 if(is_gob_start){
4993                     if(s->start_mb_y != mb_y || mb_x!=0){
4994                         write_slice_end(s);
4995
4996                         if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
4997                             ff_mpeg4_init_partitions(s);
4998                         }
4999                     }
5000
5001                     assert((put_bits_count(&s->pb)&7) == 0);
5002                     current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
5003
5004                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
5005                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
5006                         int d= 100 / s->avctx->error_rate;
5007                         if(r % d == 0){
5008                             current_packet_size=0;
5009 #ifndef ALT_BITSTREAM_WRITER
5010                             s->pb.buf_ptr= s->ptr_lastgob;
5011 #endif
5012                             assert(pbBufPtr(&s->pb) == s->ptr_lastgob);
5013                         }
5014                     }
5015
5016                     if (s->avctx->rtp_callback){
5017                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
5018                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
5019                     }
5020
5021                     switch(s->codec_id){
5022                     case CODEC_ID_MPEG4:
5023                         ff_mpeg4_encode_video_packet_header(s);
5024                         ff_mpeg4_clean_buffers(s);
5025                     break;
5026                     case CODEC_ID_MPEG1VIDEO:
5027                     case CODEC_ID_MPEG2VIDEO:
5028                         ff_mpeg1_encode_slice_header(s);
5029                         ff_mpeg1_clean_buffers(s);
5030                     break;
5031                     case CODEC_ID_H263:
5032                     case CODEC_ID_H263P:
5033                         h263_encode_gob_header(s, mb_y);
5034                     break;
5035                     }
5036
5037                     if(s->flags&CODEC_FLAG_PASS1){
5038                         int bits= put_bits_count(&s->pb);
5039                         s->misc_bits+= bits - s->last_bits;
5040                         s->last_bits= bits;
5041                     }
5042
5043                     s->ptr_lastgob += current_packet_size;
5044                     s->first_slice_line=1;
5045                     s->resync_mb_x=mb_x;
5046                     s->resync_mb_y=mb_y;
5047                 }
5048             }
5049
5050             if(  (s->resync_mb_x   == s->mb_x)
5051                && s->resync_mb_y+1 == s->mb_y){
5052                 s->first_slice_line=0;
5053             }
5054
5055             s->mb_skipped=0;
5056             s->dquant=0; //only for QP_RD
5057
5058             if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
5059                 int next_block=0;
5060                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
5061
5062                 copy_context_before_encode(&backup_s, s, -1);
5063                 backup_s.pb= s->pb;
5064                 best_s.data_partitioning= s->data_partitioning;
5065                 best_s.partitioned_frame= s->partitioned_frame;
5066                 if(s->data_partitioning){
5067                     backup_s.pb2= s->pb2;
5068                     backup_s.tex_pb= s->tex_pb;
5069                 }
5070
5071                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
5072                     s->mv_dir = MV_DIR_FORWARD;
5073                     s->mv_type = MV_TYPE_16X16;
5074                     s->mb_intra= 0;
5075                     s->mv[0][0][0] = s->p_mv_table[xy][0];
5076                     s->mv[0][0][1] = s->p_mv_table[xy][1];
5077                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
5078                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5079                 }
5080                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
5081                     s->mv_dir = MV_DIR_FORWARD;
5082                     s->mv_type = MV_TYPE_FIELD;
5083                     s->mb_intra= 0;
5084                     for(i=0; i<2; i++){
5085                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
5086                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
5087                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
5088                     }
5089                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
5090                                  &dmin, &next_block, 0, 0);
5091                 }
5092                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
5093                     s->mv_dir = MV_DIR_FORWARD;
5094                     s->mv_type = MV_TYPE_16X16;
5095                     s->mb_intra= 0;
5096                     s->mv[0][0][0] = 0;
5097                     s->mv[0][0][1] = 0;
5098                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
5099                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5100                 }
5101                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
5102                     s->mv_dir = MV_DIR_FORWARD;
5103                     s->mv_type = MV_TYPE_8X8;
5104                     s->mb_intra= 0;
5105                     for(i=0; i<4; i++){
5106                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5107                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5108                     }
5109                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
5110                                  &dmin, &next_block, 0, 0);
5111                 }
5112                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
5113                     s->mv_dir = MV_DIR_FORWARD;
5114                     s->mv_type = MV_TYPE_16X16;
5115                     s->mb_intra= 0;
5116                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5117                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5118                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
5119                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5120                 }
5121                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
5122                     s->mv_dir = MV_DIR_BACKWARD;
5123                     s->mv_type = MV_TYPE_16X16;
5124                     s->mb_intra= 0;
5125                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5126                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5127                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
5128                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
5129                 }
5130                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
5131                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5132                     s->mv_type = MV_TYPE_16X16;
5133                     s->mb_intra= 0;
5134                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5135                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5136                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5137                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5138                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
5139                                  &dmin, &next_block, 0, 0);
5140                 }
5141                 if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
5142                     int mx= s->b_direct_mv_table[xy][0];
5143                     int my= s->b_direct_mv_table[xy][1];
5144
5145                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5146                     s->mb_intra= 0;
5147                     ff_mpeg4_set_direct_mv(s, mx, my);
5148                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
5149                                  &dmin, &next_block, mx, my);
5150                 }
5151                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
5152                     s->mv_dir = MV_DIR_FORWARD;
5153                     s->mv_type = MV_TYPE_FIELD;
5154                     s->mb_intra= 0;
5155                     for(i=0; i<2; i++){
5156                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5157                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5158                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5159                     }
5160                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
5161                                  &dmin, &next_block, 0, 0);
5162                 }
5163                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
5164                     s->mv_dir = MV_DIR_BACKWARD;
5165                     s->mv_type = MV_TYPE_FIELD;
5166                     s->mb_intra= 0;
5167                     for(i=0; i<2; i++){
5168                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5169                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5170                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5171                     }
5172                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
5173                                  &dmin, &next_block, 0, 0);
5174                 }
5175                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
5176                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5177                     s->mv_type = MV_TYPE_FIELD;
5178                     s->mb_intra= 0;
5179                     for(dir=0; dir<2; dir++){
5180                         for(i=0; i<2; i++){
5181                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5182                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5183                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5184                         }
5185                     }
5186                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
5187                                  &dmin, &next_block, 0, 0);
5188                 }
5189                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
5190                     s->mv_dir = 0;
5191                     s->mv_type = MV_TYPE_16X16;
5192                     s->mb_intra= 1;
5193                     s->mv[0][0][0] = 0;
5194                     s->mv[0][0][1] = 0;
5195                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
5196                                  &dmin, &next_block, 0, 0);
5197                     if(s->h263_pred || s->h263_aic){
5198                         if(best_s.mb_intra)
5199                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
5200                         else
5201                             ff_clean_intra_table_entries(s); //old mode?
5202                     }
5203                 }
5204
5205                 if(s->flags & CODEC_FLAG_QP_RD){
5206                     if(best_s.mv_type==MV_TYPE_16X16 && !(best_s.mv_dir&MV_DIRECT)){
5207                         const int last_qp= backup_s.qscale;
5208                         int dquant, dir, qp, dc[6];
5209                         DCTELEM ac[6][16];
5210                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
5211
5212                         assert(backup_s.dquant == 0);
5213
5214                         //FIXME intra
5215                         s->mv_dir= best_s.mv_dir;
5216                         s->mv_type = MV_TYPE_16X16;
5217                         s->mb_intra= best_s.mb_intra;
5218                         s->mv[0][0][0] = best_s.mv[0][0][0];
5219                         s->mv[0][0][1] = best_s.mv[0][0][1];
5220                         s->mv[1][0][0] = best_s.mv[1][0][0];
5221                         s->mv[1][0][1] = best_s.mv[1][0][1];
5222
5223                         dir= s->pict_type == B_TYPE ? 2 : 1;
5224                         if(last_qp + dir > s->avctx->qmax) dir= -dir;
5225                         for(dquant= dir; dquant<=2 && dquant>=-2; dquant += dir){
5226                             qp= last_qp + dquant;
5227                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
5228                                 break;
5229                             backup_s.dquant= dquant;
5230                             if(s->mb_intra && s->dc_val[0]){
5231                                 for(i=0; i<6; i++){
5232                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
5233                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
5234                                 }
5235                             }
5236
5237                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
5238                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
5239                             if(best_s.qscale != qp){
5240                                 if(s->mb_intra && s->dc_val[0]){
5241                                     for(i=0; i<6; i++){
5242                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
5243                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
5244                                     }
5245                                 }
5246                                 if(dir > 0 && dquant==dir){
5247                                     dquant= 0;
5248                                     dir= -dir;
5249                                 }else
5250                                     break;
5251                             }
5252                         }
5253                         qp= best_s.qscale;
5254                         s->current_picture.qscale_table[xy]= qp;
5255                     }
5256                 }
5257
5258                 copy_context_after_encode(s, &best_s, -1);
5259
5260                 pb_bits_count= put_bits_count(&s->pb);
5261                 flush_put_bits(&s->pb);
5262                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
5263                 s->pb= backup_s.pb;
5264
5265                 if(s->data_partitioning){
5266                     pb2_bits_count= put_bits_count(&s->pb2);
5267                     flush_put_bits(&s->pb2);
5268                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
5269                     s->pb2= backup_s.pb2;
5270
5271                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
5272                     flush_put_bits(&s->tex_pb);
5273                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
5274                     s->tex_pb= backup_s.tex_pb;
5275                 }
5276                 s->last_bits= put_bits_count(&s->pb);
5277
5278                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5279                     ff_h263_update_motion_val(s);
5280
5281                 if(next_block==0){ //FIXME 16 vs linesize16
5282                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
5283                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
5284                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
5285                 }
5286
5287                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
5288                     MPV_decode_mb(s, s->block);
5289             } else {
5290                 int motion_x, motion_y;
5291                 s->mv_type=MV_TYPE_16X16;
5292                 // only one MB-Type possible
5293
5294                 switch(mb_type){
5295                 case CANDIDATE_MB_TYPE_INTRA:
5296                     s->mv_dir = 0;
5297                     s->mb_intra= 1;
5298                     motion_x= s->mv[0][0][0] = 0;
5299                     motion_y= s->mv[0][0][1] = 0;
5300                     break;
5301                 case CANDIDATE_MB_TYPE_INTER:
5302                     s->mv_dir = MV_DIR_FORWARD;
5303                     s->mb_intra= 0;
5304                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
5305                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
5306                     break;
5307                 case CANDIDATE_MB_TYPE_INTER_I:
5308                     s->mv_dir = MV_DIR_FORWARD;
5309                     s->mv_type = MV_TYPE_FIELD;
5310                     s->mb_intra= 0;
5311                     for(i=0; i<2; i++){
5312                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
5313                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
5314                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
5315                     }
5316                     motion_x = motion_y = 0;
5317                     break;
5318                 case CANDIDATE_MB_TYPE_INTER4V:
5319                     s->mv_dir = MV_DIR_FORWARD;
5320                     s->mv_type = MV_TYPE_8X8;
5321                     s->mb_intra= 0;
5322                     for(i=0; i<4; i++){
5323                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5324                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5325                     }
5326                     motion_x= motion_y= 0;
5327                     break;
5328                 case CANDIDATE_MB_TYPE_DIRECT:
5329                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5330                     s->mb_intra= 0;
5331                     motion_x=s->b_direct_mv_table[xy][0];
5332                     motion_y=s->b_direct_mv_table[xy][1];
5333                     ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
5334                     break;
5335                 case CANDIDATE_MB_TYPE_BIDIR:
5336                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5337                     s->mb_intra= 0;
5338                     motion_x=0;
5339                     motion_y=0;
5340                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5341                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5342                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5343                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5344                     break;
5345                 case CANDIDATE_MB_TYPE_BACKWARD:
5346                     s->mv_dir = MV_DIR_BACKWARD;
5347                     s->mb_intra= 0;
5348                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5349                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5350                     break;
5351                 case CANDIDATE_MB_TYPE_FORWARD:
5352                     s->mv_dir = MV_DIR_FORWARD;
5353                     s->mb_intra= 0;
5354                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5355                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5356 //                    printf(" %d %d ", motion_x, motion_y);
5357                     break;
5358                 case CANDIDATE_MB_TYPE_FORWARD_I:
5359                     s->mv_dir = MV_DIR_FORWARD;
5360                     s->mv_type = MV_TYPE_FIELD;
5361                     s->mb_intra= 0;
5362                     for(i=0; i<2; i++){
5363                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5364                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5365                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5366                     }
5367                     motion_x=motion_y=0;
5368                     break;
5369                 case CANDIDATE_MB_TYPE_BACKWARD_I:
5370                     s->mv_dir = MV_DIR_BACKWARD;
5371                     s->mv_type = MV_TYPE_FIELD;
5372                     s->mb_intra= 0;
5373                     for(i=0; i<2; i++){
5374                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5375                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5376                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5377                     }
5378                     motion_x=motion_y=0;
5379                     break;
5380                 case CANDIDATE_MB_TYPE_BIDIR_I:
5381                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5382                     s->mv_type = MV_TYPE_FIELD;
5383                     s->mb_intra= 0;
5384                     for(dir=0; dir<2; dir++){
5385                         for(i=0; i<2; i++){
5386                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5387                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5388                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5389                         }
5390                     }
5391                     motion_x=motion_y=0;
5392                     break;
5393                 default:
5394                     motion_x=motion_y=0; //gcc warning fix
5395                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
5396                 }
5397
5398                 encode_mb(s, motion_x, motion_y);
5399
5400                 // RAL: Update last macroblock type
5401                 s->last_mv_dir = s->mv_dir;
5402
5403                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5404                     ff_h263_update_motion_val(s);
5405
5406                 MPV_decode_mb(s, s->block);
5407             }
5408
5409             /* clean the MV table in IPS frames for direct mode in B frames */
5410             if(s->mb_intra /* && I,P,S_TYPE */){
5411                 s->p_mv_table[xy][0]=0;
5412                 s->p_mv_table[xy][1]=0;
5413             }
5414
5415             if(s->flags&CODEC_FLAG_PSNR){
5416                 int w= 16;
5417                 int h= 16;
5418
5419                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
5420                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
5421
5422                 s->current_picture.error[0] += sse(
5423                     s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
5424                     s->dest[0], w, h, s->linesize);
5425                 s->current_picture.error[1] += sse(
5426                     s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5427                     s->dest[1], w>>1, h>>1, s->uvlinesize);
5428                 s->current_picture.error[2] += sse(
5429                     s, s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5430                     s->dest[2], w>>1, h>>1, s->uvlinesize);
5431             }
5432             if(s->loop_filter){
5433                 if(s->out_format == FMT_H263)
5434                     ff_h263_loop_filter(s);
5435             }
5436 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
5437         }
5438     }
5439
5440     //not beautiful here but we must write it before flushing so it has to be here
5441     if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
5442         msmpeg4_encode_ext_header(s);
5443
5444     write_slice_end(s);
5445
5446     /* Send the last GOB if RTP */
5447     if (s->avctx->rtp_callback) {
5448         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
5449         pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
5450         /* Call the RTP callback to send the last GOB */
5451         emms_c();
5452         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
5453     }
5454
5455     return 0;
5456 }
5457
5458 #define MERGE(field) dst->field += src->field; src->field=0
5459 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
5460     MERGE(me.scene_change_score);
5461     MERGE(me.mc_mb_var_sum_temp);
5462     MERGE(me.mb_var_sum_temp);
5463 }
5464
5465 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
5466     int i;
5467
5468     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
5469     MERGE(dct_count[1]);
5470     MERGE(mv_bits);
5471     MERGE(i_tex_bits);
5472     MERGE(p_tex_bits);
5473     MERGE(i_count);
5474     MERGE(f_count);
5475     MERGE(b_count);
5476     MERGE(skip_count);
5477     MERGE(misc_bits);
5478     MERGE(error_count);
5479     MERGE(padding_bug_score);
5480     MERGE(current_picture.error[0]);
5481     MERGE(current_picture.error[1]);
5482     MERGE(current_picture.error[2]);
5483
5484     if(dst->avctx->noise_reduction){
5485         for(i=0; i<64; i++){
5486             MERGE(dct_error_sum[0][i]);
5487             MERGE(dct_error_sum[1][i]);
5488         }
5489     }
5490
5491     assert(put_bits_count(&src->pb) % 8 ==0);
5492     assert(put_bits_count(&dst->pb) % 8 ==0);
5493     ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
5494     flush_put_bits(&dst->pb);
5495 }
5496
5497 static int estimate_qp(MpegEncContext *s, int dry_run){
5498     if (s->next_lambda){
5499         s->current_picture_ptr->quality=
5500         s->current_picture.quality = s->next_lambda;
5501         if(!dry_run) s->next_lambda= 0;
5502     } else if (!s->fixed_qscale) {
5503         s->current_picture_ptr->quality=
5504         s->current_picture.quality = ff_rate_estimate_qscale(s, dry_run);
5505         if (s->current_picture.quality < 0)
5506             return -1;
5507     }
5508
5509     if(s->adaptive_quant){
5510         switch(s->codec_id){
5511         case CODEC_ID_MPEG4:
5512             ff_clean_mpeg4_qscales(s);
5513             break;
5514         case CODEC_ID_H263:
5515         case CODEC_ID_H263P:
5516         case CODEC_ID_FLV1:
5517             ff_clean_h263_qscales(s);
5518             break;
5519         }
5520
5521         s->lambda= s->lambda_table[0];
5522         //FIXME broken
5523     }else
5524         s->lambda= s->current_picture.quality;
5525 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
5526     update_qscale(s);
5527     return 0;
5528 }
5529
/**
 * Encode one complete picture: motion estimation, fcode selection,
 * rate control, header writing and the actual macroblock encoding,
 * possibly sliced across several threads.
 * @param s              main encoder context
 * @param picture_number number stored into the context and written into
 *                       the picture header
 * @return 0 on success, -1 if rate control fails to deliver a quantizer
 */
static int encode_picture(MpegEncContext *s, int picture_number)
{
    int i;
    int bits;

    s->picture_number = picture_number;

    /* Reset the average MB variance */
    s->me.mb_var_sum_temp    =
    s->me.mc_mb_var_sum_temp = 0;

    /* we need to initialize some time vars before we can encode b-frames */
    // RAL: Condition added for MPEG1VIDEO
    if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
        ff_set_mpeg4_time(s, s->picture_number);  //FIXME rename and use has_b_frames or similar

    s->me.scene_change_score=0;

//    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME ratedistoration

    /* toggle no_rounding per non-B frame (MPEG4/H263+ style flip-flop
     * rounding); I frames reset it depending on the msmpeg4 version */
    if(s->pict_type==I_TYPE){
        if(s->msmpeg4_version >= 3) s->no_rounding=1;
        else                        s->no_rounding=0;
    }else if(s->pict_type!=B_TYPE){
        if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
            s->no_rounding ^= 1;
    }

    /* preliminary quantizer: second pass reads it from the stats file,
     * otherwise reuse the lambda of the last picture of the same kind */
    if(s->flags & CODEC_FLAG_PASS2){
        if (estimate_qp(s,1) < 0)
            return -1;
        ff_get_2pass_fcode(s);
    }else if(!(s->flags & CODEC_FLAG_QSCALE)){
        if(s->pict_type==B_TYPE)
            s->lambda= s->last_lambda_for[s->pict_type];
        else
            s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
        update_qscale(s);
    }

    s->mb_intra=0; //for the rate distortion & bit compare functions
    /* give every slice thread an up to date copy of the main context */
    for(i=1; i<s->avctx->thread_count; i++){
        ff_update_duplicate_context(s->thread_context[i], s);
    }

    ff_init_me(s);

    /* Estimate motion for every MB */
    if(s->pict_type != I_TYPE){
        /* scale lambda by the user supplied ME penalty compensation */
        s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
        s->lambda2= (s->lambda2* s->avctx->me_penalty_compensation + 128)>>8;
        if(s->pict_type != B_TYPE && s->avctx->me_threshold==0){
            if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
                s->avctx->execute(s->avctx, pre_estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
            }
        }

        s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
    }else /* if(s->pict_type == I_TYPE) */{
        /* I-Frame */
        for(i=0; i<s->mb_stride*s->mb_height; i++)
            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;

        if(!s->fixed_qscale){
            /* finding spatial complexity for I-frame rate control */
            s->avctx->execute(s->avctx, mb_var_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
        }
    }
    /* collect the per-thread ME statistics back into the main context */
    for(i=1; i<s->avctx->thread_count; i++){
        merge_context_after_me(s, s->thread_context[i]);
    }
    s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
    s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
    emms_c();

    /* promote a P frame to an I frame if ME detected a scene change */
    if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){
        s->pict_type= I_TYPE;
        for(i=0; i<s->mb_stride*s->mb_height; i++)
            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
//printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
    }

    /* choose f_code/b_code from the MV tables and clip overlong vectors
     * (skipped in unlimited-MV mode) */
    if(!s->umvplus){
        if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
            s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);

            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int a,b;
                a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
                b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
                s->f_code= FFMAX(s->f_code, FFMAX(a,b));
            }

            ff_fix_long_p_mvs(s);
            ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int j;
                for(i=0; i<2; i++){
                    for(j=0; j<2; j++)
                        ff_fix_long_mvs(s, s->p_field_select_table[i], j,
                                        s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
                }
            }
        }

        if(s->pict_type==B_TYPE){
            int a, b;

            /* forward fcode covers both forward and bidir-forward tables */
            a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
            b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
            s->f_code = FFMAX(a, b);

            a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
            b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
            s->b_code = FFMAX(a, b);

            ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int dir, j;
                for(dir=0; dir<2; dir++){
                    for(i=0; i<2; i++){
                        for(j=0; j<2; j++){
                            int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
                                          : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
                            ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
                                            s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
                        }
                    }
                }
            }
        }
    }

    /* final quantizer for this picture (pict_type may have changed above) */
    if (estimate_qp(s, 0) < 0)
        return -1;

    if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE))
        s->qscale= 3; //reduce clipping problems

    if (s->out_format == FMT_MJPEG) {
        /* for mjpeg, we do include qscale in the matrix */
        s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
        for(i=1;i<64;i++){
            int j= s->dsp.idct_permutation[i];

            s->intra_matrix[j] = clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
        }
        convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
                       s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
        s->qscale= 8;
    }

    //FIXME var duplication
    s->current_picture_ptr->key_frame=
    s->current_picture.key_frame= s->pict_type == I_TYPE; //FIXME pic_ptr
    s->current_picture_ptr->pict_type=
    s->current_picture.pict_type= s->pict_type;

    if(s->current_picture.key_frame)
        s->picture_in_gop_number=0;

    /* write the picture header of the selected output format */
    s->last_bits= put_bits_count(&s->pb);
    switch(s->out_format) {
    case FMT_MJPEG:
        mjpeg_picture_header(s);
        break;
#ifdef CONFIG_H261_ENCODER
    case FMT_H261:
        ff_h261_encode_picture_header(s, picture_number);
        break;
#endif
    case FMT_H263:
        if (s->codec_id == CODEC_ID_WMV2)
            ff_wmv2_encode_picture_header(s, picture_number);
        else if (s->h263_msmpeg4)
            msmpeg4_encode_picture_header(s, picture_number);
        else if (s->h263_pred)
            mpeg4_encode_picture_header(s, picture_number);
#ifdef CONFIG_RV10_ENCODER
        else if (s->codec_id == CODEC_ID_RV10)
            rv10_encode_picture_header(s, picture_number);
#endif
#ifdef CONFIG_RV20_ENCODER
        else if (s->codec_id == CODEC_ID_RV20)
            rv20_encode_picture_header(s, picture_number);
#endif
        else if (s->codec_id == CODEC_ID_FLV1)
            ff_flv_encode_picture_header(s, picture_number);
        else
            h263_encode_picture_header(s, picture_number);
        break;
    case FMT_MPEG1:
        mpeg1_encode_picture_header(s, picture_number);
        break;
    case FMT_H264:
        break;
    default:
        assert(0);
    }
    bits= put_bits_count(&s->pb);
    s->header_bits= bits - s->last_bits;

    /* encode the macroblocks in all slice threads and merge the results */
    for(i=1; i<s->avctx->thread_count; i++){
        update_duplicate_context_after_me(s->thread_context[i], s);
    }
    s->avctx->execute(s->avctx, encode_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
    for(i=1; i<s->avctx->thread_count; i++){
        merge_context_after_encode(s, s->thread_context[i]);
    }
    emms_c();
    return 0;
}
5745
5746 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block){
5747     const int intra= s->mb_intra;
5748     int i;
5749
5750     s->dct_count[intra]++;
5751
5752     for(i=0; i<64; i++){
5753         int level= block[i];
5754
5755         if(level){
5756             if(level>0){
5757                 s->dct_error_sum[intra][i] += level;
5758                 level -= s->dct_offset[intra][i];
5759                 if(level<0) level=0;
5760             }else{
5761                 s->dct_error_sum[intra][i] -= level;
5762                 level += s->dct_offset[intra][i];
5763                 if(level>0) level=0;
5764             }
5765             block[i]= level;
5766         }
5767     }
5768 }
5769
5770 static int dct_quantize_trellis_c(MpegEncContext *s,
5771                         DCTELEM *block, int n,
5772                         int qscale, int *overflow){
5773     const int *qmat;
5774     const uint8_t *scantable= s->intra_scantable.scantable;
5775     const uint8_t *perm_scantable= s->intra_scantable.permutated;
5776     int max=0;
5777     unsigned int threshold1, threshold2;
5778     int bias=0;
5779     int run_tab[65];
5780     int level_tab[65];
5781     int score_tab[65];
5782     int survivor[65];
5783     int survivor_count;
5784     int last_run=0;
5785     int last_level=0;
5786     int last_score= 0;
5787     int last_i;
5788     int coeff[2][64];
5789     int coeff_count[64];
5790     int qmul, qadd, start_i, last_non_zero, i, dc;
5791     const int esc_length= s->ac_esc_length;
5792     uint8_t * length;
5793     uint8_t * last_length;
5794     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
5795
5796     s->dsp.fdct (block);
5797
5798     if(s->dct_error_sum)
5799         s->denoise_dct(s, block);
5800     qmul= qscale*16;
5801     qadd= ((qscale-1)|1)*8;
5802
5803     if (s->mb_intra) {
5804         int q;
5805         if (!s->h263_aic) {
5806             if (n < 4)
5807                 q = s->y_dc_scale;
5808             else
5809                 q = s->c_dc_scale;
5810             q = q << 3;
5811         } else{
5812             /* For AIC we skip quant/dequant of INTRADC */
5813             q = 1 << 3;
5814             qadd=0;
5815         }
5816
5817         /* note: block[0] is assumed to be positive */
5818         block[0] = (block[0] + (q >> 1)) / q;
5819         start_i = 1;
5820         last_non_zero = 0;
5821         qmat = s->q_intra_matrix[qscale];
5822         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
5823             bias= 1<<(QMAT_SHIFT-1);
5824         length     = s->intra_ac_vlc_length;
5825         last_length= s->intra_ac_vlc_last_length;
5826     } else {
5827         start_i = 0;
5828         last_non_zero = -1;
5829         qmat = s->q_inter_matrix[qscale];
5830         length     = s->inter_ac_vlc_length;
5831         last_length= s->inter_ac_vlc_last_length;
5832     }
5833     last_i= start_i;
5834
5835     threshold1= (1<<QMAT_SHIFT) - bias - 1;
5836     threshold2= (threshold1<<1);
5837
5838     for(i=63; i>=start_i; i--) {
5839         const int j = scantable[i];
5840         int level = block[j] * qmat[j];
5841
5842         if(((unsigned)(level+threshold1))>threshold2){
5843             last_non_zero = i;
5844             break;
5845         }
5846     }
5847
5848     for(i=start_i; i<=last_non_zero; i++) {
5849         const int j = scantable[i];
5850         int level = block[j] * qmat[j];
5851
5852 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
5853 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
5854         if(((unsigned)(level+threshold1))>threshold2){
5855             if(level>0){
5856                 level= (bias + level)>>QMAT_SHIFT;
5857                 coeff[0][i]= level;
5858                 coeff[1][i]= level-1;
5859 //                coeff[2][k]= level-2;
5860             }else{
5861                 level= (bias - level)>>QMAT_SHIFT;
5862                 coeff[0][i]= -level;
5863                 coeff[1][i]= -level+1;
5864 //                coeff[2][k]= -level+2;
5865             }
5866             coeff_count[i]= FFMIN(level, 2);
5867             assert(coeff_count[i]);
5868             max |=level;
5869         }else{
5870             coeff[0][i]= (level>>31)|1;
5871             coeff_count[i]= 1;
5872         }
5873     }
5874
5875     *overflow= s->max_qcoeff < max; //overflow might have happened
5876
5877     if(last_non_zero < start_i){
5878         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
5879         return last_non_zero;
5880     }
5881
5882     score_tab[start_i]= 0;
5883     survivor[0]= start_i;
5884     survivor_count= 1;
5885
5886     for(i=start_i; i<=last_non_zero; i++){
5887         int level_index, j;
5888         const int dct_coeff= ABS(block[ scantable[i] ]);
5889         const int zero_distoration= dct_coeff*dct_coeff;
5890         int best_score=256*256*256*120;
5891         for(level_index=0; level_index < coeff_count[i]; level_index++){
5892             int distoration;
5893             int level= coeff[level_index][i];
5894             const int alevel= ABS(level);
5895             int unquant_coeff;
5896
5897             assert(level);
5898
5899             if(s->out_format == FMT_H263){
5900                 unquant_coeff= alevel*qmul + qadd;
5901             }else{ //MPEG1
5902                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
5903                 if(s->mb_intra){
5904                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
5905                         unquant_coeff =   (unquant_coeff - 1) | 1;
5906                 }else{
5907                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
5908                         unquant_coeff =   (unquant_coeff - 1) | 1;
5909                 }
5910                 unquant_coeff<<= 3;
5911             }
5912
5913             distoration= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distoration;
5914             level+=64;
5915             if((level&(~127)) == 0){
5916                 for(j=survivor_count-1; j>=0; j--){
5917                     int run= i - survivor[j];
5918                     int score= distoration + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
5919                     score += score_tab[i-run];
5920
5921                     if(score < best_score){
5922                         best_score= score;
5923                         run_tab[i+1]= run;
5924                         level_tab[i+1]= level-64;
5925                     }
5926                 }
5927
5928                 if(s->out_format == FMT_H263){
5929                     for(j=survivor_count-1; j>=0; j--){
5930                         int run= i - survivor[j];
5931                         int score= distoration + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
5932                         score += score_tab[i-run];
5933                         if(score < last_score){
5934                             last_score= score;
5935                             last_run= run;
5936                             last_level= level-64;
5937                             last_i= i+1;
5938                         }
5939                     }
5940                 }
5941             }else{
5942                 distoration += esc_length*lambda;
5943                 for(j=survivor_count-1; j>=0; j--){
5944                     int run= i - survivor[j];
5945                     int score= distoration + score_tab[i-run];
5946
5947                     if(score < best_score){
5948                         best_score= score;
5949                         run_tab[i+1]= run;
5950                         level_tab[i+1]= level-64;
5951                     }
5952                 }
5953
5954                 if(s->out_format == FMT_H263){
5955                   for(j=survivor_count-1; j>=0; j--){
5956                         int run= i - survivor[j];
5957                         int score= distoration + score_tab[i-run];
5958                         if(score < last_score){
5959                             last_score= score;
5960                             last_run= run;
5961                             last_level= level-64;
5962                             last_i= i+1;
5963                         }
5964                     }
5965                 }
5966             }
5967         }
5968
5969         score_tab[i+1]= best_score;
5970
5971         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
5972         if(last_non_zero <= 27){
5973             for(; survivor_count; survivor_count--){
5974                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
5975                     break;
5976             }
5977         }else{
5978             for(; survivor_count; survivor_count--){
5979                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
5980                     break;
5981             }
5982         }
5983
5984         survivor[ survivor_count++ ]= i+1;
5985     }
5986
5987     if(s->out_format != FMT_H263){
5988         last_score= 256*256*256*120;
5989         for(i= survivor[0]; i<=last_non_zero + 1; i++){
5990             int score= score_tab[i];
5991             if(i) score += lambda*2; //FIXME exacter?
5992
5993             if(score < last_score){
5994                 last_score= score;
5995                 last_i= i;
5996                 last_level= level_tab[i];
5997                 last_run= run_tab[i];
5998             }
5999         }
6000     }
6001
6002     s->coded_score[n] = last_score;
6003
6004     dc= ABS(block[0]);
6005     last_non_zero= last_i - 1;
6006     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
6007
6008     if(last_non_zero < start_i)
6009         return last_non_zero;
6010
6011     if(last_non_zero == 0 && start_i == 0){
6012         int best_level= 0;
6013         int best_score= dc * dc;
6014
6015         for(i=0; i<coeff_count[0]; i++){
6016             int level= coeff[i][0];
6017             int alevel= ABS(level);
6018             int unquant_coeff, score, distortion;
6019
6020             if(s->out_format == FMT_H263){
6021                     unquant_coeff= (alevel*qmul + qadd)>>3;
6022             }else{ //MPEG1
6023                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
6024                     unquant_coeff =   (unquant_coeff - 1) | 1;
6025             }
6026             unquant_coeff = (unquant_coeff + 4) >> 3;
6027             unquant_coeff<<= 3 + 3;
6028
6029             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
6030             level+=64;
6031             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
6032             else                    score= distortion + esc_length*lambda;
6033
6034             if(score < best_score){
6035                 best_score= score;
6036                 best_level= level - 64;
6037             }
6038         }
6039         block[0]= best_level;
6040         s->coded_score[n] = best_score - dc*dc;
6041         if(best_level == 0) return -1;
6042         else                return last_non_zero;
6043     }
6044
6045     i= last_i;
6046     assert(last_level);
6047
6048     block[ perm_scantable[last_non_zero] ]= last_level;
6049     i -= last_run + 1;
6050
6051     for(; i>start_i; i -= run_tab[i] + 1){
6052         block[ perm_scantable[i-1] ]= level_tab[i];
6053     }
6054
6055     return last_non_zero;
6056 }
6057
//#define REFINE_STATS 1
/* Fixed-point 8x8 DCT basis functions, scaled by 1<<BASIS_SHIFT.
 * Indexed as [idct-permuted coefficient index][spatial position 8*x + y].
 * Filled lazily by build_basis() on first use in dct_quantize_refine()
 * (the all-zero initial state of basis[0][0] is used as the "uninitialized"
 * marker there). */
static int16_t basis[64][64];
6060
6061 static void build_basis(uint8_t *perm){
6062     int i, j, x, y;
6063     emms_c();
6064     for(i=0; i<8; i++){
6065         for(j=0; j<8; j++){
6066             for(y=0; y<8; y++){
6067                 for(x=0; x<8; x++){
6068                     double s= 0.25*(1<<BASIS_SHIFT);
6069                     int index= 8*i + j;
6070                     int perm_index= perm[index];
6071                     if(i==0) s*= sqrt(0.5);
6072                     if(j==0) s*= sqrt(0.5);
6073                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
6074                 }
6075             }
6076         }
6077     }
6078 }
6079
/**
 * Rate/distortion refinement of an already-quantized 8x8 block
 * (quantizer noise shaping). Starting from the coefficients in block[],
 * it greedily tries +/-1 changes on each coefficient; the cost of a change
 * is its VLC bit-length delta (times lambda) plus the change in weighted
 * reconstruction error, evaluated incrementally via the DCT basis table
 * (dsp.try_8x8basis / dsp.add_8x8basis). Iterates until no single change
 * improves the score.
 *
 * @param block  quantized coefficients, updated in place (IDCT-permuted order)
 * @param weight per-coefficient error weights; rescaled in place to 16..63
 * @param orig   reference (unquantized spatial) values the error is measured against
 * @param n      block index (selects luma/chroma DC scale and matrices)
 * @return index of the last non-zero coefficient in scan order
 */
static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
                        DCTELEM *block, int16_t *weight, DCTELEM *orig,
                        int n, int qscale){
    int16_t rem[64];
    DECLARE_ALIGNED_16(DCTELEM, d1[64]);
    const int *qmat;
    const uint8_t *scantable= s->intra_scantable.scantable;
    const uint8_t *perm_scantable= s->intra_scantable.permutated;
//    unsigned int threshold1, threshold2;
//    int bias=0;
    int run_tab[65];
    int prev_run=0;
    int prev_level=0;
    int qmul, qadd, start_i, last_non_zero, i, dc;
    uint8_t * length;
    uint8_t * last_length;
    int lambda;
    int rle_index, run, q, sum;
#ifdef REFINE_STATS
static int count=0;
static int after_last=0;
static int to_zero=0;
static int from_zero=0;
static int raise=0;
static int lower=0;
static int messed_sign=0;
#endif

    /* lazily build the fixed-point DCT basis table on first call */
    if(basis[0][0] == 0)
        build_basis(s->dsp.idct_permutation);

    /* H.263-style dequant parameters: level' = 2*qscale*level +/- (qscale-1)|1 */
    qmul= qscale*2;
    qadd= (qscale-1)|1;
    if (s->mb_intra) {
        if (!s->h263_aic) {
            if (n < 4)
                q = s->y_dc_scale;
            else
                q = s->c_dc_scale;
        } else{
            /* For AIC we skip quant/dequant of INTRADC */
            q = 1;
            qadd=0;
        }
        q <<= RECON_SHIFT-3;
        /* note: block[0] is assumed to be positive */
        dc= block[0]*q;
//        block[0] = (block[0] + (q >> 1)) / q;
        start_i = 1;
        qmat = s->q_intra_matrix[qscale]; /* NOTE(review): qmat is set but never read below */
        // if(s->mpeg_quant || s->out_format == FMT_MPEG1)
        //     bias= 1<<(QMAT_SHIFT-1);
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
    } else {
        dc= 0;
        start_i = 0;
        qmat = s->q_inter_matrix[qscale];
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }
    last_non_zero = s->block_last_index[n];

#ifdef REFINE_STATS
{START_TIMER
#endif
    /* rem[] = current reconstruction error in RECON_SHIFT fixed point,
     * initialized to DC (plus rounding term) minus the reference block */
    dc += (1<<(RECON_SHIFT-1));
    for(i=0; i<64; i++){
        rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
    }
#ifdef REFINE_STATS
STOP_TIMER("memset rem[]")}
#endif
    /* squash the caller-provided weights into the 16..63 range and
     * accumulate sum of squares for the lambda rescaling below */
    sum=0;
    for(i=0; i<64; i++){
        int one= 36;
        int qns=4;
        int w;

        w= ABS(weight[i]) + qns*one;
        w= 15 + (48*qns*one + w/2)/w; // 16 .. 63

        weight[i] = w;
//        w=weight[i] = (63*qns + (w/2)) / w;

        assert(w>0);
        assert(w<(1<<6));
        sum += w*w;
    }
    /* lambda converts bits to the same scale as the weighted SSE scores */
    lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
#ifdef REFINE_STATS
{START_TIMER
#endif
    /* add the currently-coded coefficients into rem[] and record the
     * run-lengths between non-zero coefficients in run_tab[] */
    run=0;
    rle_index=0;
    for(i=start_i; i<=last_non_zero; i++){
        int j= perm_scantable[i];
        const int level= block[j];
        int coeff;

        if(level){
            if(level<0) coeff= qmul*level - qadd;
            else        coeff= qmul*level + qadd;
            run_tab[rle_index++]=run;
            run=0;

            s->dsp.add_8x8basis(rem, basis[j], coeff);
        }else{
            run++;
        }
    }
#ifdef REFINE_STATS
if(last_non_zero>0){
STOP_TIMER("init rem[]")
}
}

{START_TIMER
#endif
    /* main greedy loop: each iteration finds the single +/-1 coefficient
     * change with the best (lowest) combined rate+distortion score and
     * applies it; stops when no change improves on the current state */
    for(;;){
        int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
        int best_coeff=0;
        int best_change=0;
        int run2, best_unquant_change=0, analyze_gradient;
#ifdef REFINE_STATS
{START_TIMER
#endif
        analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;

        if(analyze_gradient){
            /* d1 = DCT of the weighted error; its sign per frequency tells
             * whether introducing a new +/-1 coefficient there could help */
#ifdef REFINE_STATS
{START_TIMER
#endif
            for(i=0; i<64; i++){
                int w= weight[i];

                d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
            }
#ifdef REFINE_STATS
STOP_TIMER("rem*w*w")}
{START_TIMER
#endif
            s->dsp.fdct(d1);
#ifdef REFINE_STATS
STOP_TIMER("dct")}
#endif
        }

        /* intra blocks: also try +/-1 on the DC coefficient (distortion only,
         * DC rate is not modeled here) */
        if(start_i){
            const int level= block[0];
            int change, old_coeff;

            assert(s->mb_intra);

            old_coeff= q*level;

            for(change=-1; change<=1; change+=2){
                int new_level= level + change;
                int score, new_coeff;

                new_coeff= q*new_level;
                if(new_coeff >= 2048 || new_coeff < 0)
                    continue;

                score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
                if(score<best_score){
                    best_score= score;
                    best_coeff= 0;
                    best_change= change;
                    best_unquant_change= new_coeff - old_coeff;
                }
            }
        }

        /* walk the AC coefficients in scan order, tracking the run-length
         * (run) before the current position and the distance (run2) to the
         * next non-zero coefficient, so VLC bit-cost deltas can be computed */
        run=0;
        rle_index=0;
        run2= run_tab[rle_index++];
        prev_level=0;
        prev_run=0;

        for(i=start_i; i<64; i++){
            int j= perm_scantable[i];
            const int level= block[j];
            int change, old_coeff;

            if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
                break;

            if(level){
                if(level<0) old_coeff= qmul*level - qadd;
                else        old_coeff= qmul*level + qadd;
                run2= run_tab[rle_index++]; //FIXME ! maybe after last
            }else{
                old_coeff=0;
                run2--;
                assert(run2>=0 || i >= last_non_zero );
            }

            for(change=-1; change<=1; change+=2){
                int new_level= level + change;
                int score, new_coeff, unquant_change;

                score=0;
                /* noise_shaping < 2: only allow changes toward zero */
                if(s->avctx->quantizer_noise_shaping < 2 && ABS(new_level) > ABS(level))
                   continue;

                if(new_level){
                    if(new_level<0) new_coeff= qmul*new_level - qadd;
                    else            new_coeff= qmul*new_level + qadd;
                    if(new_coeff >= 2048 || new_coeff <= -2048)
                        continue;
                    //FIXME check for overflow

                    if(level){
                        /* non-zero -> non-zero: simple VLC length delta */
                        if(level < 63 && level > -63){
                            if(i < last_non_zero)
                                score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
                                         - length[UNI_AC_ENC_INDEX(run, level+64)];
                            else
                                score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
                                         - last_length[UNI_AC_ENC_INDEX(run, level+64)];
                        }
                    }else{
                        /* zero -> +/-1: inserting a coefficient splits the run */
                        assert(ABS(new_level)==1);

                        if(analyze_gradient){
                            /* skip if the error gradient points the wrong way */
                            int g= d1[ scantable[i] ];
                            if(g && (g^new_level) >= 0)
                                continue;
                        }

                        if(i < last_non_zero){
                            int next_i= i + run2 + 1;
                            int next_level= block[ perm_scantable[next_i] ] + 64;

                            if(next_level&(~127))
                                next_level= 0;

                            if(next_i < last_non_zero)
                                score +=   length[UNI_AC_ENC_INDEX(run, 65)]
                                         + length[UNI_AC_ENC_INDEX(run2, next_level)]
                                         - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
                            else
                                score +=  length[UNI_AC_ENC_INDEX(run, 65)]
                                        + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
                                        - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
                        }else{
                            score += last_length[UNI_AC_ENC_INDEX(run, 65)];
                            if(prev_level){
                                score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
                                        - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
                            }
                        }
                    }
                }else{
                    /* +/-1 -> zero: removing a coefficient merges two runs */
                    new_coeff=0;
                    assert(ABS(level)==1);

                    if(i < last_non_zero){
                        int next_i= i + run2 + 1;
                        int next_level= block[ perm_scantable[next_i] ] + 64;

                        if(next_level&(~127))
                            next_level= 0;

                        if(next_i < last_non_zero)
                            score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
                                     - length[UNI_AC_ENC_INDEX(run2, next_level)]
                                     - length[UNI_AC_ENC_INDEX(run, 65)];
                        else
                            score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
                                     - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
                                     - length[UNI_AC_ENC_INDEX(run, 65)];
                    }else{
                        score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
                        if(prev_level){
                            score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
                                    - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
                        }
                    }
                }

                score *= lambda;

                unquant_change= new_coeff - old_coeff;
                assert((score < 100*lambda && score > -100*lambda) || lambda==0);

                /* add the distortion delta for this candidate change */
                score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
                if(score<best_score){
                    best_score= score;
                    best_coeff= i;
                    best_change= change;
                    best_unquant_change= unquant_change;
                }
            }
            if(level){
                prev_level= level + 64;
                if(prev_level&(~127))
                    prev_level= 0;
                prev_run= run;
                run=0;
            }else{
                run++;
            }
        }
#ifdef REFINE_STATS
STOP_TIMER("iterative step")}
#endif

        if(best_change){
            /* apply the winning change, then refresh last_non_zero,
             * run_tab[] and rem[] to match the new coefficient set */
            int j= perm_scantable[ best_coeff ];

            block[j] += best_change;

            if(best_coeff > last_non_zero){
                last_non_zero= best_coeff;
                assert(block[j]);
#ifdef REFINE_STATS
after_last++;
#endif
            }else{
#ifdef REFINE_STATS
if(block[j]){
    if(block[j] - best_change){
        if(ABS(block[j]) > ABS(block[j] - best_change)){
            raise++;
        }else{
            lower++;
        }
    }else{
        from_zero++;
    }
}else{
    to_zero++;
}
#endif
                for(; last_non_zero>=start_i; last_non_zero--){
                    if(block[perm_scantable[last_non_zero]])
                        break;
                }
            }
#ifdef REFINE_STATS
count++;
if(256*256*256*64 % count == 0){
    printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
}
#endif
            run=0;
            rle_index=0;
            for(i=start_i; i<=last_non_zero; i++){
                int j= perm_scantable[i];
                const int level= block[j];

                 if(level){
                     run_tab[rle_index++]=run;
                     run=0;
                 }else{
                     run++;
                 }
            }

            s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
        }else{
            break;
        }
    }
#ifdef REFINE_STATS
if(last_non_zero>0){
STOP_TIMER("iterative search")
}
}
#endif

    return last_non_zero;
}
6455
6456 static int dct_quantize_c(MpegEncContext *s,
6457                         DCTELEM *block, int n,
6458                         int qscale, int *overflow)
6459 {
6460     int i, j, level, last_non_zero, q, start_i;
6461     const int *qmat;
6462     const uint8_t *scantable= s->intra_scantable.scantable;
6463     int bias;
6464     int max=0;
6465     unsigned int threshold1, threshold2;
6466
6467     s->dsp.fdct (block);
6468
6469     if(s->dct_error_sum)
6470         s->denoise_dct(s, block);
6471
6472     if (s->mb_intra) {
6473         if (!s->h263_aic) {
6474             if (n < 4)
6475                 q = s->y_dc_scale;
6476             else
6477                 q = s->c_dc_scale;
6478             q = q << 3;
6479         } else
6480             /* For AIC we skip quant/dequant of INTRADC */
6481             q = 1 << 3;
6482
6483         /* note: block[0] is assumed to be positive */
6484         block[0] = (block[0] + (q >> 1)) / q;
6485         start_i = 1;
6486         last_non_zero = 0;
6487         qmat = s->q_intra_matrix[qscale];
6488         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6489     } else {
6490         start_i = 0;
6491         last_non_zero = -1;
6492         qmat = s->q_inter_matrix[qscale];
6493         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6494     }
6495     threshold1= (1<<QMAT_SHIFT) - bias - 1;
6496     threshold2= (threshold1<<1);
6497     for(i=63;i>=start_i;i--) {
6498         j = scantable[i];
6499         level = block[j] * qmat[j];
6500
6501         if(((unsigned)(level+threshold1))>threshold2){
6502             last_non_zero = i;
6503             break;
6504         }else{
6505             block[j]=0;
6506         }
6507     }
6508     for(i=start_i; i<=last_non_zero; i++) {
6509         j = scantable[i];
6510         level = block[j] * qmat[j];
6511
6512 //        if(   bias+level >= (1<<QMAT_SHIFT)
6513 //           || bias-level >= (1<<QMAT_SHIFT)){
6514         if(((unsigned)(level+threshold1))>threshold2){
6515             if(level>0){
6516                 level= (bias + level)>>QMAT_SHIFT;
6517                 block[j]= level;
6518             }else{
6519                 level= (bias - level)>>QMAT_SHIFT;
6520                 block[j]= -level;
6521             }
6522             max |=level;
6523         }else{
6524             block[j]=0;
6525         }
6526     }
6527     *overflow= s->max_qcoeff < max; //overflow might have happened
6528
6529     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
6530     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
6531         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
6532
6533     return last_non_zero;
6534 }
6535
6536 #endif //CONFIG_ENCODERS
6537
6538 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
6539                                    DCTELEM *block, int n, int qscale)
6540 {
6541     int i, level, nCoeffs;
6542     const uint16_t *quant_matrix;
6543
6544     nCoeffs= s->block_last_index[n];
6545
6546     if (n < 4)
6547         block[0] = block[0] * s->y_dc_scale;
6548     else
6549         block[0] = block[0] * s->c_dc_scale;
6550     /* XXX: only mpeg1 */
6551     quant_matrix = s->intra_matrix;
6552     for(i=1;i<=nCoeffs;i++) {
6553         int j= s->intra_scantable.permutated[i];
6554         level = block[j];
6555         if (level) {
6556             if (level < 0) {
6557                 level = -level;
6558                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6559                 level = (level - 1) | 1;
6560                 level = -level;
6561             } else {
6562                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6563                 level = (level - 1) | 1;
6564             }
6565             block[j] = level;
6566         }
6567     }
6568 }
6569
6570 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
6571                                    DCTELEM *block, int n, int qscale)
6572 {
6573     int i, level, nCoeffs;
6574     const uint16_t *quant_matrix;
6575
6576     nCoeffs= s->block_last_index[n];
6577
6578     quant_matrix = s->inter_matrix;
6579     for(i=0; i<=nCoeffs; i++) {
6580         int j= s->intra_scantable.permutated[i];
6581         level = block[j];
6582         if (level) {
6583             if (level < 0) {
6584                 level = -level;
6585                 level = (((level << 1) + 1) * qscale *
6586                          ((int) (quant_matrix[j]))) >> 4;
6587                 level = (level - 1) | 1;
6588                 level = -level;
6589             } else {
6590                 level = (((level << 1) + 1) * qscale *
6591                          ((int) (quant_matrix[j]))) >> 4;
6592                 level = (level - 1) | 1;
6593             }
6594             block[j] = level;
6595         }
6596     }
6597 }
6598
6599 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
6600                                    DCTELEM *block, int n, int qscale)
6601 {
6602     int i, level, nCoeffs;
6603     const uint16_t *quant_matrix;
6604
6605     if(s->alternate_scan) nCoeffs= 63;
6606     else nCoeffs= s->block_last_index[n];
6607
6608     if (n < 4)
6609         block[0] = block[0] * s->y_dc_scale;
6610     else
6611         block[0] = block[0] * s->c_dc_scale;
6612     quant_matrix = s->intra_matrix;
6613     for(i=1;i<=nCoeffs;i++) {
6614         int j= s->intra_scantable.permutated[i];
6615         level = block[j];
6616         if (level) {
6617             if (level < 0) {
6618                 level = -level;
6619                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6620                 level = -level;
6621             } else {
6622                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6623             }
6624             block[j] = level;
6625         }
6626     }
6627 }
6628
6629 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
6630                                    DCTELEM *block, int n, int qscale)
6631 {
6632     int i, level, nCoeffs;
6633     const uint16_t *quant_matrix;
6634     int sum=-1;
6635
6636     if(s->alternate_scan) nCoeffs= 63;
6637     else nCoeffs= s->block_last_index[n];
6638
6639     if (n < 4)
6640         block[0] = block[0] * s->y_dc_scale;
6641     else
6642         block[0] = block[0] * s->c_dc_scale;
6643     quant_matrix = s->intra_matrix;
6644     for(i=1;i<=nCoeffs;i++) {
6645         int j= s->intra_scantable.permutated[i];
6646         level = block[j];
6647         if (level) {
6648             if (level < 0) {
6649                 level = -level;
6650                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6651                 level = -level;
6652             } else {
6653                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6654             }
6655             block[j] = level;
6656             sum+=level;
6657         }
6658     }
6659     block[63]^=sum&1;
6660 }
6661
6662 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
6663                                    DCTELEM *block, int n, int qscale)
6664 {
6665     int i, level, nCoeffs;
6666     const uint16_t *quant_matrix;
6667     int sum=-1;
6668
6669     if(s->alternate_scan) nCoeffs= 63;
6670     else nCoeffs= s->block_last_index[n];
6671
6672     quant_matrix = s->inter_matrix;
6673     for(i=0; i<=nCoeffs; i++) {
6674         int j= s->intra_scantable.permutated[i];
6675         level = block[j];
6676         if (level) {
6677             if (level < 0) {
6678                 level = -level;
6679                 level = (((level << 1) + 1) * qscale *
6680                          ((int) (quant_matrix[j]))) >> 4;
6681                 level = -level;
6682             } else {
6683                 level = (((level << 1) + 1) * qscale *
6684                          ((int) (quant_matrix[j]))) >> 4;
6685             }
6686             block[j] = level;
6687             sum+=level;
6688         }
6689     }
6690     block[63]^=sum&1;
6691 }
6692
6693 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
6694                                   DCTELEM *block, int n, int qscale)
6695 {
6696     int i, level, qmul, qadd;
6697     int nCoeffs;
6698
6699     assert(s->block_last_index[n]>=0);
6700
6701     qmul = qscale << 1;
6702
6703     if (!s->h263_aic) {
6704         if (n < 4)
6705             block[0] = block[0] * s->y_dc_scale;
6706         else
6707             block[0] = block[0] * s->c_dc_scale;
6708         qadd = (qscale - 1) | 1;
6709     }else{
6710         qadd = 0;
6711     }
6712     if(s->ac_pred)
6713         nCoeffs=63;
6714     else
6715         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6716
6717     for(i=1; i<=nCoeffs; i++) {
6718         level = block[i];
6719         if (level) {
6720             if (level < 0) {
6721                 level = level * qmul - qadd;
6722             } else {
6723                 level = level * qmul + qadd;
6724             }
6725             block[i] = level;
6726         }
6727     }
6728 }
6729
6730 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
6731                                   DCTELEM *block, int n, int qscale)
6732 {
6733     int i, level, qmul, qadd;
6734     int nCoeffs;
6735
6736     assert(s->block_last_index[n]>=0);
6737
6738     qadd = (qscale - 1) | 1;
6739     qmul = qscale << 1;
6740
6741     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6742
6743     for(i=0; i<=nCoeffs; i++) {
6744         level = block[i];
6745         if (level) {
6746             if (level < 0) {
6747                 level = level * qmul - qadd;
6748             } else {
6749                 level = level * qmul + qadd;
6750             }
6751             block[i] = level;
6752         }
6753     }
6754 }
6755
#ifdef CONFIG_ENCODERS

/* Encoder registration table: every codec below shares the generic
 * MPV_encode_init / MPV_encode_picture / MPV_encode_end entry points and
 * an MpegEncContext private context; they differ only in codec id, name,
 * supported pixel formats and capability flags. */

AVCodec h263_encoder = {
    "h263",
    CODEC_TYPE_VIDEO,
    CODEC_ID_H263,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec h263p_encoder = {
    "h263p",
    CODEC_TYPE_VIDEO,
    CODEC_ID_H263P,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec flv_encoder = {
    "flv",
    CODEC_TYPE_VIDEO,
    CODEC_ID_FLV1,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec rv10_encoder = {
    "rv10",
    CODEC_TYPE_VIDEO,
    CODEC_ID_RV10,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec rv20_encoder = {
    "rv20",
    CODEC_TYPE_VIDEO,
    CODEC_ID_RV20,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

/* The only entry with CODEC_CAP_DELAY set — output may lag behind input
 * (presumably due to B-frame reordering; confirm against MPV_encode_picture). */
AVCodec mpeg4_encoder = {
    "mpeg4",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MPEG4,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
    .capabilities= CODEC_CAP_DELAY,
};

AVCodec msmpeg4v1_encoder = {
    "msmpeg4v1",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V1,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec msmpeg4v2_encoder = {
    "msmpeg4v2",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V2,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec msmpeg4v3_encoder = {
    "msmpeg4",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V3,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec wmv1_encoder = {
    "wmv1",
    CODEC_TYPE_VIDEO,
    CODEC_ID_WMV1,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

/* mjpeg is the only encoder here accepting full-range (YUVJ) formats. */
AVCodec mjpeg_encoder = {
    "mjpeg",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MJPEG,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUVJ420P, PIX_FMT_YUVJ422P, -1},
};

#endif //CONFIG_ENCODERS