]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo.c
10l: Rename missed occurrences of CONFIG_EBX_AVAILABLE to HAVE_EBX_AVAILABLE.
[ffmpeg] / libavcodec / mpegvideo.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  *
22  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
23  */
24
25 /**
26  * @file mpegvideo.c
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "avcodec.h"
31 #include "dsputil.h"
32 #include "mpegvideo.h"
33 #include "faandct.h"
34 #include <limits.h>
35
36 #ifdef USE_FASTMEMCPY
37 #include "libvo/fastmemcpy.h"
38 #endif
39
40 //#undef NDEBUG
41 //#include <assert.h>
42
43 #ifdef CONFIG_ENCODERS
44 static int encode_picture(MpegEncContext *s, int picture_number);
45 #endif //CONFIG_ENCODERS
46 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
47                                    DCTELEM *block, int n, int qscale);
48 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
49                                    DCTELEM *block, int n, int qscale);
50 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
51                                    DCTELEM *block, int n, int qscale);
52 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
53                                    DCTELEM *block, int n, int qscale);
54 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
55                                    DCTELEM *block, int n, int qscale);
56 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
57                                   DCTELEM *block, int n, int qscale);
58 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
59                                   DCTELEM *block, int n, int qscale);
60 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
61 #ifdef CONFIG_ENCODERS
62 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
63 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
64 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
65 static int sse_mb(MpegEncContext *s);
66 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block);
67 #endif //CONFIG_ENCODERS
68
69 #ifdef HAVE_XVMC
70 extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
71 extern void XVMC_field_end(MpegEncContext *s);
72 extern void XVMC_decode_mb(MpegEncContext *s);
73 #endif
74
75 void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
76
77
78 /* enable all paranoid tests for rounding, overflows, etc... */
79 //#define PARANOID
80
81 //#define DEBUG
82
83
/* for jpeg fast DCT */
#define CONST_BITS 14

/* precomputed values scaled up by 14 bits; 8x8 table, one row per line */
static const uint16_t aanscales[64] = {
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,   /* row 0 */
    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,   /* row 1 */
    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,   /* row 2 */
    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,   /* row 3 */
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,   /* row 4 */
    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,   /* row 5 */
     8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,   /* row 6 */
     4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247    /* row 7 */
};

/* 16-entry rounding lookup: indices 0-2 -> 0, 3-13 -> 1, 14-15 -> 2 */
static const uint8_t h263_chroma_roundtab[16] = {
    0, 0, 0,                            /*  0 ..  2 */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,    /*  3 .. 13 */
    2, 2,                               /* 14 .. 15 */
};

/* default chroma qscale mapping: the identity over 0..31 */
static const uint8_t ff_default_chroma_qscale_table[32] = {
     0,  1,  2,  3,  4,  5,  6,  7,
     8,  9, 10, 11, 12, 13, 14, 15,
    16, 17, 18, 19, 20, 21, 22, 23,
    24, 25, 26, 27, 28, 29, 30, 31
};
108
109 #ifdef CONFIG_ENCODERS
110 static uint8_t default_mv_penalty[MAX_FCODE+1][MAX_MV*2+1];
111 static uint8_t default_fcode_tab[MAX_MV*2+1];
112
113 enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
114
115 static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
116                            const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra)
117 {
118     int qscale;
119     int shift=0;
120
121     for(qscale=qmin; qscale<=qmax; qscale++){
122         int i;
123         if (dsp->fdct == ff_jpeg_fdct_islow
124 #ifdef FAAN_POSTSCALE
125             || dsp->fdct == ff_faandct
126 #endif
127             ) {
128             for(i=0;i<64;i++) {
129                 const int j= dsp->idct_permutation[i];
130                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
131                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
132                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
133                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
134
135                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
136                                 (qscale * quant_matrix[j]));
137             }
138         } else if (dsp->fdct == fdct_ifast
139 #ifndef FAAN_POSTSCALE
140                    || dsp->fdct == ff_faandct
141 #endif
142                    ) {
143             for(i=0;i<64;i++) {
144                 const int j= dsp->idct_permutation[i];
145                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
146                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
147                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
148                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
149
150                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
151                                 (aanscales[i] * qscale * quant_matrix[j]));
152             }
153         } else {
154             for(i=0;i<64;i++) {
155                 const int j= dsp->idct_permutation[i];
156                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
157                    So 16           <= qscale * quant_matrix[i]             <= 7905
158                    so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
159                    so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
160                 */
161                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
162 //                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
163                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
164
165                 if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
166                 qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
167             }
168         }
169
170         for(i=intra; i<64; i++){
171             int64_t max= 8191;
172             if (dsp->fdct == fdct_ifast
173 #ifndef FAAN_POSTSCALE
174                    || dsp->fdct == ff_faandct
175 #endif
176                    ) {
177                 max= (8191LL*aanscales[i]) >> 14;
178             }
179             while(((max * qmat[qscale][i]) >> shift) > INT_MAX){
180                 shift++;
181             }
182         }
183     }
184     if(shift){
185         av_log(NULL, AV_LOG_INFO, "Warning, QMAT_SHIFT is larger than %d, overflows possible\n", QMAT_SHIFT - shift);
186     }
187 }
188
189 static inline void update_qscale(MpegEncContext *s){
190     s->qscale= (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
191     s->qscale= av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
192
193     s->lambda2= (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
194 }
195 #endif //CONFIG_ENCODERS
196
197 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
198     int i;
199     int end;
200
201     st->scantable= src_scantable;
202
203     for(i=0; i<64; i++){
204         int j;
205         j = src_scantable[i];
206         st->permutated[i] = permutation[j];
207 #ifdef ARCH_POWERPC
208         st->inverse[j] = i;
209 #endif
210     }
211
212     end=-1;
213     for(i=0; i<64; i++){
214         int j;
215         j = st->permutated[i];
216         if(j>end) end=j;
217         st->raster_end[i]= end;
218     }
219 }
220
221 #ifdef CONFIG_ENCODERS
222 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix){
223     int i;
224
225     if(matrix){
226         put_bits(pb, 1, 1);
227         for(i=0;i<64;i++) {
228             put_bits(pb, 8, matrix[ ff_zigzag_direct[i] ]);
229         }
230     }else
231         put_bits(pb, 1, 0);
232 }
233 #endif //CONFIG_ENCODERS
234
/**
 * Scans [p, end) for a 00 00 01 byte sequence, carrying the last bytes seen
 * in *state so a sequence split across two calls is still detected.
 *
 * Up to three bytes are first shifted into *state one at a time; the function
 * returns as soon as the shifted previous state equals 0x100 or the input is
 * exhausted. The rest of the buffer is then scanned with strides of up to
 * three bytes, and *state is reloaded from the four bytes that end at the
 * returned position.
 *
 * @param p     first byte to examine
 * @param end   one past the last byte; must not be below p
 * @param state in/out: the most recent bytes seen
 * @return position at which scanning stopped; end if p >= end on entry
 */
const uint8_t *ff_find_start_code(const uint8_t * restrict p, const uint8_t *end, uint32_t * restrict state)
{
    int budget = 3;

    assert(p<=end);
    if (p >= end)
        return end;

    /* feed the first bytes through *state so p[-3] becomes valid below */
    while (budget-- > 0) {
        const uint32_t shifted = *state << 8;

        *state = shifted + *p;
        p++;
        if (shifted == 0x100 || p == end)
            return p;
    }

    while (p < end) {
        int found;

        if (p[-1] > 1) {            /* byte cannot be part of 00 00 01 */
            p += 3;
            continue;
        }
        if (p[-2]) {
            p += 2;
            continue;
        }
        /* p[-2] == 0 and p[-1] <= 1: match iff p[-3] == 0 and p[-1] == 1 */
        found = !(p[-3] | (p[-1] - 1));
        p++;
        if (found)
            break;
    }

    /* the strides above may have stepped past end */
    if (p > end)
        p = end;
    p -= 4;
    /* presumably a big endian load of the last four bytes -- see be2me_32/unaligned32 */
    *state = be2me_32(unaligned32(p));

    return p + 4;
}
264
265 /* init common dct for both encoder and decoder */
266 int DCT_common_init(MpegEncContext *s)
267 {
268     s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
269     s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
270     s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
271     s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
272     s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
273     if(s->flags & CODEC_FLAG_BITEXACT)
274         s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_bitexact;
275     s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;
276
277 #ifdef CONFIG_ENCODERS
278     s->dct_quantize= dct_quantize_c;
279     s->denoise_dct= denoise_dct_c;
280 #endif //CONFIG_ENCODERS
281
282 #ifdef HAVE_MMX
283     MPV_common_init_mmx(s);
284 #endif
285 #ifdef ARCH_ALPHA
286     MPV_common_init_axp(s);
287 #endif
288 #ifdef HAVE_MLIB
289     MPV_common_init_mlib(s);
290 #endif
291 #ifdef HAVE_MMI
292     MPV_common_init_mmi(s);
293 #endif
294 #ifdef ARCH_ARMV4L
295     MPV_common_init_armv4l(s);
296 #endif
297 #ifdef ARCH_POWERPC
298     MPV_common_init_ppc(s);
299 #endif
300
301 #ifdef CONFIG_ENCODERS
302     s->fast_dct_quantize= s->dct_quantize;
303
304     if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
305         s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
306     }
307
308 #endif //CONFIG_ENCODERS
309
310     /* load & permutate scantables
311        note: only wmv uses different ones
312     */
313     if(s->alternate_scan){
314         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
315         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
316     }else{
317         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
318         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
319     }
320     ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
321     ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
322
323     return 0;
324 }
325
326 static void copy_picture(Picture *dst, Picture *src){
327     *dst = *src;
328     dst->type= FF_BUFFER_TYPE_COPY;
329 }
330
331 #ifdef CONFIG_ENCODERS
332 static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *src){
333     int i;
334
335     dst->pict_type              = src->pict_type;
336     dst->quality                = src->quality;
337     dst->coded_picture_number   = src->coded_picture_number;
338     dst->display_picture_number = src->display_picture_number;
339 //    dst->reference              = src->reference;
340     dst->pts                    = src->pts;
341     dst->interlaced_frame       = src->interlaced_frame;
342     dst->top_field_first        = src->top_field_first;
343
344     if(s->avctx->me_threshold){
345         if(!src->motion_val[0])
346             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
347         if(!src->mb_type)
348             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
349         if(!src->ref_index[0])
350             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
351         if(src->motion_subsample_log2 != dst->motion_subsample_log2)
352             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
353             src->motion_subsample_log2, dst->motion_subsample_log2);
354
355         memcpy(dst->mb_type, src->mb_type, s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
356
357         for(i=0; i<2; i++){
358             int stride= ((16*s->mb_width )>>src->motion_subsample_log2) + 1;
359             int height= ((16*s->mb_height)>>src->motion_subsample_log2);
360
361             if(src->motion_val[i] && src->motion_val[i] != dst->motion_val[i]){
362                 memcpy(dst->motion_val[i], src->motion_val[i], 2*stride*height*sizeof(int16_t));
363             }
364             if(src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]){
365                 memcpy(dst->ref_index[i], src->ref_index[i], s->b8_stride*2*s->mb_height*sizeof(int8_t));
366             }
367         }
368     }
369 }
370 #endif
371
372 /**
373  * allocates a Picture
374  * The pixels are allocated/set by calling get_buffer() if shared=0
375  */
376 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
377     const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesnt sig11
378     const int mb_array_size= s->mb_stride*s->mb_height;
379     const int b8_array_size= s->b8_stride*s->mb_height*2;
380     const int b4_array_size= s->b4_stride*s->mb_height*4;
381     int i;
382
383     if(shared){
384         assert(pic->data[0]);
385         assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
386         pic->type= FF_BUFFER_TYPE_SHARED;
387     }else{
388         int r;
389
390         assert(!pic->data[0]);
391
392         r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);
393
394         if(r<0 || !pic->age || !pic->type || !pic->data[0]){
395             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
396             return -1;
397         }
398
399         if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
400             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
401             return -1;
402         }
403
404         if(pic->linesize[1] != pic->linesize[2]){
405             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
406             return -1;
407         }
408
409         s->linesize  = pic->linesize[0];
410         s->uvlinesize= pic->linesize[1];
411     }
412
413     if(pic->qscale_table==NULL){
414         if (s->encoding) {
415             CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
416             CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
417             CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
418         }
419
420         CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
421         CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
422         CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(uint32_t))
423         pic->mb_type= pic->mb_type_base + s->mb_stride+1;
424         if(s->out_format == FMT_H264){
425             for(i=0; i<2; i++){
426                 CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+4)  * sizeof(int16_t))
427                 pic->motion_val[i]= pic->motion_val_base[i]+4;
428                 CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
429             }
430             pic->motion_subsample_log2= 2;
431         }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
432             for(i=0; i<2; i++){
433                 CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t))
434                 pic->motion_val[i]= pic->motion_val_base[i]+4;
435                 CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
436             }
437             pic->motion_subsample_log2= 3;
438         }
439         if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
440             CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
441         }
442         pic->qstride= s->mb_stride;
443         CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
444     }
445
446     //it might be nicer if the application would keep track of these but it would require a API change
447     memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
448     s->prev_pict_types[0]= s->pict_type;
449     if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
450         pic->age= INT_MAX; // skipped MBs in b frames are quite rare in mpeg1/2 and its a bit tricky to skip them anyway
451
452     return 0;
453 fail: //for the CHECKED_ALLOCZ macro
454     return -1;
455 }
456
457 /**
458  * deallocates a picture
459  */
460 static void free_picture(MpegEncContext *s, Picture *pic){
461     int i;
462
463     if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
464         s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
465     }
466
467     av_freep(&pic->mb_var);
468     av_freep(&pic->mc_mb_var);
469     av_freep(&pic->mb_mean);
470     av_freep(&pic->mbskip_table);
471     av_freep(&pic->qscale_table);
472     av_freep(&pic->mb_type_base);
473     av_freep(&pic->dct_coeff);
474     av_freep(&pic->pan_scan);
475     pic->mb_type= NULL;
476     for(i=0; i<2; i++){
477         av_freep(&pic->motion_val_base[i]);
478         av_freep(&pic->ref_index[i]);
479     }
480
481     if(pic->type == FF_BUFFER_TYPE_SHARED){
482         for(i=0; i<4; i++){
483             pic->base[i]=
484             pic->data[i]= NULL;
485         }
486         pic->type= 0;
487     }
488 }
489
490 static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
491     int i;
492
493     // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
494     CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*21*2); //(width + edge + align)*interlaced*MBsize*tolerance
495     s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*21;
496
497      //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer()
498     CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t))
499     s->rd_scratchpad=   s->me.scratchpad;
500     s->b_scratchpad=    s->me.scratchpad;
501     s->obmc_scratchpad= s->me.scratchpad + 16;
502     if (s->encoding) {
503         CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
504         CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
505         if(s->avctx->noise_reduction){
506             CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
507         }
508     }
509     CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
510     s->block= s->blocks[0];
511
512     for(i=0;i<12;i++){
513         s->pblocks[i] = (short *)(&s->block[i]);
514     }
515     return 0;
516 fail:
517     return -1; //free() through MPV_common_end()
518 }
519
520 static void free_duplicate_context(MpegEncContext *s){
521     if(s==NULL) return;
522
523     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
524     av_freep(&s->me.scratchpad);
525     s->rd_scratchpad=
526     s->b_scratchpad=
527     s->obmc_scratchpad= NULL;
528
529     av_freep(&s->dct_error_sum);
530     av_freep(&s->me.map);
531     av_freep(&s->me.score_map);
532     av_freep(&s->blocks);
533     s->block= NULL;
534 }
535
536 static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
537 #define COPY(a) bak->a= src->a
538     COPY(allocated_edge_emu_buffer);
539     COPY(edge_emu_buffer);
540     COPY(me.scratchpad);
541     COPY(rd_scratchpad);
542     COPY(b_scratchpad);
543     COPY(obmc_scratchpad);
544     COPY(me.map);
545     COPY(me.score_map);
546     COPY(blocks);
547     COPY(block);
548     COPY(start_mb_y);
549     COPY(end_mb_y);
550     COPY(me.map_generation);
551     COPY(pb);
552     COPY(dct_error_sum);
553     COPY(dct_count[0]);
554     COPY(dct_count[1]);
555 #undef COPY
556 }
557
558 void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
559     MpegEncContext bak;
560     int i;
561     //FIXME copy only needed parts
562 //START_TIMER
563     backup_duplicate_context(&bak, dst);
564     memcpy(dst, src, sizeof(MpegEncContext));
565     backup_duplicate_context(dst, &bak);
566     for(i=0;i<12;i++){
567         dst->pblocks[i] = (short *)(&dst->block[i]);
568     }
569 //STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
570 }
571
572 #ifdef CONFIG_ENCODERS
573 static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){
574 #define COPY(a) dst->a= src->a
575     COPY(pict_type);
576     COPY(current_picture);
577     COPY(f_code);
578     COPY(b_code);
579     COPY(qscale);
580     COPY(lambda);
581     COPY(lambda2);
582     COPY(picture_in_gop_number);
583     COPY(gop_picture_number);
584     COPY(frame_pred_frame_dct); //FIXME don't set in encode_header
585     COPY(progressive_frame); //FIXME don't set in encode_header
586     COPY(partitioned_frame); //FIXME don't set in encode_header
587 #undef COPY
588 }
589 #endif
590
591 /**
592  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
593  * the changed fields will not depend upon the prior state of the MpegEncContext.
594  */
595 static void MPV_common_defaults(MpegEncContext *s){
596     s->y_dc_scale_table=
597     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
598     s->chroma_qscale_table= ff_default_chroma_qscale_table;
599     s->progressive_frame= 1;
600     s->progressive_sequence= 1;
601     s->picture_structure= PICT_FRAME;
602
603     s->coded_picture_number = 0;
604     s->picture_number = 0;
605     s->input_picture_number = 0;
606
607     s->picture_in_gop_number = 0;
608
609     s->f_code = 1;
610     s->b_code = 1;
611 }
612
613 /**
614  * sets the given MpegEncContext to defaults for decoding.
615  * the changed fields will not depend upon the prior state of the MpegEncContext.
616  */
617 void MPV_decode_defaults(MpegEncContext *s){
618     MPV_common_defaults(s);
619 }
620
#ifdef CONFIG_ENCODERS
/**
 * Sets the given MpegEncContext to defaults for encoding.
 * Applies the common defaults, sets the 32 entries of default_fcode_tab for
 * motion vectors -16..15 to 1 and points the context at the shared default
 * mv penalty and fcode tables. The changed fields will not depend upon the
 * prior state of the MpegEncContext.
 */
static void MPV_encode_defaults(MpegEncContext *s)
{
    MPV_common_defaults(s);

    /* the table holds bytes, so memset() can store the value 1 directly */
    memset(default_fcode_tab + MAX_MV - 16, 1, 32);

    s->fcode_tab     = default_fcode_tab;
    s->me.mv_penalty = default_mv_penalty;
}
#endif //CONFIG_ENCODERS
638
639 /**
640  * init common structure for both encoder and decoder.
641  * this assumes that some variables like width/height are already set
642  */
643 int MPV_common_init(MpegEncContext *s)
644 {
645     int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;
646
647     s->mb_height = (s->height + 15) / 16;
648
649     if(s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height)){
650         av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
651         return -1;
652     }
653
654     if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height))
655         return -1;
656
657     dsputil_init(&s->dsp, s->avctx);
658     DCT_common_init(s);
659
660     s->flags= s->avctx->flags;
661     s->flags2= s->avctx->flags2;
662
663     s->mb_width  = (s->width  + 15) / 16;
664     s->mb_stride = s->mb_width + 1;
665     s->b8_stride = s->mb_width*2 + 1;
666     s->b4_stride = s->mb_width*4 + 1;
667     mb_array_size= s->mb_height * s->mb_stride;
668     mv_table_size= (s->mb_height+2) * s->mb_stride + 1;
669
670     /* set chroma shifts */
671     avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
672                                                     &(s->chroma_y_shift) );
673
674     /* set default edge pos, will be overriden in decode_header if needed */
675     s->h_edge_pos= s->mb_width*16;
676     s->v_edge_pos= s->mb_height*16;
677
678     s->mb_num = s->mb_width * s->mb_height;
679
680     s->block_wrap[0]=
681     s->block_wrap[1]=
682     s->block_wrap[2]=
683     s->block_wrap[3]= s->b8_stride;
684     s->block_wrap[4]=
685     s->block_wrap[5]= s->mb_stride;
686
687     y_size = s->b8_stride * (2 * s->mb_height + 1);
688     c_size = s->mb_stride * (s->mb_height + 1);
689     yc_size = y_size + 2 * c_size;
690
691     /* convert fourcc to upper case */
692     s->codec_tag=          toupper( s->avctx->codec_tag     &0xFF)
693                         + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
694                         + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16)
695                         + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);
696
697     s->stream_codec_tag=          toupper( s->avctx->stream_codec_tag     &0xFF)
698                                + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
699                                + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16)
700                                + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);
701
702     s->avctx->coded_frame= (AVFrame*)&s->current_picture;
703
704     CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error ressilience code looks cleaner with this
705     for(y=0; y<s->mb_height; y++){
706         for(x=0; x<s->mb_width; x++){
707             s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
708         }
709     }
710     s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
711
712     if (s->encoding) {
713         /* Allocate MV tables */
714         CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
715         CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
716         CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
717         CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
718         CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
719         CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
720         s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
721         s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
722         s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
723         s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
724         s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
725         s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;
726
727         if(s->msmpeg4_version){
728             CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
729         }
730         CHECKED_ALLOCZ(s->avctx->stats_out, 256);
731
732         /* Allocate MB type table */
733         CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding
734
735         CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))
736
737         CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
738         CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
739         CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
740         CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
741         CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
742         CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
743
744         if(s->avctx->noise_reduction){
745             CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
746         }
747     }
748     CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))
749
750     CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
751
752     if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
753         /* interlaced direct mode decoding tables */
754             for(i=0; i<2; i++){
755                 int j, k;
756                 for(j=0; j<2; j++){
757                     for(k=0; k<2; k++){
758                         CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
759                         s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
760                     }
761                     CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
762                     CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
763                     s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
764                 }
765                 CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
766             }
767     }
768     if (s->out_format == FMT_H263) {
769         /* ac values */
770         CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
771         s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
772         s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
773         s->ac_val[2] = s->ac_val[1] + c_size;
774
775         /* cbp values */
776         CHECKED_ALLOCZ(s->coded_block_base, y_size);
777         s->coded_block= s->coded_block_base + s->b8_stride + 1;
778
779         /* cbp, ac_pred, pred_dir */
780         CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
781         CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
782     }
783
784     if (s->h263_pred || s->h263_plus || !s->encoding) {
785         /* dc values */
786         //MN: we need these for error resilience of intra-frames
787         CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
788         s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
789         s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
790         s->dc_val[2] = s->dc_val[1] + c_size;
791         for(i=0;i<yc_size;i++)
792             s->dc_val_base[i] = 1024;
793     }
794
795     /* which mb is a intra block */
796     CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
797     memset(s->mbintra_table, 1, mb_array_size);
798
799     /* init macroblock skip table */
800     CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
801     //Note the +1 is for a quicker mpeg4 slice_end detection
802     CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
803
804     s->parse_context.state= -1;
805     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
806        s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
807        s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
808        s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
809     }
810
811     s->context_initialized = 1;
812
813     s->thread_context[0]= s;
814     for(i=1; i<s->avctx->thread_count; i++){
815         s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
816         memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
817     }
818
819     for(i=0; i<s->avctx->thread_count; i++){
820         if(init_duplicate_context(s->thread_context[i], s) < 0)
821            goto fail;
822         s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
823         s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
824     }
825
826     return 0;
827  fail:
828     MPV_common_end(s);
829     return -1;
830 }
831
832 /* init common structure for both encoder and decoder */
833 void MPV_common_end(MpegEncContext *s)
834 {
835     int i, j, k;
836
837     for(i=0; i<s->avctx->thread_count; i++){
838         free_duplicate_context(s->thread_context[i]);
839     }
840     for(i=1; i<s->avctx->thread_count; i++){
841         av_freep(&s->thread_context[i]);
842     }
843
844     av_freep(&s->parse_context.buffer);
845     s->parse_context.buffer_size=0;
846
847     av_freep(&s->mb_type);
848     av_freep(&s->p_mv_table_base);
849     av_freep(&s->b_forw_mv_table_base);
850     av_freep(&s->b_back_mv_table_base);
851     av_freep(&s->b_bidir_forw_mv_table_base);
852     av_freep(&s->b_bidir_back_mv_table_base);
853     av_freep(&s->b_direct_mv_table_base);
854     s->p_mv_table= NULL;
855     s->b_forw_mv_table= NULL;
856     s->b_back_mv_table= NULL;
857     s->b_bidir_forw_mv_table= NULL;
858     s->b_bidir_back_mv_table= NULL;
859     s->b_direct_mv_table= NULL;
860     for(i=0; i<2; i++){
861         for(j=0; j<2; j++){
862             for(k=0; k<2; k++){
863                 av_freep(&s->b_field_mv_table_base[i][j][k]);
864                 s->b_field_mv_table[i][j][k]=NULL;
865             }
866             av_freep(&s->b_field_select_table[i][j]);
867             av_freep(&s->p_field_mv_table_base[i][j]);
868             s->p_field_mv_table[i][j]=NULL;
869         }
870         av_freep(&s->p_field_select_table[i]);
871     }
872
873     av_freep(&s->dc_val_base);
874     av_freep(&s->ac_val_base);
875     av_freep(&s->coded_block_base);
876     av_freep(&s->mbintra_table);
877     av_freep(&s->cbp_table);
878     av_freep(&s->pred_dir_table);
879
880     av_freep(&s->mbskip_table);
881     av_freep(&s->prev_pict_types);
882     av_freep(&s->bitstream_buffer);
883     s->allocated_bitstream_buffer_size=0;
884
885     av_freep(&s->avctx->stats_out);
886     av_freep(&s->ac_stats);
887     av_freep(&s->error_status_table);
888     av_freep(&s->mb_index2xy);
889     av_freep(&s->lambda_table);
890     av_freep(&s->q_intra_matrix);
891     av_freep(&s->q_inter_matrix);
892     av_freep(&s->q_intra_matrix16);
893     av_freep(&s->q_inter_matrix16);
894     av_freep(&s->input_picture);
895     av_freep(&s->reordered_input_picture);
896     av_freep(&s->dct_offset);
897
898     if(s->picture){
899         for(i=0; i<MAX_PICTURE_COUNT; i++){
900             free_picture(s, &s->picture[i]);
901         }
902     }
903     av_freep(&s->picture);
904     s->context_initialized = 0;
905     s->last_picture_ptr=
906     s->next_picture_ptr=
907     s->current_picture_ptr= NULL;
908     s->linesize= s->uvlinesize= 0;
909
910     for(i=0; i<3; i++)
911         av_freep(&s->visualization_buffer[i]);
912
913     avcodec_default_free_buffers(s->avctx);
914 }
915
916 #ifdef CONFIG_ENCODERS
917
918 /* init video encoder */
919 int MPV_encode_init(AVCodecContext *avctx)
920 {
921     MpegEncContext *s = avctx->priv_data;
922     int i;
923     int chroma_h_shift, chroma_v_shift;
924
925     MPV_encode_defaults(s);
926
927     switch (avctx->codec_id) {
928     case CODEC_ID_MPEG2VIDEO:
929         if(avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUV422P){
930             av_log(avctx, AV_LOG_ERROR, "only YUV420 and YUV422 are supported\n");
931             return -1;
932         }
933         break;
934     case CODEC_ID_LJPEG:
935     case CODEC_ID_MJPEG:
936         if(avctx->pix_fmt != PIX_FMT_YUVJ420P && avctx->pix_fmt != PIX_FMT_YUVJ422P &&
937            ((avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUV422P) || avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL)){
938             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
939             return -1;
940         }
941         break;
942     default:
943         if(avctx->pix_fmt != PIX_FMT_YUV420P){
944             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
945             return -1;
946         }
947     }
948
949     switch (avctx->pix_fmt) {
950     case PIX_FMT_YUVJ422P:
951     case PIX_FMT_YUV422P:
952         s->chroma_format = CHROMA_422;
953         break;
954     case PIX_FMT_YUVJ420P:
955     case PIX_FMT_YUV420P:
956     default:
957         s->chroma_format = CHROMA_420;
958         break;
959     }
960
961     s->bit_rate = avctx->bit_rate;
962     s->width = avctx->width;
963     s->height = avctx->height;
964     if(avctx->gop_size > 600 && avctx->strict_std_compliance>FF_COMPLIANCE_EXPERIMENTAL){
965         av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
966         avctx->gop_size=600;
967     }
968     s->gop_size = avctx->gop_size;
969     s->avctx = avctx;
970     s->flags= avctx->flags;
971     s->flags2= avctx->flags2;
972     s->max_b_frames= avctx->max_b_frames;
973     s->codec_id= avctx->codec->id;
974     s->luma_elim_threshold  = avctx->luma_elim_threshold;
975     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
976     s->strict_std_compliance= avctx->strict_std_compliance;
977     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
978     s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
979     s->mpeg_quant= avctx->mpeg_quant;
980     s->rtp_mode= !!avctx->rtp_payload_size;
981     s->intra_dc_precision= avctx->intra_dc_precision;
982     s->user_specified_pts = AV_NOPTS_VALUE;
983
984     if (s->gop_size <= 1) {
985         s->intra_only = 1;
986         s->gop_size = 12;
987     } else {
988         s->intra_only = 0;
989     }
990
991     s->me_method = avctx->me_method;
992
993     /* Fixed QSCALE */
994     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
995
996     s->adaptive_quant= (   s->avctx->lumi_masking
997                         || s->avctx->dark_masking
998                         || s->avctx->temporal_cplx_masking
999                         || s->avctx->spatial_cplx_masking
1000                         || s->avctx->p_masking
1001                         || s->avctx->border_masking
1002                         || (s->flags&CODEC_FLAG_QP_RD))
1003                        && !s->fixed_qscale;
1004
1005     s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
1006     s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
1007     s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
1008     s->intra_vlc_format= !!(s->flags2 & CODEC_FLAG2_INTRA_VLC);
1009     s->q_scale_type= !!(s->flags2 & CODEC_FLAG2_NON_LINEAR_QUANT);
1010
1011     if(avctx->rc_max_rate && !avctx->rc_buffer_size){
1012         av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed, for encoding with a maximum bitrate\n");
1013         return -1;
1014     }
1015
1016     if(avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate){
1017         av_log(avctx, AV_LOG_INFO, "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
1018     }
1019
1020     if(avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate){
1021         av_log(avctx, AV_LOG_INFO, "bitrate below min bitrate\n");
1022         return -1;
1023     }
1024
1025     if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){
1026         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
1027         return -1;
1028     }
1029
1030     if(   s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate
1031        && (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO)
1032        && 90000LL * (avctx->rc_buffer_size-1) > s->avctx->rc_max_rate*0xFFFFLL){
1033
1034         av_log(avctx, AV_LOG_INFO, "Warning vbv_delay will be set to 0xFFFF (=VBR) as the specified vbv buffer is too large for the given bitrate!\n");
1035     }
1036
1037     if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4
1038        && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P && s->codec_id != CODEC_ID_FLV1){
1039         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
1040         return -1;
1041     }
1042
1043     if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
1044         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decision\n");
1045         return -1;
1046     }
1047
1048     if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
1049         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
1050         return -1;
1051     }
1052
1053     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
1054         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
1055         return -1;
1056     }
1057
1058     if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
1059         av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
1060         return -1;
1061     }
1062
1063     if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
1064         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
1065         return -1;
1066     }
1067
1068     if((s->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN))
1069        && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO){
1070         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
1071         return -1;
1072     }
1073
1074     if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
1075         av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supported by codec\n");
1076         return -1;
1077     }
1078
1079     if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
1080         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
1081         return -1;
1082     }
1083
1084     if((s->flags & CODEC_FLAG_QP_RD) && s->avctx->mb_decision != FF_MB_DECISION_RD){
1085         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
1086         return -1;
1087     }
1088
1089     if(s->avctx->scenechange_threshold < 1000000000 && (s->flags & CODEC_FLAG_CLOSED_GOP)){
1090         av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection arent supported yet, set threshold to 1000000000\n");
1091         return -1;
1092     }
1093
1094     if((s->flags2 & CODEC_FLAG2_INTRA_VLC) && s->codec_id != CODEC_ID_MPEG2VIDEO){
1095         av_log(avctx, AV_LOG_ERROR, "intra vlc table not supported by codec\n");
1096         return -1;
1097     }
1098
1099     if(s->flags & CODEC_FLAG_LOW_DELAY){
1100         if (s->codec_id != CODEC_ID_MPEG2VIDEO && s->codec_id != CODEC_ID_MPEG1VIDEO){
1101             av_log(avctx, AV_LOG_ERROR, "low delay forcing is only available for mpeg1/2\n");
1102             return -1;
1103         }
1104         if (s->max_b_frames != 0){
1105             av_log(avctx, AV_LOG_ERROR, "b frames cannot be used with low delay\n");
1106             return -1;
1107         }
1108     }
1109
1110     if(s->q_scale_type == 1){
1111         if(s->codec_id != CODEC_ID_MPEG2VIDEO){
1112             av_log(avctx, AV_LOG_ERROR, "non linear quant is only available for mpeg2\n");
1113             return -1;
1114         }
1115         if(avctx->qmax > 12){
1116             av_log(avctx, AV_LOG_ERROR, "non linear quant only supports qmax <= 12 currently\n");
1117             return -1;
1118         }
1119     }
1120
1121     if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4
1122        && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO
1123        && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){
1124         av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n");
1125         return -1;
1126     }
1127
1128     if(s->avctx->thread_count > 1)
1129         s->rtp_mode= 1;
1130
1131     if(!avctx->time_base.den || !avctx->time_base.num){
1132         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
1133         return -1;
1134     }
1135
1136     i= (INT_MAX/2+128)>>8;
1137     if(avctx->me_threshold >= i){
1138         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n", i - 1);
1139         return -1;
1140     }
1141     if(avctx->mb_threshold >= i){
1142         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n", i - 1);
1143         return -1;
1144     }
1145
1146     if(avctx->b_frame_strategy && (avctx->flags&CODEC_FLAG_PASS2)){
1147         av_log(avctx, AV_LOG_INFO, "notice: b_frame_strategy only affects the first pass\n");
1148         avctx->b_frame_strategy = 0;
1149     }
1150
1151     i= ff_gcd(avctx->time_base.den, avctx->time_base.num);
1152     if(i > 1){
1153         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
1154         avctx->time_base.den /= i;
1155         avctx->time_base.num /= i;
1156 //        return -1;
1157     }
1158
1159     if(s->codec_id==CODEC_ID_MJPEG){
1160         s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
1161         s->inter_quant_bias= 0;
1162     }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
1163         s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
1164         s->inter_quant_bias= 0;
1165     }else{
1166         s->intra_quant_bias=0;
1167         s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
1168     }
1169
1170     if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
1171         s->intra_quant_bias= avctx->intra_quant_bias;
1172     if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
1173         s->inter_quant_bias= avctx->inter_quant_bias;
1174
1175     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
1176
1177     if(avctx->codec_id == CODEC_ID_MPEG4 && s->avctx->time_base.den > (1<<16)-1){
1178         av_log(avctx, AV_LOG_ERROR, "timebase not supported by mpeg 4 standard\n");
1179         return -1;
1180     }
1181     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
1182
1183     switch(avctx->codec->id) {
1184     case CODEC_ID_MPEG1VIDEO:
1185         s->out_format = FMT_MPEG1;
1186         s->low_delay= !!(s->flags & CODEC_FLAG_LOW_DELAY);
1187         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1188         break;
1189     case CODEC_ID_MPEG2VIDEO:
1190         s->out_format = FMT_MPEG1;
1191         s->low_delay= !!(s->flags & CODEC_FLAG_LOW_DELAY);
1192         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1193         s->rtp_mode= 1;
1194         break;
1195     case CODEC_ID_LJPEG:
1196     case CODEC_ID_JPEGLS:
1197     case CODEC_ID_MJPEG:
1198         s->out_format = FMT_MJPEG;
1199         s->intra_only = 1; /* force intra only for jpeg */
1200         s->mjpeg_write_tables = avctx->codec->id != CODEC_ID_JPEGLS;
1201         s->mjpeg_data_only_frames = 0; /* write all the needed headers */
1202         s->mjpeg_vsample[0] = 2;
1203         s->mjpeg_vsample[1] = 2>>chroma_v_shift;
1204         s->mjpeg_vsample[2] = 2>>chroma_v_shift;
1205         s->mjpeg_hsample[0] = 2;
1206         s->mjpeg_hsample[1] = 2>>chroma_h_shift;
1207         s->mjpeg_hsample[2] = 2>>chroma_h_shift;
1208         if (mjpeg_init(s) < 0)
1209             return -1;
1210         avctx->delay=0;
1211         s->low_delay=1;
1212         break;
1213 #ifdef CONFIG_H261_ENCODER
1214     case CODEC_ID_H261:
1215         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
1216             av_log(avctx, AV_LOG_ERROR, "The specified picture size of %dx%d is not valid for the H.261 codec.\nValid sizes are 176x144, 352x288\n", s->width, s->height);
1217             return -1;
1218         }
1219         s->out_format = FMT_H261;
1220         avctx->delay=0;
1221         s->low_delay=1;
1222         break;
1223 #endif
1224     case CODEC_ID_H263:
1225         if (h263_get_picture_format(s->width, s->height) == 7) {
1226             av_log(avctx, AV_LOG_INFO, "The specified picture size of %dx%d is not valid for the H.263 codec.\nValid sizes are 128x96, 176x144, 352x288, 704x576, and 1408x1152. Try H.263+.\n", s->width, s->height);
1227             return -1;
1228         }
1229         s->out_format = FMT_H263;
1230         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1231         avctx->delay=0;
1232         s->low_delay=1;
1233         break;
1234     case CODEC_ID_H263P:
1235         s->out_format = FMT_H263;
1236         s->h263_plus = 1;
1237         /* Fx */
1238         s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
1239         s->h263_aic= (avctx->flags & CODEC_FLAG_AC_PRED) ? 1:0;
1240         s->modified_quant= s->h263_aic;
1241         s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
1242         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1243         s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
1244         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1245         s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
1246
1247         /* /Fx */
1248         /* These are just to be sure */
1249         avctx->delay=0;
1250         s->low_delay=1;
1251         break;
1252     case CODEC_ID_FLV1:
1253         s->out_format = FMT_H263;
1254         s->h263_flv = 2; /* format = 1; 11-bit codes */
1255         s->unrestricted_mv = 1;
1256         s->rtp_mode=0; /* don't allow GOB */
1257         avctx->delay=0;
1258         s->low_delay=1;
1259         break;
1260     case CODEC_ID_RV10:
1261         s->out_format = FMT_H263;
1262         avctx->delay=0;
1263         s->low_delay=1;
1264         break;
1265     case CODEC_ID_RV20:
1266         s->out_format = FMT_H263;
1267         avctx->delay=0;
1268         s->low_delay=1;
1269         s->modified_quant=1;
1270         s->h263_aic=1;
1271         s->h263_plus=1;
1272         s->loop_filter=1;
1273         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1274         break;
1275     case CODEC_ID_MPEG4:
1276         s->out_format = FMT_H263;
1277         s->h263_pred = 1;
1278         s->unrestricted_mv = 1;
1279         s->low_delay= s->max_b_frames ? 0 : 1;
1280         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1281         break;
1282     case CODEC_ID_MSMPEG4V1:
1283         s->out_format = FMT_H263;
1284         s->h263_msmpeg4 = 1;
1285         s->h263_pred = 1;
1286         s->unrestricted_mv = 1;
1287         s->msmpeg4_version= 1;
1288         avctx->delay=0;
1289         s->low_delay=1;
1290         break;
1291     case CODEC_ID_MSMPEG4V2:
1292         s->out_format = FMT_H263;
1293         s->h263_msmpeg4 = 1;
1294         s->h263_pred = 1;
1295         s->unrestricted_mv = 1;
1296         s->msmpeg4_version= 2;
1297         avctx->delay=0;
1298         s->low_delay=1;
1299         break;
1300     case CODEC_ID_MSMPEG4V3:
1301         s->out_format = FMT_H263;
1302         s->h263_msmpeg4 = 1;
1303         s->h263_pred = 1;
1304         s->unrestricted_mv = 1;
1305         s->msmpeg4_version= 3;
1306         s->flipflop_rounding=1;
1307         avctx->delay=0;
1308         s->low_delay=1;
1309         break;
1310     case CODEC_ID_WMV1:
1311         s->out_format = FMT_H263;
1312         s->h263_msmpeg4 = 1;
1313         s->h263_pred = 1;
1314         s->unrestricted_mv = 1;
1315         s->msmpeg4_version= 4;
1316         s->flipflop_rounding=1;
1317         avctx->delay=0;
1318         s->low_delay=1;
1319         break;
1320     case CODEC_ID_WMV2:
1321         s->out_format = FMT_H263;
1322         s->h263_msmpeg4 = 1;
1323         s->h263_pred = 1;
1324         s->unrestricted_mv = 1;
1325         s->msmpeg4_version= 5;
1326         s->flipflop_rounding=1;
1327         avctx->delay=0;
1328         s->low_delay=1;
1329         break;
1330     default:
1331         return -1;
1332     }
1333
1334     avctx->has_b_frames= !s->low_delay;
1335
1336     s->encoding = 1;
1337
1338     /* init */
1339     if (MPV_common_init(s) < 0)
1340         return -1;
1341
1342     if(s->modified_quant)
1343         s->chroma_qscale_table= ff_h263_chroma_qscale_table;
1344     s->progressive_frame=
1345     s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN));
1346     s->quant_precision=5;
1347
1348     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
1349     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
1350
1351 #ifdef CONFIG_H261_ENCODER
1352     if (s->out_format == FMT_H261)
1353         ff_h261_encode_init(s);
1354 #endif
1355     if (s->out_format == FMT_H263)
1356         h263_encode_init(s);
1357     if(s->msmpeg4_version)
1358         ff_msmpeg4_encode_init(s);
1359     if (s->out_format == FMT_MPEG1)
1360         ff_mpeg1_encode_init(s);
1361
1362     /* init q matrix */
1363     for(i=0;i<64;i++) {
1364         int j= s->dsp.idct_permutation[i];
1365         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
1366             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
1367             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
1368         }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1369             s->intra_matrix[j] =
1370             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1371         }else
1372         { /* mpeg1/2 */
1373             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
1374             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1375         }
1376         if(s->avctx->intra_matrix)
1377             s->intra_matrix[j] = s->avctx->intra_matrix[i];
1378         if(s->avctx->inter_matrix)
1379             s->inter_matrix[j] = s->avctx->inter_matrix[i];
1380     }
1381
1382     /* precompute matrix */
1383     /* for mjpeg, we do include qscale in the matrix */
1384     if (s->out_format != FMT_MJPEG) {
1385         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
1386                        s->intra_matrix, s->intra_quant_bias, avctx->qmin, 31, 1);
1387         convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
1388                        s->inter_matrix, s->inter_quant_bias, avctx->qmin, 31, 0);
1389     }
1390
1391     if(ff_rate_control_init(s) < 0)
1392         return -1;
1393
1394     return 0;
1395 }
1396
1397 int MPV_encode_end(AVCodecContext *avctx)
1398 {
1399     MpegEncContext *s = avctx->priv_data;
1400
1401     ff_rate_control_uninit(s);
1402
1403     MPV_common_end(s);
1404     if (s->out_format == FMT_MJPEG)
1405         mjpeg_close(s);
1406
1407     av_freep(&avctx->extradata);
1408
1409     return 0;
1410 }
1411
1412 #endif //CONFIG_ENCODERS
1413
1414 void init_rl(RLTable *rl, uint8_t static_store[2][2*MAX_RUN + MAX_LEVEL + 3])
1415 {
1416     int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
1417     uint8_t index_run[MAX_RUN+1];
1418     int last, run, level, start, end, i;
1419
1420     /* If table is static, we can quit if rl->max_level[0] is not NULL */
1421     if(static_store && rl->max_level[0])
1422         return;
1423
1424     /* compute max_level[], max_run[] and index_run[] */
1425     for(last=0;last<2;last++) {
1426         if (last == 0) {
1427             start = 0;
1428             end = rl->last;
1429         } else {
1430             start = rl->last;
1431             end = rl->n;
1432         }
1433
1434         memset(max_level, 0, MAX_RUN + 1);
1435         memset(max_run, 0, MAX_LEVEL + 1);
1436         memset(index_run, rl->n, MAX_RUN + 1);
1437         for(i=start;i<end;i++) {
1438             run = rl->table_run[i];
1439             level = rl->table_level[i];
1440             if (index_run[run] == rl->n)
1441                 index_run[run] = i;
1442             if (level > max_level[run])
1443                 max_level[run] = level;
1444             if (run > max_run[level])
1445                 max_run[level] = run;
1446         }
1447         if(static_store)
1448             rl->max_level[last] = static_store[last];
1449         else
1450             rl->max_level[last] = av_malloc(MAX_RUN + 1);
1451         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
1452         if(static_store)
1453             rl->max_run[last] = static_store[last] + MAX_RUN + 1;
1454         else
1455             rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
1456         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
1457         if(static_store)
1458             rl->index_run[last] = static_store[last] + MAX_RUN + MAX_LEVEL + 2;
1459         else
1460             rl->index_run[last] = av_malloc(MAX_RUN + 1);
1461         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
1462     }
1463 }
1464
/**
 * Extend an image by replicating its border pixels into a margin of
 * 'w' pixels on every side.
 *
 * @param buf    pointer to the top left pixel of the image; the memory
 *               around the image must have room for the margin
 * @param wrap   distance in bytes between the starts of two rows
 * @param width  image width in pixels
 * @param height image height in rows
 * @param w      width of the margin to fill
 */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *const bottom = buf + (height - 1) * wrap;
    uint8_t *row;
    int n;

    /* above and below: copies of the first and of the last image row */
    for(n=1; n<=w; n++) {
        memcpy(buf    - n * wrap, buf,    width);
        memcpy(bottom + n * wrap, bottom, width);
    }

    /* left and right of every image row: its first and its last pixel */
    row = buf;
    for(n=0; n<height; n++) {
        memset(row - w,     row[0],         w);
        memset(row + width, row[width - 1], w);
        row += wrap;
    }

    /* the four corner areas: filled with the nearest corner pixel */
    for(n=1; n<=w; n++) {
        uint8_t *above = buf    - n * wrap;
        uint8_t *below = bottom + n * wrap;

        memset(above - w,     buf[0],            w); /* top left */
        memset(above + width, buf[width - 1],    w); /* top right */
        memset(below - w,     bottom[0],         w); /* bottom left */
        memset(below + width, bottom[width - 1], w); /* bottom right */
    }
}
1493
1494 int ff_find_unused_picture(MpegEncContext *s, int shared){
1495     int i;
1496
1497     if(shared){
1498         for(i=0; i<MAX_PICTURE_COUNT; i++){
1499             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
1500         }
1501     }else{
1502         for(i=0; i<MAX_PICTURE_COUNT; i++){
1503             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
1504         }
1505         for(i=0; i<MAX_PICTURE_COUNT; i++){
1506             if(s->picture[i].data[0]==NULL) return i;
1507         }
1508     }
1509
1510     assert(0);
1511     return -1;
1512 }
1513
1514 static void update_noise_reduction(MpegEncContext *s){
1515     int intra, i;
1516
1517     for(intra=0; intra<2; intra++){
1518         if(s->dct_count[intra] > (1<<16)){
1519             for(i=0; i<64; i++){
1520                 s->dct_error_sum[intra][i] >>=1;
1521             }
1522             s->dct_count[intra] >>= 1;
1523         }
1524
1525         for(i=0; i<64; i++){
1526             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
1527         }
1528     }
1529 }
1530
1531 /**
1532  * generic function for encode/decode called after coding/decoding the header and before a frame is coded/decoded
1533  */
1534 int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
1535 {
1536     int i;
1537     AVFrame *pic;
1538     s->mb_skipped = 0;
1539
1540     assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
1541
1542     /* mark&release old frames */
1543     if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
1544       if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
1545         avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);
1546
1547         /* release forgotten pictures */
1548         /* if(mpeg124/h263) */
1549         if(!s->encoding){
1550             for(i=0; i<MAX_PICTURE_COUNT; i++){
1551                 if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
1552                     av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
1553                     avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
1554                 }
1555             }
1556         }
1557       }
1558     }
1559 alloc:
1560     if(!s->encoding){
1561         /* release non reference frames */
1562         for(i=0; i<MAX_PICTURE_COUNT; i++){
1563             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1564                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1565             }
1566         }
1567
1568         if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
1569             pic= (AVFrame*)s->current_picture_ptr; //we allready have a unused image (maybe it was set before reading the header)
1570         else{
1571             i= ff_find_unused_picture(s, 0);
1572             pic= (AVFrame*)&s->picture[i];
1573         }
1574
1575         pic->reference= (s->pict_type != B_TYPE || s->codec_id == CODEC_ID_H264)
1576                         && !s->dropable ? 3 : 0;
1577
1578         pic->coded_picture_number= s->coded_picture_number++;
1579
1580         if( alloc_picture(s, (Picture*)pic, 0) < 0)
1581             return -1;
1582
1583         s->current_picture_ptr= (Picture*)pic;
1584         s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
1585         s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
1586     }
1587
1588     s->current_picture_ptr->pict_type= s->pict_type;
1589 //    if(s->flags && CODEC_FLAG_QSCALE)
1590   //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
1591     s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;
1592
1593     copy_picture(&s->current_picture, s->current_picture_ptr);
1594
1595     if (s->pict_type != B_TYPE) {
1596         s->last_picture_ptr= s->next_picture_ptr;
1597         if(!s->dropable)
1598             s->next_picture_ptr= s->current_picture_ptr;
1599     }
1600 /*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
1601         s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL,
1602         s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL,
1603         s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
1604         s->pict_type, s->dropable);*/
1605
1606     if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
1607     if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);
1608
1609     if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL) && !s->dropable){
1610         av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
1611         assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
1612         goto alloc;
1613     }
1614
1615     assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));
1616
1617     if(s->picture_structure!=PICT_FRAME){
1618         int i;
1619         for(i=0; i<4; i++){
1620             if(s->picture_structure == PICT_BOTTOM_FIELD){
1621                  s->current_picture.data[i] += s->current_picture.linesize[i];
1622             }
1623             s->current_picture.linesize[i] *= 2;
1624             s->last_picture.linesize[i] *=2;
1625             s->next_picture.linesize[i] *=2;
1626         }
1627     }
1628
1629     s->hurry_up= s->avctx->hurry_up;
1630     s->error_resilience= avctx->error_resilience;
1631
1632     /* set dequantizer, we can't do it during init as it might change for mpeg4
1633        and we can't do it in the header decode as init isnt called for mpeg4 there yet */
1634     if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
1635         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1636         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1637     }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1638         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1639         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1640     }else{
1641         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1642         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1643     }
1644
1645     if(s->dct_error_sum){
1646         assert(s->avctx->noise_reduction && s->encoding);
1647
1648         update_noise_reduction(s);
1649     }
1650
1651 #ifdef HAVE_XVMC
1652     if(s->avctx->xvmc_acceleration)
1653         return XVMC_field_start(s, avctx);
1654 #endif
1655     return 0;
1656 }
1657
1658 /* generic function for encode/decode called after a frame has been coded/decoded */
1659 void MPV_frame_end(MpegEncContext *s)
1660 {
1661     int i;
1662     /* draw edge for correct motion prediction if outside */
1663 #ifdef HAVE_XVMC
1664 //just to make sure that all data is rendered.
1665     if(s->avctx->xvmc_acceleration){
1666         XVMC_field_end(s);
1667     }else
1668 #endif
1669     if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
1670             draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
1671             draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1672             draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1673     }
1674     emms_c();
1675
1676     s->last_pict_type    = s->pict_type;
1677     s->last_lambda_for[s->pict_type]= s->current_picture_ptr->quality;
1678     if(s->pict_type!=B_TYPE){
1679         s->last_non_b_pict_type= s->pict_type;
1680     }
1681 #if 0
1682         /* copy back current_picture variables */
1683     for(i=0; i<MAX_PICTURE_COUNT; i++){
1684         if(s->picture[i].data[0] == s->current_picture.data[0]){
1685             s->picture[i]= s->current_picture;
1686             break;
1687         }
1688     }
1689     assert(i<MAX_PICTURE_COUNT);
1690 #endif
1691
1692     if(s->encoding){
1693         /* release non-reference frames */
1694         for(i=0; i<MAX_PICTURE_COUNT; i++){
1695             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1696                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1697             }
1698         }
1699     }
1700     // clear copies, to avoid confusion
1701 #if 0
1702     memset(&s->last_picture, 0, sizeof(Picture));
1703     memset(&s->next_picture, 0, sizeof(Picture));
1704     memset(&s->current_picture, 0, sizeof(Picture));
1705 #endif
1706     s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
1707 }
1708
1709 /**
1710  * draws an line from (ex, ey) -> (sx, sy).
1711  * @param w width of the image
1712  * @param h height of the image
1713  * @param stride stride/linesize of the image
1714  * @param color color of the arrow
1715  */
1716 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1717     int x, y, fr, f;
1718
1719     sx= av_clip(sx, 0, w-1);
1720     sy= av_clip(sy, 0, h-1);
1721     ex= av_clip(ex, 0, w-1);
1722     ey= av_clip(ey, 0, h-1);
1723
1724     buf[sy*stride + sx]+= color;
1725
1726     if(FFABS(ex - sx) > FFABS(ey - sy)){
1727         if(sx > ex){
1728             FFSWAP(int, sx, ex);
1729             FFSWAP(int, sy, ey);
1730         }
1731         buf+= sx + sy*stride;
1732         ex-= sx;
1733         f= ((ey-sy)<<16)/ex;
1734         for(x= 0; x <= ex; x++){
1735             y = (x*f)>>16;
1736             fr= (x*f)&0xFFFF;
1737             buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
1738             buf[(y+1)*stride + x]+= (color*         fr )>>16;
1739         }
1740     }else{
1741         if(sy > ey){
1742             FFSWAP(int, sx, ex);
1743             FFSWAP(int, sy, ey);
1744         }
1745         buf+= sx + sy*stride;
1746         ey-= sy;
1747         if(ey) f= ((ex-sx)<<16)/ey;
1748         else   f= 0;
1749         for(y= 0; y <= ey; y++){
1750             x = (y*f)>>16;
1751             fr= (y*f)&0xFFFF;
1752             buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;;
1753             buf[y*stride + x+1]+= (color*         fr )>>16;;
1754         }
1755     }
1756 }
1757
/**
 * Draws an arrow from (ex, ey) -> (sx, sy); the two head strokes are attached
 * to (sx, sy) and are only drawn for vectors longer than 3 samples.
 * The coordinates are limited to 100 samples outside the image before use.
 * @param buf pointer to the top left sample of the image
 * @param w width of the image
 * @param h height of the image
 * @param stride stride/linesize of the image
 * @param color color of the arrow
 */
static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
    int vec_x, vec_y;

    sx= av_clip(sx, -100, w+100);
    sy= av_clip(sy, -100, h+100);
    ex= av_clip(ex, -100, w+100);
    ey= av_clip(ey, -100, h+100);

    vec_x= ex - sx;
    vec_y= ey - sy;

    if(vec_x*vec_x + vec_y*vec_y > 3*3){
        /* the shaft direction rotated by 45 degrees gives the head strokes */
        int head_x=  vec_x + vec_y;
        int head_y= -vec_x + vec_y;
        int norm= ff_sqrt((head_x*head_x + head_y*head_y)<<8);

        //FIXME subpixel accuracy
        head_x= ROUNDED_DIV(head_x*3<<4, norm);
        head_y= ROUNDED_DIV(head_y*3<<4, norm);

        draw_line(buf, sx, sy, sx + head_x, sy + head_y, w, h, stride, color);
        draw_line(buf, sx, sy, sx - head_y, sy + head_x, w, h, stride, color);
    }

    /* the shaft itself */
    draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
}
1790
1791 /**
1792  * Prints debugging info for the given picture.
      *
      * Does nothing when pict or pict->mb_type is NULL. Otherwise two independent
      * parts run, selected by the debug flags of s->avctx:
      *  - FF_DEBUG_SKIP / FF_DEBUG_QP / FF_DEBUG_MB_TYPE: a textual dump with one
      *    group of characters per macroblock is written through av_log().
      *  - FF_DEBUG_VIS_QP / FF_DEBUG_VIS_MB_TYPE / debug_mv: the three planes are
      *    copied into s->visualization_buffer, pict->data[] is redirected to the
      *    copies and motion vectors, qscale or macroblock types are painted into
      *    them.
      *
      * Side effects of the visual part: s->low_delay is set to 0, pict->type is
      * set to FF_BUFFER_TYPE_COPY and the visited entries of s->mbskip_table are
      * cleared.
      *
      * @param s    codec context supplying the macroblock geometry and the flags
      * @param pict picture to dump / draw into
1793  */
1794 void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
1795
1796     if(!pict || !pict->mb_type) return;
1797
         /* ---- textual dump ---- */
1798     if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
1799         int x,y;
1800
1801         av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
1802         switch (pict->pict_type) {
1803             case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
1804             case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
1805             case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
1806             case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
1807             case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
1808             case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
1809         }
             /* one text row per macroblock row */
1810         for(y=0; y<s->mb_height; y++){
1811             for(x=0; x<s->mb_width; x++){
1812                 if(s->avctx->debug&FF_DEBUG_SKIP){
                         /* skip counter of the macroblock, capped to one digit */
1813                     int count= s->mbskip_table[x + y*s->mb_stride];
1814                     if(count>9) count=9;
1815                     av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
1816                 }
1817                 if(s->avctx->debug&FF_DEBUG_QP){
1818                     av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
1819                 }
1820                 if(s->avctx->debug&FF_DEBUG_MB_TYPE){
1821                     int mb_type= pict->mb_type[x + y*s->mb_stride];
1822                     //Type & MV direction
                         /* first matching test wins:
                            P PCM, A intra with AC prediction, i intra 4x4, I intra 16x16,
                            d direct+skip, D direct, g GMC+skip, G GMC, S skip,
                            > list 1 unused, < list 0 unused, X both lists used */
1823                     if(IS_PCM(mb_type))
1824                         av_log(s->avctx, AV_LOG_DEBUG, "P");
1825                     else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
1826                         av_log(s->avctx, AV_LOG_DEBUG, "A");
1827                     else if(IS_INTRA4x4(mb_type))
1828                         av_log(s->avctx, AV_LOG_DEBUG, "i");
1829                     else if(IS_INTRA16x16(mb_type))
1830                         av_log(s->avctx, AV_LOG_DEBUG, "I");
1831                     else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
1832                         av_log(s->avctx, AV_LOG_DEBUG, "d");
1833                     else if(IS_DIRECT(mb_type))
1834                         av_log(s->avctx, AV_LOG_DEBUG, "D");
1835                     else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
1836                         av_log(s->avctx, AV_LOG_DEBUG, "g");
1837                     else if(IS_GMC(mb_type))
1838                         av_log(s->avctx, AV_LOG_DEBUG, "G");
1839                     else if(IS_SKIP(mb_type))
1840                         av_log(s->avctx, AV_LOG_DEBUG, "S");
1841                     else if(!USES_LIST(mb_type, 1))
1842                         av_log(s->avctx, AV_LOG_DEBUG, ">");
1843                     else if(!USES_LIST(mb_type, 0))
1844                         av_log(s->avctx, AV_LOG_DEBUG, "<");
1845                     else{
1846                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1847                         av_log(s->avctx, AV_LOG_DEBUG, "X");
1848                     }
1849
1850                     //segmentation
                         /* + 8x8, - 16x8, | 8x16, blank for intra or 16x16, ? for anything else */
1851                     if(IS_8X8(mb_type))
1852                         av_log(s->avctx, AV_LOG_DEBUG, "+");
1853                     else if(IS_16X8(mb_type))
1854                         av_log(s->avctx, AV_LOG_DEBUG, "-");
1855                     else if(IS_8X16(mb_type))
1856                         av_log(s->avctx, AV_LOG_DEBUG, "|");
1857                     else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
1858                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1859                     else
1860                         av_log(s->avctx, AV_LOG_DEBUG, "?");
1861
1862
                         /* interlaced macroblocks are only flagged ('=') for H.264 */
1863                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
1864                         av_log(s->avctx, AV_LOG_DEBUG, "=");
1865                     else
1866                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1867                 }
1868 //                av_log(s->avctx, AV_LOG_DEBUG, " ");
1869             }
1870             av_log(s->avctx, AV_LOG_DEBUG, "\n");
1871         }
1872     }
1873
         /* ---- visual overlay drawn into a copy of the picture ---- */
1874     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
             /* right shift applied to the stored vectors before drawing;
                presumably converts half-/quarter-sample units to full samples */
1875         const int shift= 1 + s->quarter_sample;
1876         int mb_y;
1877         uint8_t *ptr;
1878         int i;
1879         int h_chroma_shift, v_chroma_shift;
1880         const int width = s->avctx->width;
1881         const int height= s->avctx->height;
             /* log2 of the number of motion vectors per macroblock in each direction */
1882         const int mv_sample_log2= 4 - pict->motion_subsample_log2;
             /* row stride of motion_val; every codec except H.264 gets one extra entry per row here */
1883         const int mv_stride= (s->mb_width << mv_sample_log2) + (s->codec_id == CODEC_ID_H264 ? 0 : 1);
1884         s->low_delay=0; //needed to see the vectors without trashing the buffers
1885
1886         avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
             /* work on a private copy: the chroma planes are copied with their vertically subsampled height */
1887         for(i=0; i<3; i++){
1888             memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
1889             pict->data[i]= s->visualization_buffer[i];
1890         }
1891         pict->type= FF_BUFFER_TYPE_COPY;
1892         ptr= pict->data[0];
1893
1894         for(mb_y=0; mb_y<s->mb_height; mb_y++){
1895             int mb_x;
1896             for(mb_x=0; mb_x<s->mb_width; mb_x++){
1897                 const int mb_index= mb_x + mb_y*s->mb_stride;
                     /* motion vectors: arrows drawn into the luma plane */
1898                 if((s->avctx->debug_mv) && pict->motion_val){
1899                   int type;
                       /* type 0: forward vectors of P frames, 1: forward vectors of B frames,
                          2: backward vectors of B frames; each one needs its debug_mv flag */
1900                   for(type=0; type<3; type++){
1901                     int direction = 0;
1902                     switch (type) {
1903                       case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
1904                                 continue;
1905                               direction = 0;
1906                               break;
1907                       case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
1908                                 continue;
1909                               direction = 0;
1910                               break;
1911                       case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
1912                                 continue;
1913                               direction = 1;
1914                               break;
1915                     }
1916                     if(!USES_LIST(pict->mb_type[mb_index], direction))
1917                         continue;
1918
                         /* one arrow per partition, anchored at the partition centre */
1919                     if(IS_8X8(pict->mb_type[mb_index])){
1920                       int i;
1921                       for(i=0; i<4; i++){
1922                         int sx= mb_x*16 + 4 + 8*(i&1);
1923                         int sy= mb_y*16 + 4 + 8*(i>>1);
1924                         int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
1925                         int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1926                         int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1927                         draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1928                       }
1929                     }else if(IS_16X8(pict->mb_type[mb_index])){
1930                       int i;
1931                       for(i=0; i<2; i++){
1932                         int sx=mb_x*16 + 8;
1933                         int sy=mb_y*16 + 4 + 8*i;
1934                         int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
1935                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1936                         int my=(pict->motion_val[direction][xy][1]>>shift);
1937
                             /* vertical component is doubled for interlaced macroblocks */
1938                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1939                             my*=2;
1940
1941                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1942                       }
1943                     }else if(IS_8X16(pict->mb_type[mb_index])){
1944                       int i;
1945                       for(i=0; i<2; i++){
1946                         int sx=mb_x*16 + 4 + 8*i;
1947                         int sy=mb_y*16 + 8;
1948                         int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
1949                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1950                         int my=(pict->motion_val[direction][xy][1]>>shift);
1951
1952                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1953                             my*=2;
1954
1955                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1956                       }
1957                     }else{
1958                       int sx= mb_x*16 + 8;
1959                       int sy= mb_y*16 + 8;
1960                       int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
1961                       int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1962                       int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1963                       draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1964                     }
1965                   }
1966                 }
                     /* qscale: fill an 8x8 area of both chroma planes with a value derived from qscale.
                        NOTE(review): the 8x8 chroma footprint per macroblock looks like it assumes
                        4:2:0 subsampling - confirm for other pixel formats */
1967                 if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
1968                     uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
1969                     int y;
1970                     for(y=0; y<8; y++){
1971                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
1972                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
1973                     }
1974                 }
                     /* macroblock type: chroma gets a color per type, luma gets the partition borders */
1975                 if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
1976                     int mb_type= pict->mb_type[mb_index];
1977                     uint64_t u,v;
1978                     int y;
                         /* sets u and v to the point at angle theta (degrees) and radius r around (128,128);
                            the macro is not #undef'ed afterwards */
1979 #define COLOR(theta, r)\
1980 u= (int)(128 + r*cos(theta*3.141592/180));\
1981 v= (int)(128 + r*sin(theta*3.141592/180));
1982
1983
                         /* u = v = 128 is kept for the types whose COLOR() call is commented out */
1984                     u=v=128;
1985                     if(IS_PCM(mb_type)){
1986                         COLOR(120,48)
1987                     }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
1988                         COLOR(30,48)
1989                     }else if(IS_INTRA4x4(mb_type)){
1990                         COLOR(90,48)
1991                     }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
1992 //                        COLOR(120,48)
1993                     }else if(IS_DIRECT(mb_type)){
1994                         COLOR(150,48)
1995                     }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
1996                         COLOR(170,48)
1997                     }else if(IS_GMC(mb_type)){
1998                         COLOR(190,48)
1999                     }else if(IS_SKIP(mb_type)){
2000 //                        COLOR(180,48)
2001                     }else if(!USES_LIST(mb_type, 1)){
2002                         COLOR(240,48)
2003                     }else if(!USES_LIST(mb_type, 0)){
2004                         COLOR(0,48)
2005                     }else{
2006                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
2007                         COLOR(300,48)
2008                     }
2009
                         /* replicate the byte into all 8 bytes so one store paints 8 samples */
2010                     u*= 0x0101010101010101ULL;
2011                     v*= 0x0101010101010101ULL;
2012                     for(y=0; y<8; y++){
2013                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
2014                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
2015                     }
2016
2017                     //segmentation
                         /* borders are marked by flipping the top bit of the luma samples:
                            a horizontal one along row 8 ... */
2018                     if(IS_8X8(mb_type) || IS_16X8(mb_type)){
2019                         *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
2020                         *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
2021                     }
                         /* ... and a vertical one along column 8 */
2022                     if(IS_8X8(mb_type) || IS_8X16(mb_type)){
2023                         for(y=0; y<16; y++)
2024                             pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
2025                     }
                         /* with at least 4x4 vectors per macroblock, additionally split every 8x8 block
                            whose four vectors (compared as packed 32 bit values) are not all equal */
2026                     if(IS_8X8(mb_type) && mv_sample_log2 >= 2){
2027                         int dm= 1 << (mv_sample_log2-2);
2028                         for(i=0; i<4; i++){
2029                             int sx= mb_x*16 + 8*(i&1);
2030                             int sy= mb_y*16 + 8*(i>>1);
2031                             int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
2032                             //FIXME bidir
2033                             int32_t *mv = (int32_t*)&pict->motion_val[0][xy];
2034                             if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)])
2035                                 for(y=0; y<8; y++)
2036                                     pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80;
2037                             if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)])
2038                                 *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
2039                         }
2040                     }
2041
2042                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
2043                         // hmm
2044                     }
2045                 }
                     /* the skip counter of every visited macroblock is reset */
2046                 s->mbskip_table[mb_index]=0;
2047             }
2048         }
2049     }
2050 }
2051
2052 #ifdef CONFIG_ENCODERS
2053
/**
 * Sum of absolute differences between the samples of a 16x16 block and a
 * single constant value.
 * @param src    top left sample of the block
 * @param ref    value every sample is compared against
 * @param stride distance in bytes between two rows of the block
 * @return the accumulated absolute error
 */
static int get_sae(uint8_t *src, int ref, int stride){
    int total= 0;
    int row, col;

    for(row= 0; row < 16; row++){
        for(col= 0; col < 16; col++)
            total+= FFABS(src[row*stride + col] - ref);
    }

    return total;
}
2066
2067 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
2068     int x, y, w, h;
2069     int acc=0;
2070
2071     w= s->width &~15;
2072     h= s->height&~15;
2073
2074     for(y=0; y<h; y+=16){
2075         for(x=0; x<w; x+=16){
2076             int offset= x + y*stride;
2077             int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
2078             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
2079             int sae = get_sae(src + offset, mean, stride);
2080
2081             acc+= sae + 500 < sad;
2082         }
2083     }
2084     return acc;
2085 }
2086
2087
2088 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
2089     AVFrame *pic=NULL;
2090     int64_t pts;
2091     int i;
2092     const int encoding_delay= s->max_b_frames;
2093     int direct=1;
2094
2095     if(pic_arg){
2096         pts= pic_arg->pts;
2097         pic_arg->display_picture_number= s->input_picture_number++;
2098
2099         if(pts != AV_NOPTS_VALUE){
2100             if(s->user_specified_pts != AV_NOPTS_VALUE){
2101                 int64_t time= pts;
2102                 int64_t last= s->user_specified_pts;
2103
2104                 if(time <= last){
2105                     av_log(s->avctx, AV_LOG_ERROR, "Error, Invalid timestamp=%"PRId64", last=%"PRId64"\n", pts, s->user_specified_pts);
2106                     return -1;
2107                 }
2108             }
2109             s->user_specified_pts= pts;
2110         }else{
2111             if(s->user_specified_pts != AV_NOPTS_VALUE){
2112                 s->user_specified_pts=
2113                 pts= s->user_specified_pts + 1;
2114                 av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n", pts);
2115             }else{
2116                 pts= pic_arg->display_picture_number;
2117             }
2118         }
2119     }
2120
2121   if(pic_arg){
2122     if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
2123     if(pic_arg->linesize[0] != s->linesize) direct=0;
2124     if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
2125     if(pic_arg->linesize[2] != s->uvlinesize) direct=0;
2126
2127 //    av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);
2128
2129     if(direct){
2130         i= ff_find_unused_picture(s, 1);
2131
2132         pic= (AVFrame*)&s->picture[i];
2133         pic->reference= 3;
2134
2135         for(i=0; i<4; i++){
2136             pic->data[i]= pic_arg->data[i];
2137             pic->linesize[i]= pic_arg->linesize[i];
2138         }
2139         alloc_picture(s, (Picture*)pic, 1);
2140     }else{
2141         i= ff_find_unused_picture(s, 0);
2142
2143         pic= (AVFrame*)&s->picture[i];
2144         pic->reference= 3;
2145
2146         alloc_picture(s, (Picture*)pic, 0);
2147
2148         if(   pic->data[0] + INPLACE_OFFSET == pic_arg->data[0]
2149            && pic->data[1] + INPLACE_OFFSET == pic_arg->data[1]
2150            && pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]){
2151        // empty
2152         }else{
2153             int h_chroma_shift, v_chroma_shift;
2154             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
2155
2156             for(i=0; i<3; i++){
2157                 int src_stride= pic_arg->linesize[i];
2158                 int dst_stride= i ? s->uvlinesize : s->linesize;
2159                 int h_shift= i ? h_chroma_shift : 0;
2160                 int v_shift= i ? v_chroma_shift : 0;
2161                 int w= s->width >>h_shift;
2162                 int h= s->height>>v_shift;
2163                 uint8_t *src= pic_arg->data[i];
2164                 uint8_t *dst= pic->data[i];
2165
2166                 if(!s->avctx->rc_buffer_size)
2167                     dst +=INPLACE_OFFSET;
2168
2169                 if(src_stride==dst_stride)
2170                     memcpy(dst, src, src_stride*h);
2171                 else{
2172                     while(h--){
2173                         memcpy(dst, src, w);
2174                         dst += dst_stride;
2175                         src += src_stride;
2176                     }
2177                 }
2178             }
2179         }
2180     }
2181     copy_picture_attributes(s, pic, pic_arg);
2182     pic->pts= pts; //we set this here to avoid modifiying pic_arg
2183   }
2184
2185     /* shift buffer entries */
2186     for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
2187         s->input_picture[i-1]= s->input_picture[i];
2188
2189     s->input_picture[encoding_delay]= (Picture*)pic;
2190
2191     return 0;
2192 }
2193
2194 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref){
2195     int x, y, plane;
2196     int score=0;
2197     int64_t score64=0;
2198
2199     for(plane=0; plane<3; plane++){
2200         const int stride= p->linesize[plane];
2201         const int bw= plane ? 1 : 2;
2202         for(y=0; y<s->mb_height*bw; y++){
2203             for(x=0; x<s->mb_width*bw; x++){
2204                 int off= p->type == FF_BUFFER_TYPE_SHARED ? 0: 16;
2205                 int v= s->dsp.frame_skip_cmp[1](s, p->data[plane] + 8*(x + y*stride)+off, ref->data[plane] + 8*(x + y*stride), stride, 8);
2206
2207                 switch(s->avctx->frame_skip_exp){
2208                     case 0: score= FFMAX(score, v); break;
2209                     case 1: score+= FFABS(v);break;
2210                     case 2: score+= v*v;break;
2211                     case 3: score64+= FFABS(v*v*(int64_t)v);break;
2212                     case 4: score64+= v*v*(int64_t)(v*v);break;
2213                 }
2214             }
2215         }
2216     }
2217
2218     if(score) score64= score;
2219
2220     if(score64 < s->avctx->frame_skip_threshold)
2221         return 1;
2222     if(score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda)>>8))
2223         return 1;
2224     return 0;
2225 }
2226
2227 static int estimate_best_b_count(MpegEncContext *s){
2228     AVCodec *codec= avcodec_find_encoder(s->avctx->codec_id);
2229     AVCodecContext *c= avcodec_alloc_context();
2230     AVFrame input[FF_MAX_B_FRAMES+2];
2231     const int scale= s->avctx->brd_scale;
2232     int i, j, out_size, p_lambda, b_lambda, lambda2;
2233     int outbuf_size= s->width * s->height; //FIXME
2234     uint8_t *outbuf= av_malloc(outbuf_size);
2235     int64_t best_rd= INT64_MAX;
2236     int best_b_count= -1;
2237
2238     assert(scale>=0 && scale <=3);
2239
2240 //    emms_c();
2241     p_lambda= s->last_lambda_for[P_TYPE]; //s->next_picture_ptr->quality;
2242     b_lambda= s->last_lambda_for[B_TYPE]; //p_lambda *FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
2243     if(!b_lambda) b_lambda= p_lambda; //FIXME we should do this somewhere else
2244     lambda2= (b_lambda*b_lambda + (1<<FF_LAMBDA_SHIFT)/2 ) >> FF_LAMBDA_SHIFT;
2245
2246     c->width = s->width >> scale;
2247     c->height= s->height>> scale;
2248     c->flags= CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR | CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
2249     c->flags|= s->avctx->flags & CODEC_FLAG_QPEL;
2250     c->mb_decision= s->avctx->mb_decision;
2251     c->me_cmp= s->avctx->me_cmp;
2252     c->mb_cmp= s->avctx->mb_cmp;
2253     c->me_sub_cmp= s->avctx->me_sub_cmp;
2254     c->pix_fmt = PIX_FMT_YUV420P;
2255     c->time_base= s->avctx->time_base;
2256     c->max_b_frames= s->max_b_frames;
2257
2258     if (avcodec_open(c, codec) < 0)
2259         return -1;
2260
2261     for(i=0; i<s->max_b_frames+2; i++){
2262         int ysize= c->width*c->height;
2263         int csize= (c->width/2)*(c->height/2);
2264         Picture pre_input, *pre_input_ptr= i ? s->input_picture[i-1] : s->next_picture_ptr;
2265
2266         avcodec_get_frame_defaults(&input[i]);
2267         input[i].data[0]= av_malloc(ysize + 2*csize);
2268         input[i].data[1]= input[i].data[0] + ysize;
2269         input[i].data[2]= input[i].data[1] + csize;
2270         input[i].linesize[0]= c->width;
2271         input[i].linesize[1]=
2272         input[i].linesize[2]= c->width/2;
2273
2274         if(pre_input_ptr && (!i || s->input_picture[i-1])) {
2275             pre_input= *pre_input_ptr;
2276
2277             if(pre_input.type != FF_BUFFER_TYPE_SHARED && i) {
2278                 pre_input.data[0]+=INPLACE_OFFSET;
2279                 pre_input.data[1]+=INPLACE_OFFSET;
2280                 pre_input.data[2]+=INPLACE_OFFSET;
2281             }
2282
2283             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0], pre_input.data[0], pre_input.linesize[0], c->width, c->height);
2284             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1], pre_input.data[1], pre_input.linesize[1], c->width>>1, c->height>>1);
2285             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2], pre_input.data[2], pre_input.linesize[2], c->width>>1, c->height>>1);
2286         }
2287     }
2288
2289     for(j=0; j<s->max_b_frames+1; j++){
2290         int64_t rd=0;
2291
2292         if(!s->input_picture[j])
2293             break;
2294
2295         c->error[0]= c->error[1]= c->error[2]= 0;
2296
2297         input[0].pict_type= I_TYPE;
2298         input[0].quality= 1 * FF_QP2LAMBDA;
2299         out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[0]);
2300 //        rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
2301
2302         for(i=0; i<s->max_b_frames+1; i++){
2303             int is_p= i % (j+1) == j || i==s->max_b_frames;
2304
2305             input[i+1].pict_type= is_p ? P_TYPE : B_TYPE;
2306             input[i+1].quality= is_p ? p_lambda : b_lambda;
2307             out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[i+1]);
2308             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
2309         }
2310
2311         /* get the delayed frames */
2312         while(out_size){
2313             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
2314             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
2315         }
2316
2317         rd += c->error[0] + c->error[1] + c->error[2];
2318
2319         if(rd < best_rd){
2320             best_rd= rd;
2321             best_b_count= j;
2322         }
2323     }
2324
2325     av_freep(&outbuf);
2326     avcodec_close(c);
2327     av_freep(&c);
2328
2329     for(i=0; i<s->max_b_frames+2; i++){
2330         av_freep(&input[i].data[0]);
2331     }
2332
2333     return best_b_count;
2334 }
2335
/**
 * Chooses the next picture to encode and decides the coding types of the
 * pictures around it.
 *
 * The coding order queue s->reordered_input_picture is advanced by one entry.
 * If its head is then empty and s->input_picture[0] is available, picture
 * types are decided: either a single I picture, or an anchor picture (P, or I
 * at a GOP boundary) followed in coding order by b_frames B pictures, where
 * b_frames is derived according to s->avctx->b_frame_strategy. The input
 * picture may instead be dropped by the frame skip check.
 *
 * Afterwards s->new_picture, s->current_picture_ptr, s->current_picture and
 * s->picture_number are set up from the head of the coding order queue;
 * s->new_picture is zeroed when there is nothing to encode.
 *
 * @param s encoder context
 */
2336 static void select_input_picture(MpegEncContext *s){
2337     int i;
2338
         /* advance the coding order queue by one entry and clear the freed last slot */
2339     for(i=1; i<MAX_PICTURE_COUNT; i++)
2340         s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
2341     s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;
2342
2343     /* set next picture type & ordering */
2344     if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
             /* without a next_picture_ptr, or in intra only mode, the picture is coded as I */
2345         if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
2346             s->reordered_input_picture[0]= s->input_picture[0];
2347             s->reordered_input_picture[0]->pict_type= I_TYPE;
2348             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2349         }else{
2350             int b_frames;
2351
                 /* optional frame skipping: drop the input picture when skip_check() says so */
2352             if(s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor){
2353                 if(s->picture_in_gop_number < s->gop_size && skip_check(s, s->input_picture[0], s->next_picture_ptr)){
2354                 //FIXME check that the GOP check above is +-1 correct
2355 //av_log(NULL, AV_LOG_DEBUG, "skip %p %"PRId64"\n", s->input_picture[0]->data[0], s->input_picture[0]->pts);
2356
                         /* give the skipped picture back: shared pictures are only detached,
                            all others are released through the release_buffer() callback */
2357                     if(s->input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2358                         for(i=0; i<4; i++)
2359                             s->input_picture[0]->data[i]= NULL;
2360                         s->input_picture[0]->type= 0;
2361                     }else{
2362                         assert(   s->input_picture[0]->type==FF_BUFFER_TYPE_USER
2363                                || s->input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2364
2365                         s->avctx->release_buffer(s->avctx, (AVFrame*)s->input_picture[0]);
2366                     }
2367
2368                     emms_c();
                         /* the skipped picture is reported to the VBV model with a size of 0 bits */
2369                     ff_vbv_update(s, 0);
2370
2371                     goto no_output_pic;
2372                 }
2373             }
2374
                 /* second pass: take the picture types from the rate control entries */
2375             if(s->flags&CODEC_FLAG_PASS2){
2376                 for(i=0; i<s->max_b_frames+1; i++){
2377                     int pict_num= s->input_picture[0]->display_picture_number + i;
2378
2379                     if(pict_num >= s->rc_context.num_entries)
2380                         break;
                         /* no more input: the entry before the missing picture is turned into a P picture */
2381                     if(!s->input_picture[i]){
2382                         s->rc_context.entry[pict_num-1].new_pict_type = P_TYPE;
2383                         break;
2384                     }
2385
2386                     s->input_picture[i]->pict_type=
2387                         s->rc_context.entry[pict_num].new_pict_type;
2388                 }
2389             }
2390
                 /* strategy 0: as many B pictures as allowed, limited by the queued input pictures */
2391             if(s->avctx->b_frame_strategy==0){
2392                 b_frames= s->max_b_frames;
2393                 while(b_frames && !s->input_picture[b_frames]) b_frames--;
                 /* strategy 1: score each queued picture against its predecessor with
                    get_intra_count() (stored +1 so that 0 means "not scored yet") and stop at
                    the first picture whose score exceeds mb_num/b_sensitivity */
2394             }else if(s->avctx->b_frame_strategy==1){
2395                 for(i=1; i<s->max_b_frames+1; i++){
2396                     if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
2397                         s->input_picture[i]->b_frame_score=
2398                             get_intra_count(s, s->input_picture[i  ]->data[0],
2399                                                s->input_picture[i-1]->data[0], s->linesize) + 1;
2400                     }
2401                 }
2402                 for(i=0; i<s->max_b_frames+1; i++){
2403                     if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/s->avctx->b_sensitivity) break;
2404                 }
2405
2406                 b_frames= FFMAX(0, i-1);
2407
2408                 /* reset scores */
2409                 for(i=0; i<b_frames+1; i++){
2410                     s->input_picture[i]->b_frame_score=0;
2411                 }
                 /* strategy 2: the count is chosen by estimate_best_b_count() */
2412             }else if(s->avctx->b_frame_strategy==2){
2413                 b_frames= estimate_best_b_count(s);
2414             }else{
2415                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
2416                 b_frames=0;
2417             }
2418
2419             emms_c();
2420 //static int b_count=0;
2421 //b_count+= b_frames;
2422 //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
2423
                 /* a picture whose type is already set to something other than B ends the run:
                    the loop counts down, so b_frames ends up at the earliest such picture */
2424             for(i= b_frames - 1; i>=0; i--){
2425                 int type= s->input_picture[i]->pict_type;
2426                 if(type && type != B_TYPE)
2427                     b_frames= i;
2428             }
2429             if(s->input_picture[b_frames]->pict_type == B_TYPE && b_frames == s->max_b_frames){
2430                 av_log(s->avctx, AV_LOG_ERROR, "warning, too many b frames in a row\n");
2431             }
2432
                 /* the run would reach the end of the GOP: with STRICT_GOP shorten it to
                    gop_size - picture_in_gop_number - 1 pictures, otherwise code the anchor as
                    an I picture (and with CLOSED_GOP without any B pictures before it) */
2433             if(s->picture_in_gop_number + b_frames >= s->gop_size){
2434               if((s->flags2 & CODEC_FLAG2_STRICT_GOP) && s->gop_size > s->picture_in_gop_number){
2435                     b_frames= s->gop_size - s->picture_in_gop_number - 1;
2436               }else{
2437                 if(s->flags & CODEC_FLAG_CLOSED_GOP)
2438                     b_frames=0;
2439                 s->input_picture[b_frames]->pict_type= I_TYPE;
2440               }
2441             }
2442
                 /* closed GOP and the anchor is an I picture: use the picture before it as anchor */
2443             if(   (s->flags & CODEC_FLAG_CLOSED_GOP)
2444                && b_frames
2445                && s->input_picture[b_frames]->pict_type== I_TYPE)
2446                 b_frames--;
2447
                 /* coding order: the anchor (I stays I, anything else becomes P) comes first,
                    followed by the b_frames pictures that precede it in the input queue */
2448             s->reordered_input_picture[0]= s->input_picture[b_frames];
2449             if(s->reordered_input_picture[0]->pict_type != I_TYPE)
2450                 s->reordered_input_picture[0]->pict_type= P_TYPE;
2451             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2452             for(i=0; i<b_frames; i++){
2453                 s->reordered_input_picture[i+1]= s->input_picture[i];
2454                 s->reordered_input_picture[i+1]->pict_type= B_TYPE;
2455                 s->reordered_input_picture[i+1]->coded_picture_number= s->coded_picture_number++;
2456             }
2457         }
2458     }
2459 no_output_pic:
2460     if(s->reordered_input_picture[0]){
             /* reference is set to 3 for every picture type except B, and to 0 for B pictures */
2461         s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;
2462
2463         copy_picture(&s->new_picture, s->reordered_input_picture[0]);
2464
2465         if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED || s->avctx->rc_buffer_size){
2466             // input is a shared pix, so we can't modify it -> alloc a new one & ensure that the shared one is reusable
2467
                 /* note: this i shadows the function level i until the end of this block */
2468             int i= ff_find_unused_picture(s, 0);
2469             Picture *pic= &s->picture[i];
2470
2471             pic->reference              = s->reordered_input_picture[0]->reference;
2472             alloc_picture(s, pic, 0);
2473
2474             /* mark us unused / free shared pic */
2475             if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_INTERNAL)
2476                 s->avctx->release_buffer(s->avctx, (AVFrame*)s->reordered_input_picture[0]);
2477             for(i=0; i<4; i++)
2478                 s->reordered_input_picture[0]->data[i]= NULL;
2479             s->reordered_input_picture[0]->type= 0;
2480
2481             copy_picture_attributes(s, (AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);
2482
2483             s->current_picture_ptr= pic;
2484         }else{
2485             // input is not a shared pix -> reuse buffer for current_pix
2486
2487             assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER
2488                    || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2489
2490             s->current_picture_ptr= s->reordered_input_picture[0];
                 /* new_picture shares the buffer: its plane pointers are moved by INPLACE_OFFSET */
2491             for(i=0; i<4; i++){
2492                 s->new_picture.data[i]+= INPLACE_OFFSET;
2493             }
2494         }
2495         copy_picture(&s->current_picture, s->current_picture_ptr);
2496
2497         s->picture_number= s->new_picture.display_picture_number;
2498 //printf("dpn:%d\n", s->picture_number);
2499     }else{
             /* nothing to encode: a zeroed new_picture has data[0]==NULL */
2500        memset(&s->new_picture, 0, sizeof(Picture));
2501     }
2502 }
2503
/**
 * Queues one input frame and, if a picture is ready, encodes it into buf.
 *
 * @param avctx    codec context; avctx->priv_data is the MpegEncContext
 * @param buf      output buffer for the coded picture
 * @param buf_size size of buf in bytes
 * @param data     input picture (used as an AVFrame), handed to load_input_picture()
 * @return number of bytes produced (s->frame_bits/8; 0 when no picture was
 *         output by this call), or -1 if load_input_picture() or
 *         encode_picture() failed or the VBV stuffing did not fit
 */
2504 int MPV_encode_picture(AVCodecContext *avctx,
2505                        unsigned char *buf, int buf_size, void *data)
2506 {
2507     MpegEncContext *s = avctx->priv_data;
2508     AVFrame *pic_arg = data;
2509     int i, stuffing_count;
2510
         /* give every thread context a slice of buf proportional to its macroblock rows */
2511     for(i=0; i<avctx->thread_count; i++){
2512         int start_y= s->thread_context[i]->start_mb_y;
2513         int   end_y= s->thread_context[i]->  end_mb_y;
2514         int h= s->mb_height;
2515         uint8_t *start= buf + (size_t)(((int64_t) buf_size)*start_y/h);
2516         uint8_t *end  = buf + (size_t)(((int64_t) buf_size)*  end_y/h);
2517
2518         init_put_bits(&s->thread_context[i]->pb, start, end - start);
2519     }
2520
2521     s->picture_in_gop_number++;
2522
2523     if(load_input_picture(s, pic_arg) < 0)
2524         return -1;
2525
2526     select_input_picture(s);
2527
2528     /* output? */
2529     if(s->new_picture.data[0]){
2530         s->pict_type= s->new_picture.pict_type;
2531 //emms_c();
2532 //printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
2533         MPV_frame_start(s, avctx);
         /* re-entry point when the coded picture turned out too large for the VBV check below */
2534 vbv_retry:
2535         if (encode_picture(s, s->picture_number) < 0)
2536             return -1;
2537
             /* export the per picture statistics */
2538         avctx->real_pict_num  = s->picture_number;
2539         avctx->header_bits = s->header_bits;
2540         avctx->mv_bits     = s->mv_bits;
2541         avctx->misc_bits   = s->misc_bits;
2542         avctx->i_tex_bits  = s->i_tex_bits;
2543         avctx->p_tex_bits  = s->p_tex_bits;
2544         avctx->i_count     = s->i_count;
2545         avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
2546         avctx->skip_count  = s->skip_count;
2547
2548         MPV_frame_end(s);
2549
2550         if (s->out_format == FMT_MJPEG)
2551             mjpeg_picture_trailer(s);
2552
2553         if(avctx->rc_buffer_size){
2554             RateControlContext *rcc= &s->rc_context;
                 /* size limit for this picture: one third of rcc->buffer_index */
2555             int max_size= rcc->buffer_index/3;
2556
                 /* too large and lambda may still grow: raise lambda, undo the state that
                    encoding changed and encode the same picture again */
2557             if(put_bits_count(&s->pb) > max_size && s->lambda < s->avctx->lmax){
2558                 s->next_lambda= FFMAX(s->lambda+1, s->lambda*(s->qscale+1) / s->qscale);
2559                 if(s->adaptive_quant){
2560                     int i;
2561                     for(i=0; i<s->mb_height*s->mb_stride; i++)
2562                         s->lambda_table[i]= FFMAX(s->lambda_table[i]+1, s->lambda_table[i]*(s->qscale+1) / s->qscale);
2563                 }
2564                 s->mb_skipped = 0;        //done in MPV_frame_start()
2565                 if(s->pict_type==P_TYPE){ //done in encode_picture() so we must undo it
2566                     if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
2567                         s->no_rounding ^= 1;
2568                 }
2569                 if(s->pict_type!=B_TYPE){
2570                     s->time_base= s->last_time_base;
2571                     s->last_non_b_time= s->time - s->pp_time;
2572                 }
2573 //                av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
                     /* rewind every thread's bit writer to the start of its slice */
2574                 for(i=0; i<avctx->thread_count; i++){
2575                     PutBitContext *pb= &s->thread_context[i]->pb;
2576                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
2577                 }
2578                 goto vbv_retry;
2579             }
2580
2581             assert(s->avctx->rc_max_rate);
2582         }
2583
2584         if(s->flags&CODEC_FLAG_PASS1)
2585             ff_write_pass1_stats(s);
2586
             /* store the error values in the picture and accumulate them in avctx */
2587         for(i=0; i<4; i++){
2588             s->current_picture_ptr->error[i]= s->current_picture.error[i];
2589             avctx->error[i] += s->current_picture_ptr->error[i];
2590         }
2591
2592         if(s->flags&CODEC_FLAG_PASS1)
2593             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits + avctx->i_tex_bits + avctx->p_tex_bits == put_bits_count(&s->pb));
2594         flush_put_bits(&s->pb);
2595         s->frame_bits  = put_bits_count(&s->pb);
2596
             /* ff_vbv_update() returns the number of stuffing bytes to append (0 for none) */
2597         stuffing_count= ff_vbv_update(s, s->frame_bits);
2598         if(stuffing_count){
                 /* the stuffing plus a margin of 50 bytes must fit into the rest of the buffer */
2599             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < stuffing_count + 50){
2600                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
2601                 return -1;
2602             }
2603
2604             switch(s->codec_id){
2605             case CODEC_ID_MPEG1VIDEO:
2606             case CODEC_ID_MPEG2VIDEO:
                     /* MPEG-1/2: append zero bytes */
2607                 while(stuffing_count--){
2608                     put_bits(&s->pb, 8, 0);
2609                 }
2610             break;
2611             case CODEC_ID_MPEG4:
                     /* MPEG-4: a 4 byte code (0x000001C3) followed by 0xFF bytes.
                        NOTE(review): if stuffing_count could be below 4 here, the subtraction
                        makes it negative and the loop below would not stop at zero; presumably
                        ff_vbv_update() never returns less than 4 for MPEG-4 - TODO confirm */
2612                 put_bits(&s->pb, 16, 0);
2613                 put_bits(&s->pb, 16, 0x1C3);
2614                 stuffing_count -= 4;
2615                 while(stuffing_count--){
2616                     put_bits(&s->pb, 8, 0xFF);
2617                 }
2618             break;
2619             default:
                     /* no stuffing is written for other codecs, the condition is only reported */
2620                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
2621             }
2622             flush_put_bits(&s->pb);
2623             s->frame_bits  = put_bits_count(&s->pb);
2624         }
2625
2626         /* update mpeg1/2 vbv_delay for CBR */
2627         if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && s->out_format == FMT_MPEG1
2628            && 90000LL * (avctx->rc_buffer_size-1) <= s->avctx->rc_max_rate*0xFFFFLL){
2629             int vbv_delay;
2630
2631             assert(s->repeat_first_field==0);
2632
2633             vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate);
2634             assert(vbv_delay < 0xFFFF);
2635
                 /* patch the value into the bytes at vbv_delay_ptr: the low 3 bits of
                    byte 0, all of byte 1 and the high 5 bits of byte 2 */
2636             s->vbv_delay_ptr[0] &= 0xF8;
2637             s->vbv_delay_ptr[0] |= vbv_delay>>13;
2638             s->vbv_delay_ptr[1]  = vbv_delay>>5;
2639             s->vbv_delay_ptr[2] &= 0x07;
2640             s->vbv_delay_ptr[2] |= vbv_delay<<3;
2641         }
2642         s->total_bits += s->frame_bits;
2643         avctx->frame_bits  = s->frame_bits;
2644     }else{
             /* no picture was selected for output: nothing may have been written */
2645         assert((pbBufPtr(&s->pb) == s->pb.buf));
2646         s->frame_bits=0;
2647     }
         /* the result is returned in bytes, so the bit count must be a multiple of 8 */
2648     assert((s->frame_bits&7)==0);
2649
2650     return s->frame_bits/8;
2651 }
2652
2653 #endif //CONFIG_ENCODERS
2654
2655 static inline void gmc1_motion(MpegEncContext *s,
2656                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2657                                uint8_t **ref_picture)
2658 {
2659     uint8_t *ptr;
2660     int offset, src_x, src_y, linesize, uvlinesize;
2661     int motion_x, motion_y;
2662     int emu=0;
2663
2664     motion_x= s->sprite_offset[0][0];
2665     motion_y= s->sprite_offset[0][1];
2666     src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
2667     src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
2668     motion_x<<=(3-s->sprite_warping_accuracy);
2669     motion_y<<=(3-s->sprite_warping_accuracy);
2670     src_x = av_clip(src_x, -16, s->width);
2671     if (src_x == s->width)
2672         motion_x =0;
2673     src_y = av_clip(src_y, -16, s->height);
2674     if (src_y == s->height)
2675         motion_y =0;
2676
2677     linesize = s->linesize;
2678     uvlinesize = s->uvlinesize;
2679
2680     ptr = ref_picture[0] + (src_y * linesize) + src_x;
2681
2682     if(s->flags&CODEC_FLAG_EMU_EDGE){
2683         if(   (unsigned)src_x >= s->h_edge_pos - 17
2684            || (unsigned)src_y >= s->v_edge_pos - 17){
2685             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
2686             ptr= s->edge_emu_buffer;
2687         }
2688     }
2689
2690     if((motion_x|motion_y)&7){
2691         s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
2692         s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
2693     }else{
2694         int dxy;
2695
2696         dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
2697         if (s->no_rounding){
2698             s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
2699         }else{
2700             s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
2701         }
2702     }
2703
2704     if(s->flags&CODEC_FLAG_GRAY) return;
2705
2706     motion_x= s->sprite_offset[1][0];
2707     motion_y= s->sprite_offset[1][1];
2708     src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
2709     src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
2710     motion_x<<=(3-s->sprite_warping_accuracy);
2711     motion_y<<=(3-s->sprite_warping_accuracy);
2712     src_x = av_clip(src_x, -8, s->width>>1);
2713     if (src_x == s->width>>1)
2714         motion_x =0;
2715     src_y = av_clip(src_y, -8, s->height>>1);
2716     if (src_y == s->height>>1)
2717         motion_y =0;
2718
2719     offset = (src_y * uvlinesize) + src_x;
2720     ptr = ref_picture[1] + offset;
2721     if(s->flags&CODEC_FLAG_EMU_EDGE){
2722         if(   (unsigned)src_x >= (s->h_edge_pos>>1) - 9
2723            || (unsigned)src_y >= (s->v_edge_pos>>1) - 9){
2724             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2725             ptr= s->edge_emu_buffer;
2726             emu=1;
2727         }
2728     }
2729     s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
2730
2731     ptr = ref_picture[2] + offset;
2732     if(emu){
2733         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2734         ptr= s->edge_emu_buffer;
2735     }
2736     s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
2737
2738     return;
2739 }
2740
2741 static inline void gmc_motion(MpegEncContext *s,
2742                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2743                                uint8_t **ref_picture)
2744 {
2745     uint8_t *ptr;
2746     int linesize, uvlinesize;
2747     const int a= s->sprite_warping_accuracy;
2748     int ox, oy;
2749
2750     linesize = s->linesize;
2751     uvlinesize = s->uvlinesize;
2752
2753     ptr = ref_picture[0];
2754
2755     ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
2756     oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
2757
2758     s->dsp.gmc(dest_y, ptr, linesize, 16,
2759            ox,
2760            oy,
2761            s->sprite_delta[0][0], s->sprite_delta[0][1],
2762            s->sprite_delta[1][0], s->sprite_delta[1][1],
2763            a+1, (1<<(2*a+1)) - s->no_rounding,
2764            s->h_edge_pos, s->v_edge_pos);
2765     s->dsp.gmc(dest_y+8, ptr, linesize, 16,
2766            ox + s->sprite_delta[0][0]*8,
2767            oy + s->sprite_delta[1][0]*8,
2768            s->sprite_delta[0][0], s->sprite_delta[0][1],
2769            s->sprite_delta[1][0], s->sprite_delta[1][1],
2770            a+1, (1<<(2*a+1)) - s->no_rounding,
2771            s->h_edge_pos, s->v_edge_pos);
2772
2773     if(s->flags&CODEC_FLAG_GRAY) return;
2774
2775     ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
2776     oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
2777
2778     ptr = ref_picture[1];
2779     s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
2780            ox,
2781            oy,
2782            s->sprite_delta[0][0], s->sprite_delta[0][1],
2783            s->sprite_delta[1][0], s->sprite_delta[1][1],
2784            a+1, (1<<(2*a+1)) - s->no_rounding,
2785            s->h_edge_pos>>1, s->v_edge_pos>>1);
2786
2787     ptr = ref_picture[2];
2788     s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
2789            ox,
2790            oy,
2791            s->sprite_delta[0][0], s->sprite_delta[0][1],
2792            s->sprite_delta[1][0], s->sprite_delta[1][1],
2793            a+1, (1<<(2*a+1)) - s->no_rounding,
2794            s->h_edge_pos>>1, s->v_edge_pos>>1);
2795 }
2796
/**
 * Copies a rectangular block of samples into a temporary buffer; every sample
 * of the block that lies outside the source picture is filled by replicating
 * the nearest border sample.
 * @param buf destination buffer
 * @param src pointer to the top left sample of the block in the source buffer
 *            (may point outside the picture)
 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
 * @param block_w width of the block
 * @param block_h height of the block
 * @param src_x x coordinate of the top left sample of the block in the source buffer
 * @param src_y y coordinate of the top left sample of the block in the source buffer
 * @param w width of the source buffer
 * @param h height of the source buffer
 */
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
                                    int src_x, int src_y, int w, int h){
    int row, col;
    int top, bottom, left, right;

    /* a block entirely above/below the picture is moved until one row overlaps */
    if(src_y >= h){
        src  += (h-1-src_y)*linesize;
        src_y = h-1;
    }else if(src_y <= -block_h){
        src  += (1-block_h-src_y)*linesize;
        src_y = 1-block_h;
    }
    /* likewise for a block entirely left/right of the picture */
    if(src_x >= w){
        src  += (w-1-src_x);
        src_x = w-1;
    }else if(src_x <= -block_w){
        src  += (1-block_w-src_x);
        src_x = 1-block_w;
    }

    /* part of the block that is covered by the picture: [top,bottom) x [left,right) */
    top    = src_y < 0 ? -src_y : 0;
    left   = src_x < 0 ? -src_x : 0;
    bottom = h-src_y < block_h ? h-src_y : block_h;
    right  = w-src_x < block_w ? w-src_x : block_w;

    /* copy the covered part */
    for(row=top; row<bottom; row++){
        const int line= row*linesize;
        for(col=left; col<right; col++)
            buf[line + col]= src[line + col];
    }

    /* rows above the picture repeat the first covered row */
    for(row=0; row<top; row++){
        for(col=left; col<right; col++)
            buf[row*linesize + col]= buf[top*linesize + col];
    }

    /* rows below the picture repeat the last covered row */
    for(row=bottom; row<block_h; row++){
        for(col=left; col<right; col++)
            buf[row*linesize + col]= buf[(bottom-1)*linesize + col];
    }

    /* finally extend every row of the block to the left and to the right */
    for(row=0; row<block_h; row++){
        const int line= row*linesize;

        for(col=0; col<left; col++)
            buf[line + col]= buf[line + left];

        for(col=right; col<block_w; col++)
            buf[line + col]= buf[line + right - 1];
    }
}
2867
2868 static inline int hpel_motion(MpegEncContext *s,
2869                                   uint8_t *dest, uint8_t *src,
2870                                   int field_based, int field_select,
2871                                   int src_x, int src_y,
2872                                   int width, int height, int stride,
2873                                   int h_edge_pos, int v_edge_pos,
2874                                   int w, int h, op_pixels_func *pix_op,
2875                                   int motion_x, int motion_y)
2876 {
2877     int dxy;
2878     int emu=0;
2879
2880     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2881     src_x += motion_x >> 1;
2882     src_y += motion_y >> 1;
2883
2884     /* WARNING: do no forget half pels */
2885     src_x = av_clip(src_x, -16, width); //FIXME unneeded for emu?
2886     if (src_x == width)
2887         dxy &= ~1;
2888     src_y = av_clip(src_y, -16, height);
2889     if (src_y == height)
2890         dxy &= ~2;
2891     src += src_y * stride + src_x;
2892
2893     if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
2894         if(   (unsigned)src_x > h_edge_pos - (motion_x&1) - w
2895            || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){
2896             ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2897                              src_x, src_y<<field_based, h_edge_pos, s->v_edge_pos);
2898             src= s->edge_emu_buffer;
2899             emu=1;
2900         }
2901     }
2902     if(field_select)
2903         src += s->linesize;
2904     pix_op[dxy](dest, src, stride, h);
2905     return emu;
2906 }
2907
2908 static inline int hpel_motion_lowres(MpegEncContext *s,
2909                                   uint8_t *dest, uint8_t *src,
2910                                   int field_based, int field_select,
2911                                   int src_x, int src_y,
2912                                   int width, int height, int stride,
2913                                   int h_edge_pos, int v_edge_pos,
2914                                   int w, int h, h264_chroma_mc_func *pix_op,
2915                                   int motion_x, int motion_y)
2916 {
2917     const int lowres= s->avctx->lowres;
2918     const int s_mask= (2<<lowres)-1;
2919     int emu=0;
2920     int sx, sy;
2921
2922     if(s->quarter_sample){
2923         motion_x/=2;
2924         motion_y/=2;
2925     }
2926
2927     sx= motion_x & s_mask;
2928     sy= motion_y & s_mask;
2929     src_x += motion_x >> (lowres+1);
2930     src_y += motion_y >> (lowres+1);
2931
2932     src += src_y * stride + src_x;
2933
2934     if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - w
2935        || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
2936         ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2937                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
2938         src= s->edge_emu_buffer;
2939         emu=1;
2940     }
2941
2942     sx <<= 2 - lowres;
2943     sy <<= 2 - lowres;
2944     if(field_select)
2945         src += s->linesize;
2946     pix_op[lowres](dest, src, stride, h, sx, sy);
2947     return emu;
2948 }
2949
2950 /**
 * Applies one MPEG motion vector to the three components of a macroblock.
 * @param s            codec context; mb_x/mb_y select the macroblock
 * @param dest_y       destination of the luma prediction
 * @param dest_cb      destination of the Cb prediction
 * @param dest_cr      destination of the Cr prediction
 * @param field_based  1 for field prediction: the line sizes are doubled and
 *                     the vertical edge position is halved
 * @param bottom_field if nonzero the destinations are advanced by one line
 * @param field_select if nonzero the sources are advanced by one line
 * @param ref_picture  plane pointers of the reference picture
 * @param pix_op       copy/average functions, indexed by [block size][half-pel case]
 * @param motion_x     x component of the luma vector in half sample units
 * @param motion_y     y component of the luma vector in half sample units
 * @param h            number of luma lines to predict
 */
2951 static av_always_inline void mpeg_motion(MpegEncContext *s,
2952                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2953                                int field_based, int bottom_field, int field_select,
2954                                uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
2955                                int motion_x, int motion_y, int h)
2956 {
2957     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2958     int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, uvlinesize, linesize;
2959
2960 #if 0
2961 if(s->quarter_sample)
2962 {
2963     motion_x>>=1;
2964     motion_y>>=1;
2965 }
2966 #endif
2967
2968     v_edge_pos = s->v_edge_pos >> field_based;
2969     linesize   = s->current_picture.linesize[0] << field_based;
2970     uvlinesize = s->current_picture.linesize[1] << field_based;
2971
         /* luma: bit 0 of dxy is the horizontal, bit 1 the vertical half sample flag */
2972     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2973     src_x = s->mb_x* 16               + (motion_x >> 1);
2974     src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1);
2975
         /* derive the chroma position and half sample flags; the rule depends on the output format */
2976     if (s->out_format == FMT_H263) {
2977         if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){
2978             mx = (motion_x>>1)|(motion_x&1);
2979             my = motion_y >>1;
2980             uvdxy = ((my & 1) << 1) | (mx & 1);
2981             uvsrc_x = s->mb_x* 8               + (mx >> 1);
2982             uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
2983         }else{
2984             uvdxy = dxy | (motion_y & 2) | ((motion_x & 2) >> 1);
2985             uvsrc_x = src_x>>1;
2986             uvsrc_y = src_y>>1;
2987         }
2988     }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
2989         mx = motion_x / 4;
2990         my = motion_y / 4;
2991         uvdxy = 0;
2992         uvsrc_x = s->mb_x*8 + mx;
2993         uvsrc_y = s->mb_y*8 + my;
2994     } else {
             /* chroma subsampled vertically: the vector is halved in both directions */
2995         if(s->chroma_y_shift){
2996             mx = motion_x / 2;
2997             my = motion_y / 2;
2998             uvdxy = ((my & 1) << 1) | (mx & 1);
2999             uvsrc_x = s->mb_x* 8               + (mx >> 1);
3000             uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
3001         } else {
3002             if(s->chroma_x_shift){
3003             //Chroma422
3004                 mx = motion_x / 2;
3005                 uvdxy = ((motion_y & 1) << 1) | (mx & 1);
3006                 uvsrc_x = s->mb_x* 8           + (mx >> 1);
3007                 uvsrc_y = src_y;
3008             } else {
3009             //Chroma444
3010                 uvdxy = dxy;
3011                 uvsrc_x = src_x;
3012                 uvsrc_y = src_y;
3013             }
3014         }
3015     }
3016
3017     ptr_y  = ref_picture[0] + src_y * linesize + src_x;
3018     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
3019     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
3020
         /* the luma block (including the extra sample needed for half-pel) crosses the edge position */
3021     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
3022        || (unsigned)src_y >    v_edge_pos - (motion_y&1) - h){
                 /* for MPEG-1/2 the vector is only reported and the block is left unpredicted */
3023             if(s->codec_id == CODEC_ID_MPEG2VIDEO ||
3024                s->codec_id == CODEC_ID_MPEG1VIDEO){
3025                 av_log(s->avctx,AV_LOG_DEBUG,"MPEG motion vector out of boundary\n");
3026                 return ;
3027             }
                 /* otherwise predict from padded copies built in edge_emu_buffer */
3028             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
3029                              src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
3030             ptr_y = s->edge_emu_buffer;
3031             if(!(s->flags&CODEC_FLAG_GRAY)){
                     /* the chroma copies are placed 18 luma lines into the buffer, Cr 16 bytes after Cb */
3032                 uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
3033                 ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
3034                                  uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3035                 ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
3036                                  uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3037                 ptr_cb= uvbuf;
3038                 ptr_cr= uvbuf+16;
3039             }
3040     }
3041
3042     if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
3043         dest_y += s->linesize;
3044         dest_cb+= s->uvlinesize;
3045         dest_cr+= s->uvlinesize;
3046     }
3047
3048     if(field_select){
3049         ptr_y += s->linesize;
3050         ptr_cb+= s->uvlinesize;
3051         ptr_cr+= s->uvlinesize;
3052     }
3053
3054     pix_op[0][dxy](dest_y, ptr_y, linesize, h);
3055
3056     if(!(s->flags&CODEC_FLAG_GRAY)){
             /* chroma uses the function set selected by chroma_x_shift and h >> chroma_y_shift lines */
3057         pix_op[s->chroma_x_shift][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
3058         pix_op[s->chroma_x_shift][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
3059     }
3060 #if defined(CONFIG_H261_ENCODER) || defined(CONFIG_H261_DECODER)
3061     if(s->out_format == FMT_H261){
3062         ff_h261_loop_filter(s);
3063     }
3064 #endif
3065 }
3066
3067 /* apply one mpeg motion vector to the three components */
3068 static av_always_inline void mpeg_motion_lowres(MpegEncContext *s,
3069                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3070                                int field_based, int bottom_field, int field_select,
3071                                uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
3072                                int motion_x, int motion_y, int h)
3073 {
3074     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
3075     int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
3076     const int lowres= s->avctx->lowres;
3077     const int block_s= 8>>lowres;
3078     const int s_mask= (2<<lowres)-1;
3079     const int h_edge_pos = s->h_edge_pos >> lowres;
3080     const int v_edge_pos = s->v_edge_pos >> lowres;
3081     linesize   = s->current_picture.linesize[0] << field_based;
3082     uvlinesize = s->current_picture.linesize[1] << field_based;
3083
3084     if(s->quarter_sample){ //FIXME obviously not perfect but qpel wont work in lowres anyway
3085         motion_x/=2;
3086         motion_y/=2;
3087     }
3088
3089     if(field_based){
3090         motion_y += (bottom_field - field_select)*((1<<lowres)-1);
3091     }
3092
3093     sx= motion_x & s_mask;
3094     sy= motion_y & s_mask;
3095     src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
3096     src_y =(s->mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));
3097
3098     if (s->out_format == FMT_H263) {
3099         uvsx = ((motion_x>>1) & s_mask) | (sx&1);
3100         uvsy = ((motion_y>>1) & s_mask) | (sy&1);
3101         uvsrc_x = src_x>>1;
3102         uvsrc_y = src_y>>1;
3103     }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
3104         mx = motion_x / 4;
3105         my = motion_y / 4;
3106         uvsx = (2*mx) & s_mask;
3107         uvsy = (2*my) & s_mask;
3108         uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
3109         uvsrc_y = s->mb_y*block_s               + (my >> lowres);
3110     } else {
3111         mx = motion_x / 2;
3112         my = motion_y / 2;
3113         uvsx = mx & s_mask;
3114         uvsy = my & s_mask;
3115         uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
3116         uvsrc_y =(s->mb_y*block_s>>field_based) + (my >> (lowres+1));
3117     }
3118
3119     ptr_y  = ref_picture[0] + src_y * linesize + src_x;
3120     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
3121     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
3122
3123     if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
3124        || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
3125             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
3126                              src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
3127             ptr_y = s->edge_emu_buffer;
3128             if(!(s->flags&CODEC_FLAG_GRAY)){
3129                 uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
3130                 ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
3131                                  uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
3132                 ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
3133                                  uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
3134                 ptr_cb= uvbuf;
3135                 ptr_cr= uvbuf+16;
3136             }
3137     }
3138
3139     if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
3140         dest_y += s->linesize;
3141         dest_cb+= s->uvlinesize;
3142         dest_cr+= s->uvlinesize;
3143     }
3144
3145     if(field_select){
3146         ptr_y += s->linesize;
3147         ptr_cb+= s->uvlinesize;
3148         ptr_cr+= s->uvlinesize;
3149     }
3150
3151     sx <<= 2 - lowres;
3152     sy <<= 2 - lowres;
3153     pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);
3154
3155     if(!(s->flags&CODEC_FLAG_GRAY)){
3156         uvsx <<= 2 - lowres;
3157         uvsy <<= 2 - lowres;
3158         pix_op[lowres](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
3159         pix_op[lowres](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
3160     }
3161     //FIXME h261 lowres loop filter
3162 }
3163
//FIXME move to dsputil, avg variant, 16x16 version
/**
 * Blend five 8x8 predictions into dst using fixed per-pixel weights.
 * src[0] is the mid prediction, src[1] the top, src[2] the left, src[3] the
 * right and src[4] the bottom one; all of them are addressed with the same
 * stride as dst. Every output pixel is the weighted sum of the five inputs
 * (the weights of each pixel add up to 8), plus 4, shifted right by 3.
 */
static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
    uint8_t * const mid   = src[0];
    uint8_t * const top   = src[1];
    uint8_t * const left  = src[2];
    uint8_t * const right = src[3];
    uint8_t * const bottom= src[4];
    /* offsets of the rows at which a new group of weights starts */
    const int row1= stride;
    const int row2= 2*stride;
    const int row4= 4*stride;
    const int row6= 6*stride;
    const int row7= 7*stride;
/* one pixel at offset x; weights are given as top, left, mid, right, bottom */
#define OBMC_FILTER(x, t, l, m, r, b)\
    dst[x]= (t*top[x] + l*left[x] + m*mid[x] + r*right[x] + b*bottom[x] + 4)>>3
/* a 2x2 group of pixels starting at offset x, all with the same weights */
#define OBMC_FILTER4(x, t, l, m, r, b)\
    OBMC_FILTER(x         , t, l, m, r, b);\
    OBMC_FILTER(x+1       , t, l, m, r, b);\
    OBMC_FILTER(x  +stride, t, l, m, r, b);\
    OBMC_FILTER(x+1+stride, t, l, m, r, b);

    /* row 0, plus the middle columns of row 1 */
    OBMC_FILTER (0  , 2, 2, 4, 0, 0);
    OBMC_FILTER (1  , 2, 1, 5, 0, 0);
    OBMC_FILTER4(2  , 2, 1, 5, 0, 0);
    OBMC_FILTER4(4  , 2, 0, 5, 1, 0);
    OBMC_FILTER (6  , 2, 0, 5, 1, 0);
    OBMC_FILTER (7  , 2, 0, 4, 2, 0);
    /* outer columns of row 1 */
    OBMC_FILTER (row1  , 1, 2, 5, 0, 0);
    OBMC_FILTER (row1+1, 1, 2, 5, 0, 0);
    OBMC_FILTER (row1+6, 1, 0, 5, 2, 0);
    OBMC_FILTER (row1+7, 1, 0, 5, 2, 0);
    /* rows 2 and 3 */
    OBMC_FILTER4(row2  , 1, 2, 5, 0, 0);
    OBMC_FILTER4(row2+2, 1, 1, 6, 0, 0);
    OBMC_FILTER4(row2+4, 1, 0, 6, 1, 0);
    OBMC_FILTER4(row2+6, 1, 0, 5, 2, 0);
    /* rows 4 and 5 */
    OBMC_FILTER4(row4  , 0, 2, 5, 0, 1);
    OBMC_FILTER4(row4+2, 0, 1, 6, 0, 1);
    OBMC_FILTER4(row4+4, 0, 0, 6, 1, 1);
    OBMC_FILTER4(row4+6, 0, 0, 5, 2, 1);
    /* row 6, plus the middle columns of row 7 */
    OBMC_FILTER (row6  , 0, 2, 5, 0, 1);
    OBMC_FILTER (row6+1, 0, 2, 5, 0, 1);
    OBMC_FILTER4(row6+2, 0, 1, 5, 0, 2);
    OBMC_FILTER4(row6+4, 0, 0, 5, 1, 2);
    OBMC_FILTER (row6+6, 0, 0, 5, 2, 1);
    OBMC_FILTER (row6+7, 0, 0, 5, 2, 1);
    /* outer columns of row 7 */
    OBMC_FILTER (row7  , 0, 2, 4, 0, 2);
    OBMC_FILTER (row7+1, 0, 1, 5, 0, 2);
    OBMC_FILTER (row7+6, 0, 0, 5, 1, 2);
    OBMC_FILTER (row7+7, 0, 0, 4, 2, 2);
}
3215
3216 /* obmc for 1 8x8 luma block */
3217 static inline void obmc_motion(MpegEncContext *s,
3218                                uint8_t *dest, uint8_t *src,
3219                                int src_x, int src_y,
3220                                op_pixels_func *pix_op,
3221                                int16_t mv[5][2]/* mid top left right bottom*/)
3222 #define MID    0
3223 {
3224     int i;
3225     uint8_t *ptr[5];
3226
3227     assert(s->quarter_sample==0);
3228
3229     for(i=0; i<5; i++){
3230         if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){
3231             ptr[i]= ptr[MID];
3232         }else{
3233             ptr[i]= s->obmc_scratchpad + 8*(i&1) + s->linesize*8*(i>>1);
3234             hpel_motion(s, ptr[i], src, 0, 0,
3235                         src_x, src_y,
3236                         s->width, s->height, s->linesize,
3237                         s->h_edge_pos, s->v_edge_pos,
3238                         8, 8, pix_op,
3239                         mv[i][0], mv[i][1]);
3240         }
3241     }
3242
3243     put_obmc(dest, ptr, s->linesize);
3244 }
3245
3246 static inline void qpel_motion(MpegEncContext *s,
3247                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3248                                int field_based, int bottom_field, int field_select,
3249                                uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
3250                                qpel_mc_func (*qpix_op)[16],
3251                                int motion_x, int motion_y, int h)
3252 {
3253     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
3254     int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, linesize, uvlinesize;
3255
3256     dxy = ((motion_y & 3) << 2) | (motion_x & 3);
3257     src_x = s->mb_x *  16                 + (motion_x >> 2);
3258     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);
3259
3260     v_edge_pos = s->v_edge_pos >> field_based;
3261     linesize = s->linesize << field_based;
3262     uvlinesize = s->uvlinesize << field_based;
3263
3264     if(field_based){
3265         mx= motion_x/2;
3266         my= motion_y>>1;
3267     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
3268         static const int rtab[8]= {0,0,1,1,0,0,0,1};
3269         mx= (motion_x>>1) + rtab[motion_x&7];
3270         my= (motion_y>>1) + rtab[motion_y&7];
3271     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
3272         mx= (motion_x>>1)|(motion_x&1);
3273         my= (motion_y>>1)|(motion_y&1);
3274     }else{
3275         mx= motion_x/2;
3276         my= motion_y/2;
3277     }
3278     mx= (mx>>1)|(mx&1);
3279     my= (my>>1)|(my&1);
3280
3281     uvdxy= (mx&1) | ((my&1)<<1);
3282     mx>>=1;
3283     my>>=1;
3284
3285     uvsrc_x = s->mb_x *  8                 + mx;
3286     uvsrc_y = s->mb_y * (8 >> field_based) + my;
3287
3288     ptr_y  = ref_picture[0] +   src_y *   linesize +   src_x;
3289     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
3290     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
3291
3292     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16
3293        || (unsigned)src_y >    v_edge_pos - (motion_y&3) - h  ){
3294         ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
3295                          src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
3296         ptr_y= s->edge_emu_buffer;
3297         if(!(s->flags&CODEC_FLAG_GRAY)){
3298             uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize;
3299             ff_emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize, 9, 9 + field_based,
3300                              uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3301             ff_emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize, 9, 9 + field_based,
3302                              uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3303             ptr_cb= uvbuf;
3304             ptr_cr= uvbuf + 16;
3305         }
3306     }
3307
3308     if(!field_based)
3309         qpix_op[0][dxy](dest_y, ptr_y, linesize);
3310     else{
3311         if(bottom_field){
3312             dest_y += s->linesize;
3313             dest_cb+= s->uvlinesize;
3314             dest_cr+= s->uvlinesize;
3315         }
3316
3317         if(field_select){
3318             ptr_y  += s->linesize;
3319             ptr_cb += s->uvlinesize;
3320             ptr_cr += s->uvlinesize;
3321         }
3322         //damn interlaced mode
3323         //FIXME boundary mirroring is not exactly correct here
3324         qpix_op[1][dxy](dest_y  , ptr_y  , linesize);
3325         qpix_op[1][dxy](dest_y+8, ptr_y+8, linesize);
3326     }
3327     if(!(s->flags&CODEC_FLAG_GRAY)){
3328         pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1);
3329         pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1);
3330     }
3331 }
3332
3333 inline int ff_h263_round_chroma(int x){
3334     if (x >= 0)
3335         return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3336     else {
3337         x = -x;
3338         return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3339     }
3340 }
3341
3342 /**
3343  * h263 chorma 4mv motion compensation.
3344  */
3345 static inline void chroma_4mv_motion(MpegEncContext *s,
3346                                      uint8_t *dest_cb, uint8_t *dest_cr,
3347                                      uint8_t **ref_picture,
3348                                      op_pixels_func *pix_op,
3349                                      int mx, int my){
3350     int dxy, emu=0, src_x, src_y, offset;
3351     uint8_t *ptr;
3352
3353     /* In case of 8X8, we construct a single chroma motion vector
3354        with a special rounding */
3355     mx= ff_h263_round_chroma(mx);
3356     my= ff_h263_round_chroma(my);
3357
3358     dxy = ((my & 1) << 1) | (mx & 1);
3359     mx >>= 1;
3360     my >>= 1;
3361
3362     src_x = s->mb_x * 8 + mx;
3363     src_y = s->mb_y * 8 + my;
3364     src_x = av_clip(src_x, -8, s->width/2);
3365     if (src_x == s->width/2)
3366         dxy &= ~1;
3367     src_y = av_clip(src_y, -8, s->height/2);
3368     if (src_y == s->height/2)
3369         dxy &= ~2;
3370
3371     offset = (src_y * (s->uvlinesize)) + src_x;
3372     ptr = ref_picture[1] + offset;
3373     if(s->flags&CODEC_FLAG_EMU_EDGE){
3374         if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
3375            || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
3376             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
3377             ptr= s->edge_emu_buffer;
3378             emu=1;
3379         }
3380     }
3381     pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);
3382
3383     ptr = ref_picture[2] + offset;
3384     if(emu){
3385         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
3386         ptr= s->edge_emu_buffer;
3387     }
3388     pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
3389 }
3390
3391 static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
3392                                      uint8_t *dest_cb, uint8_t *dest_cr,
3393                                      uint8_t **ref_picture,
3394                                      h264_chroma_mc_func *pix_op,
3395                                      int mx, int my){
3396     const int lowres= s->avctx->lowres;
3397     const int block_s= 8>>lowres;
3398     const int s_mask= (2<<lowres)-1;
3399     const int h_edge_pos = s->h_edge_pos >> (lowres+1);
3400     const int v_edge_pos = s->v_edge_pos >> (lowres+1);
3401     int emu=0, src_x, src_y, offset, sx, sy;
3402     uint8_t *ptr;
3403
3404     if(s->quarter_sample){
3405         mx/=2;
3406         my/=2;
3407     }
3408
3409     /* In case of 8X8, we construct a single chroma motion vector
3410        with a special rounding */
3411     mx= ff_h263_round_chroma(mx);
3412     my= ff_h263_round_chroma(my);
3413
3414     sx= mx & s_mask;
3415     sy= my & s_mask;
3416     src_x = s->mb_x*block_s + (mx >> (lowres+1));
3417     src_y = s->mb_y*block_s + (my >> (lowres+1));
3418
3419     offset = src_y * s->uvlinesize + src_x;
3420     ptr = ref_picture[1] + offset;
3421     if(s->flags&CODEC_FLAG_EMU_EDGE){
3422         if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
3423            || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
3424             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
3425             ptr= s->edge_emu_buffer;
3426             emu=1;
3427         }
3428     }
3429     sx <<= 2 - lowres;
3430     sy <<= 2 - lowres;
3431     pix_op[lowres](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);
3432
3433     ptr = ref_picture[2] + offset;
3434     if(emu){
3435         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
3436         ptr= s->edge_emu_buffer;
3437     }
3438     pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
3439 }
3440
3441 static inline void prefetch_motion(MpegEncContext *s, uint8_t **pix, int dir){
3442     /* fetch pixels for estimated mv 4 macroblocks ahead
3443      * optimized for 64byte cache lines */
3444     const int shift = s->quarter_sample ? 2 : 1;
3445     const int mx= (s->mv[dir][0][0]>>shift) + 16*s->mb_x + 8;
3446     const int my= (s->mv[dir][0][1]>>shift) + 16*s->mb_y;
3447     int off= mx + (my + (s->mb_x&3)*4)*s->linesize + 64;
3448     s->dsp.prefetch(pix[0]+off, s->linesize, 4);
3449     off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
3450     s->dsp.prefetch(pix[1]+off, pix[2]-pix[1], 2);
3451 }
3452
3453 /**
3454  * motion compensation of a single macroblock
3455  * @param s context
3456  * @param dest_y luma destination pointer
3457  * @param dest_cb chroma cb/u destination pointer
3458  * @param dest_cr chroma cr/v destination pointer
3459  * @param dir direction (0->forward, 1->backward)
3460  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
3461  * @param pic_op halfpel motion compensation function (average or put normally)
3462  * @param pic_op qpel motion compensation function (average or put normally)
3463  * the motion vectors are taken from s->mv and the MV type from s->mv_type
3464  */
3465 static inline void MPV_motion(MpegEncContext *s,
3466                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3467                               int dir, uint8_t **ref_picture,
3468                               op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
3469 {
3470     int dxy, mx, my, src_x, src_y, motion_x, motion_y;
3471     int mb_x, mb_y, i;
3472     uint8_t *ptr, *dest;
3473
3474     mb_x = s->mb_x;
3475     mb_y = s->mb_y;
3476
3477     prefetch_motion(s, ref_picture, dir);
3478
3479     if(s->obmc && s->pict_type != B_TYPE){
3480         int16_t mv_cache[4][4][2];
3481         const int xy= s->mb_x + s->mb_y*s->mb_stride;
3482         const int mot_stride= s->b8_stride;
3483         const int mot_xy= mb_x*2 + mb_y*2*mot_stride;
3484
3485         assert(!s->mb_skipped);
3486
3487         memcpy(mv_cache[1][1], s->current_picture.motion_val[0][mot_xy           ], sizeof(int16_t)*4);
3488         memcpy(mv_cache[2][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
3489         memcpy(mv_cache[3][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
3490
3491         if(mb_y==0 || IS_INTRA(s->current_picture.mb_type[xy-s->mb_stride])){
3492             memcpy(mv_cache[0][1], mv_cache[1][1], sizeof(int16_t)*4);
3493         }else{
3494             memcpy(mv_cache[0][1], s->current_picture.motion_val[0][mot_xy-mot_stride], sizeof(int16_t)*4);
3495         }
3496
3497         if(mb_x==0 || IS_INTRA(s->current_picture.mb_type[xy-1])){
3498             *(int32_t*)mv_cache[1][0]= *(int32_t*)mv_cache[1][1];
3499             *(int32_t*)mv_cache[2][0]= *(int32_t*)mv_cache[2][1];
3500         }else{
3501             *(int32_t*)mv_cache[1][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1];
3502             *(int32_t*)mv_cache[2][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1+mot_stride];
3503         }
3504
3505         if(mb_x+1>=s->mb_width || IS_INTRA(s->current_picture.mb_type[xy+1])){
3506             *(int32_t*)mv_cache[1][3]= *(int32_t*)mv_cache[1][2];
3507             *(int32_t*)mv_cache[2][3]= *(int32_t*)mv_cache[2][2];
3508         }else{
3509             *(int32_t*)mv_cache[1][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2];
3510             *(int32_t*)mv_cache[2][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2+mot_stride];
3511         }
3512
3513         mx = 0;
3514         my = 0;
3515         for(i=0;i<4;i++) {
3516             const int x= (i&1)+1;
3517             const int y= (i>>1)+1;
3518             int16_t mv[5][2]= {
3519                 {mv_cache[y][x  ][0], mv_cache[y][x  ][1]},
3520                 {mv_cache[y-1][x][0], mv_cache[y-1][x][1]},
3521                 {mv_cache[y][x-1][0], mv_cache[y][x-1][1]},
3522                 {mv_cache[y][x+1][0], mv_cache[y][x+1][1]},
3523                 {mv_cache[y+1][x][0], mv_cache[y+1][x][1]}};
3524             //FIXME cleanup
3525             obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
3526                         ref_picture[0],
3527                         mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
3528                         pix_op[1],
3529                         mv);
3530
3531             mx += mv[0][0];
3532             my += mv[0][1];
3533         }
3534         if(!(s->flags&CODEC_FLAG_GRAY))
3535             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
3536
3537         return;
3538     }
3539
3540     switch(s->mv_type) {
3541     case MV_TYPE_16X16:
3542         if(s->mcsel){
3543             if(s->real_sprite_warping_points==1){
3544                 gmc1_motion(s, dest_y, dest_cb, dest_cr,
3545                             ref_picture);
3546             }else{
3547                 gmc_motion(s, dest_y, dest_cb, dest_cr,
3548                             ref_picture);
3549             }
3550         }else if(s->quarter_sample){
3551             qpel_motion(s, dest_y, dest_cb, dest_cr,
3552                         0, 0, 0,
3553                         ref_picture, pix_op, qpix_op,
3554                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3555         }else if(s->mspel){
3556             ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
3557                         ref_picture, pix_op,
3558                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3559         }else
3560         {
3561             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3562                         0, 0, 0,
3563                         ref_picture, pix_op,
3564                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3565         }
3566         break;
3567     case MV_TYPE_8X8:
3568         mx = 0;
3569         my = 0;
3570         if(s->quarter_sample){
3571             for(i=0;i<4;i++) {
3572                 motion_x = s->mv[dir][i][0];
3573                 motion_y = s->mv[dir][i][1];
3574
3575                 dxy = ((motion_y & 3) << 2) | (motion_x & 3);
3576                 src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
3577                 src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;
3578
3579                 /* WARNING: do no forget half pels */
3580                 src_x = av_clip(src_x, -16, s->width);
3581                 if (src_x == s->width)
3582                     dxy &= ~3;
3583                 src_y = av_clip(src_y, -16, s->height);
3584                 if (src_y == s->height)
3585                     dxy &= ~12;
3586
3587                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
3588                 if(s->flags&CODEC_FLAG_EMU_EDGE){
3589                     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8
3590                        || (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 8 ){
3591                         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
3592                         ptr= s->edge_emu_buffer;
3593                     }
3594                 }
3595                 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
3596                 qpix_op[1][dxy](dest, ptr, s->linesize);
3597
3598                 mx += s->mv[dir][i][0]/2;
3599                 my += s->mv[dir][i][1]/2;
3600             }
3601         }else{
3602             for(i=0;i<4;i++) {
3603                 hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
3604                             ref_picture[0], 0, 0,
3605                             mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
3606                             s->width, s->height, s->linesize,
3607                             s->h_edge_pos, s->v_edge_pos,
3608                             8, 8, pix_op[1],
3609                             s->mv[dir][i][0], s->mv[dir][i][1]);
3610
3611                 mx += s->mv[dir][i][0];
3612                 my += s->mv[dir][i][1];
3613             }
3614         }
3615
3616         if(!(s->flags&CODEC_FLAG_GRAY))
3617             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
3618         break;
3619     case MV_TYPE_FIELD:
3620         if (s->picture_structure == PICT_FRAME) {
3621             if(s->quarter_sample){
3622                 for(i=0; i<2; i++){
3623                     qpel_motion(s, dest_y, dest_cb, dest_cr,
3624                                 1, i, s->field_select[dir][i],
3625                                 ref_picture, pix_op, qpix_op,
3626                                 s->mv[dir][i][0], s->mv[dir][i][1], 8);
3627                 }
3628             }else{
3629                 /* top field */
3630                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3631                             1, 0, s->field_select[dir][0],
3632                             ref_picture, pix_op,
3633                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
3634                 /* bottom field */
3635                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3636                             1, 1, s->field_select[dir][1],
3637                             ref_picture, pix_op,
3638                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
3639             }
3640         } else {
3641             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
3642                 ref_picture= s->current_picture_ptr->data;
3643             }
3644
3645             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3646                         0, 0, s->field_select[dir][0],
3647                         ref_picture, pix_op,
3648                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3649         }
3650         break;
3651     case MV_TYPE_16X8:
3652         for(i=0; i<2; i++){
3653             uint8_t ** ref2picture;
3654
3655             if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
3656                 ref2picture= ref_picture;
3657             }else{
3658                 ref2picture= s->current_picture_ptr->data;
3659             }
3660
3661             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3662                         0, 0, s->field_select[dir][i],
3663                         ref2picture, pix_op,
3664                         s->mv[dir][i][0], s->mv[dir][i][1] + 16*i, 8);
3665
3666             dest_y += 16*s->linesize;
3667             dest_cb+= (16>>s->chroma_y_shift)*s->uvlinesize;
3668             dest_cr+= (16>>s->chroma_y_shift)*s->uvlinesize;
3669         }
3670         break;
3671     case MV_TYPE_DMV:
3672         if(s->picture_structure == PICT_FRAME){
3673             for(i=0; i<2; i++){
3674                 int j;
3675                 for(j=0; j<2; j++){
3676                     mpeg_motion(s, dest_y, dest_cb, dest_cr,
3677                                 1, j, j^i,
3678                                 ref_picture, pix_op,
3679                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], 8);
3680                 }
3681                 pix_op = s->dsp.avg_pixels_tab;
3682             }
3683         }else{
3684             for(i=0; i<2; i++){
3685                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3686                             0, 0, s->picture_structure != i+1,
3687                             ref_picture, pix_op,
3688                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],16);
3689
3690                 // after put we make avg of the same block
3691                 pix_op=s->dsp.avg_pixels_tab;
3692
3693                 //opposite parity is always in the same frame if this is second field
3694                 if(!s->first_field){
3695                     ref_picture = s->current_picture_ptr->data;
3696                 }
3697             }
3698         }
3699     break;
3700     default: assert(0);
3701     }
3702 }
3703
3704 /**
3705  * motion compensation of a single macroblock
3706  * @param s context
3707  * @param dest_y luma destination pointer
3708  * @param dest_cb chroma cb/u destination pointer
3709  * @param dest_cr chroma cr/v destination pointer
3710  * @param dir direction (0->forward, 1->backward)
3711  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
3712  * @param pic_op halfpel motion compensation function (average or put normally)
3713  * the motion vectors are taken from s->mv and the MV type from s->mv_type
3714  */
3715 static inline void MPV_motion_lowres(MpegEncContext *s,
3716                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3717                               int dir, uint8_t **ref_picture,
3718                               h264_chroma_mc_func *pix_op)
3719 {
3720     int mx, my;
3721     int mb_x, mb_y, i;
3722     const int lowres= s->avctx->lowres;
3723     const int block_s= 8>>lowres;
3724
3725     mb_x = s->mb_x;
3726     mb_y = s->mb_y;
3727
3728     switch(s->mv_type) {
3729     case MV_TYPE_16X16:
3730         mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3731                     0, 0, 0,
3732                     ref_picture, pix_op,
3733                     s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
3734         break;
3735     case MV_TYPE_8X8:
3736         mx = 0;
3737         my = 0;
3738             for(i=0;i<4;i++) {
3739                 hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
3740                             ref_picture[0], 0, 0,
3741                             (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
3742                             s->width, s->height, s->linesize,
3743                             s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
3744                             block_s, block_s, pix_op,
3745                             s->mv[dir][i][0], s->mv[dir][i][1]);
3746
3747                 mx += s->mv[dir][i][0];
3748                 my += s->mv[dir][i][1];
3749             }
3750
3751         if(!(s->flags&CODEC_FLAG_GRAY))
3752             chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
3753         break;
3754     case MV_TYPE_FIELD:
3755         if (s->picture_structure == PICT_FRAME) {
3756             /* top field */
3757             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3758                         1, 0, s->field_select[dir][0],
3759                         ref_picture, pix_op,
3760                         s->mv[dir][0][0], s->mv[dir][0][1], block_s);
3761             /* bottom field */
3762             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3763                         1, 1, s->field_select[dir][1],
3764                         ref_picture, pix_op,
3765                         s->mv[dir][1][0], s->mv[dir][1][1], block_s);
3766         } else {
3767             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
3768                 ref_picture= s->current_picture_ptr->data;
3769             }
3770
3771             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3772                         0, 0, s->field_select[dir][0],
3773                         ref_picture, pix_op,
3774                         s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
3775         }
3776         break;
3777     case MV_TYPE_16X8:
3778         for(i=0; i<2; i++){
3779             uint8_t ** ref2picture;
3780
3781             if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
3782                 ref2picture= ref_picture;
3783             }else{
3784                 ref2picture= s->current_picture_ptr->data;
3785             }
3786
3787             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3788                         0, 0, s->field_select[dir][i],
3789                         ref2picture, pix_op,
3790                         s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s);
3791
3792             dest_y += 2*block_s*s->linesize;
3793             dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
3794             dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
3795         }
3796         break;
3797     case MV_TYPE_DMV:
3798         if(s->picture_structure == PICT_FRAME){
3799             for(i=0; i<2; i++){
3800                 int j;
3801                 for(j=0; j<2; j++){
3802                     mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3803                                 1, j, j^i,
3804                                 ref_picture, pix_op,
3805                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s);
3806                 }
3807                 pix_op = s->dsp.avg_h264_chroma_pixels_tab;
3808             }
3809         }else{
3810             for(i=0; i<2; i++){
3811                 mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3812                             0, 0, s->picture_structure != i+1,
3813                             ref_picture, pix_op,
3814                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s);
3815
3816                 // after put we make avg of the same block
3817                 pix_op = s->dsp.avg_h264_chroma_pixels_tab;
3818
3819                 //opposite parity is always in the same frame if this is second field
3820                 if(!s->first_field){
3821                     ref_picture = s->current_picture_ptr->data;
3822                 }
3823             }
3824         }
3825     break;
3826     default: assert(0);
3827     }
3828 }
3829
3830 /* put block[] to dest[] */
3831 static inline void put_dct(MpegEncContext *s,
3832                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3833 {
3834     s->dct_unquantize_intra(s, block, i, qscale);
3835     s->dsp.idct_put (dest, line_size, block);
3836 }
3837
3838 /* add block[] to dest[] */
3839 static inline void add_dct(MpegEncContext *s,
3840                            DCTELEM *block, int i, uint8_t *dest, int line_size)
3841 {
3842     if (s->block_last_index[i] >= 0) {
3843         s->dsp.idct_add (dest, line_size, block);
3844     }
3845 }
3846
3847 static inline void add_dequant_dct(MpegEncContext *s,
3848                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3849 {
3850     if (s->block_last_index[i] >= 0) {
3851         s->dct_unquantize_inter(s, block, i, qscale);
3852
3853         s->dsp.idct_add (dest, line_size, block);
3854     }
3855 }
3856
3857 /**
3858  * cleans dc, ac, coded_block for the current non intra MB
3859  */
3860 void ff_clean_intra_table_entries(MpegEncContext *s)
3861 {
3862     int wrap = s->b8_stride;
3863     int xy = s->block_index[0];
3864
3865     s->dc_val[0][xy           ] =
3866     s->dc_val[0][xy + 1       ] =
3867     s->dc_val[0][xy     + wrap] =
3868     s->dc_val[0][xy + 1 + wrap] = 1024;
3869     /* ac pred */
3870     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
3871     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
3872     if (s->msmpeg4_version>=3) {
3873         s->coded_block[xy           ] =
3874         s->coded_block[xy + 1       ] =
3875         s->coded_block[xy     + wrap] =
3876         s->coded_block[xy + 1 + wrap] = 0;
3877     }
3878     /* chroma */
3879     wrap = s->mb_stride;
3880     xy = s->mb_x + s->mb_y * wrap;
3881     s->dc_val[1][xy] =
3882     s->dc_val[2][xy] = 1024;
3883     /* ac pred */
3884     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
3885     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
3886
3887     s->mbintra_table[xy]= 0;
3888 }
3889
3890 /* generic function called after a macroblock has been parsed by the
3891    decoder or after it has been encoded by the encoder.
3892
3893    Important variables used:
3894    s->mb_intra : true if intra macroblock
3895    s->mv_dir   : motion vector direction
3896    s->mv_type  : motion vector type
3897    s->mv       : motion vector
3898    s->interlaced_dct : true if interlaced dct used (mpeg2)
3899  */
3900 static av_always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
3901 {
3902     int mb_x, mb_y;
3903     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
3904 #ifdef HAVE_XVMC
3905     if(s->avctx->xvmc_acceleration){
3906         XVMC_decode_mb(s);//xvmc uses pblocks
3907         return;
3908     }
3909 #endif
3910
3911     mb_x = s->mb_x;
3912     mb_y = s->mb_y;
3913
3914     if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
3915        /* save DCT coefficients */
3916        int i,j;
3917        DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
3918        for(i=0; i<6; i++)
3919            for(j=0; j<64; j++)
3920                *dct++ = block[i][s->dsp.idct_permutation[j]];
3921     }
3922
3923     s->current_picture.qscale_table[mb_xy]= s->qscale;
3924
3925     /* update DC predictors for P macroblocks */
3926     if (!s->mb_intra) {
3927         if (s->h263_pred || s->h263_aic) {
3928             if(s->mbintra_table[mb_xy])
3929                 ff_clean_intra_table_entries(s);
3930         } else {
3931             s->last_dc[0] =
3932             s->last_dc[1] =
3933             s->last_dc[2] = 128 << s->intra_dc_precision;
3934         }
3935     }
3936     else if (s->h263_pred || s->h263_aic)
3937         s->mbintra_table[mb_xy]=1;
3938
3939     if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE) && s->avctx->mb_decision != FF_MB_DECISION_RD)) { //FIXME precalc
3940         uint8_t *dest_y, *dest_cb, *dest_cr;
3941         int dct_linesize, dct_offset;
3942         op_pixels_func (*op_pix)[4];
3943         qpel_mc_func (*op_qpix)[16];
3944         const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
3945         const int uvlinesize= s->current_picture.linesize[1];
3946         const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
3947         const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;
3948
3949         /* avoid copy if macroblock skipped in last frame too */
3950         /* skip only during decoding as we might trash the buffers during encoding a bit */
3951         if(!s->encoding){
3952             uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
3953             const int age= s->current_picture.age;
3954
3955             assert(age);
3956
3957             if (s->mb_skipped) {
3958                 s->mb_skipped= 0;
3959                 assert(s->pict_type!=I_TYPE);
3960
3961                 (*mbskip_ptr) ++; /* indicate that this time we skipped it */
3962                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3963
3964                 /* if previous was skipped too, then nothing to do !  */
3965                 if (*mbskip_ptr >= age && s->current_picture.reference){
3966                     return;
3967                 }
3968             } else if(!s->current_picture.reference){
3969                 (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
3970                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3971             } else{
3972                 *mbskip_ptr = 0; /* not skipped */
3973             }
3974         }
3975
3976         dct_linesize = linesize << s->interlaced_dct;
3977         dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;
3978
3979         if(readable){
3980             dest_y=  s->dest[0];
3981             dest_cb= s->dest[1];
3982             dest_cr= s->dest[2];
3983         }else{
3984             dest_y = s->b_scratchpad;
3985             dest_cb= s->b_scratchpad+16*linesize;
3986             dest_cr= s->b_scratchpad+32*linesize;
3987         }
3988
3989         if (!s->mb_intra) {
3990             /* motion handling */
3991             /* decoding or more than one mb_type (MC was already done otherwise) */
3992             if(!s->encoding){
3993                 if(lowres_flag){
3994                     h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;
3995
3996                     if (s->mv_dir & MV_DIR_FORWARD) {
3997                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
3998                         op_pix = s->dsp.avg_h264_chroma_pixels_tab;
3999                     }
4000                     if (s->mv_dir & MV_DIR_BACKWARD) {
4001                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
4002                     }
4003                 }else{
4004                     op_qpix= s->me.qpel_put;
4005                     if ((!s->no_rounding) || s->pict_type==B_TYPE){
4006                         op_pix = s->dsp.put_pixels_tab;
4007                     }else{
4008                         op_pix = s->dsp.put_no_rnd_pixels_tab;
4009                     }
4010                     if (s->mv_dir & MV_DIR_FORWARD) {
4011                         MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
4012                         op_pix = s->dsp.avg_pixels_tab;
4013                         op_qpix= s->me.qpel_avg;
4014                     }
4015                     if (s->mv_dir & MV_DIR_BACKWARD) {
4016                         MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
4017                     }
4018                 }
4019             }
4020
4021             /* skip dequant / idct if we are really late ;) */
4022             if(s->hurry_up>1) goto skip_idct;
4023             if(s->avctx->skip_idct){
4024                 if(  (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == B_TYPE)
4025                    ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != I_TYPE)
4026                    || s->avctx->skip_idct >= AVDISCARD_ALL)
4027                     goto skip_idct;
4028             }
4029
4030             /* add dct residue */
4031             if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
4032                                 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
4033                 add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
4034                 add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
4035                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
4036                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
4037
4038                 if(!(s->flags&CODEC_FLAG_GRAY)){
4039                     if (s->chroma_y_shift){
4040                         add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
4041                         add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
4042                     }else{
4043                         dct_linesize >>= 1;
4044                         dct_offset >>=1;
4045                         add_dequant_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
4046                         add_dequant_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
4047                         add_dequant_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
4048                         add_dequant_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
4049                     }
4050                 }
4051             } else if(s->codec_id != CODEC_ID_WMV2){
4052                 add_dct(s, block[0], 0, dest_y                          , dct_linesize);
4053                 add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
4054                 add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
4055                 add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);
4056
4057                 if(!(s->flags&CODEC_FLAG_GRAY)){
4058                     if(s->chroma_y_shift){//Chroma420
4059                         add_dct(s, block[4], 4, dest_cb, uvlinesize);
4060                         add_dct(s, block[5], 5, dest_cr, uvlinesize);
4061                     }else{
4062                         //chroma422
4063                         dct_linesize = uvlinesize << s->interlaced_dct;
4064                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
4065
4066                         add_dct(s, block[4], 4, dest_cb, dct_linesize);
4067                         add_dct(s, block[5], 5, dest_cr, dct_linesize);
4068                         add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
4069                         add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
4070                         if(!s->chroma_x_shift){//Chroma444
4071                             add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
4072                             add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
4073                             add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
4074                             add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
4075                         }
4076                     }
4077                 }//fi gray
4078             }
4079             else{
4080                 ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
4081             }
4082         } else {
4083             /* dct only in intra block */
4084             if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
4085                 put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
4086                 put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
4087                 put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
4088                 put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
4089
4090                 if(!(s->flags&CODEC_FLAG_GRAY)){
4091                     if(s->chroma_y_shift){
4092                         put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
4093                         put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
4094                     }else{
4095                         dct_offset >>=1;
4096                         dct_linesize >>=1;
4097                         put_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
4098                         put_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
4099                         put_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
4100                         put_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
4101                     }
4102                 }
4103             }else{
4104                 s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
4105                 s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
4106                 s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
4107                 s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);
4108
4109                 if(!(s->flags&CODEC_FLAG_GRAY)){
4110                     if(s->chroma_y_shift){
4111                         s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
4112                         s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
4113                     }else{
4114
4115                         dct_linesize = uvlinesize << s->interlaced_dct;
4116                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
4117
4118                         s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
4119                         s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
4120                         s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
4121                         s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
4122                         if(!s->chroma_x_shift){//Chroma444
4123                             s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
4124                             s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
4125                             s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
4126                             s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
4127                         }
4128                     }
4129                 }//gray
4130             }
4131         }
4132 skip_idct:
4133         if(!readable){
4134             s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
4135             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
4136             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
4137         }
4138     }
4139 }
4140
4141 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
4142     if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1);
4143     else                  MPV_decode_mb_internal(s, block, 0);
4144 }
4145
4146 #ifdef CONFIG_ENCODERS
4147
4148 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
4149 {
4150     static const char tab[64]=
4151         {3,2,2,1,1,1,1,1,
4152          1,1,1,1,1,1,1,1,
4153          1,1,1,1,1,1,1,1,
4154          0,0,0,0,0,0,0,0,
4155          0,0,0,0,0,0,0,0,
4156          0,0,0,0,0,0,0,0,
4157          0,0,0,0,0,0,0,0,
4158          0,0,0,0,0,0,0,0};
4159     int score=0;
4160     int run=0;
4161     int i;
4162     DCTELEM *block= s->block[n];
4163     const int last_index= s->block_last_index[n];
4164     int skip_dc;
4165
4166     if(threshold<0){
4167         skip_dc=0;
4168         threshold= -threshold;
4169     }else
4170         skip_dc=1;
4171
4172     /* are all which we could set to zero are allready zero? */
4173     if(last_index<=skip_dc - 1) return;
4174
4175     for(i=0; i<=last_index; i++){
4176         const int j = s->intra_scantable.permutated[i];
4177         const int level = FFABS(block[j]);
4178         if(level==1){
4179             if(skip_dc && i==0) continue;
4180             score+= tab[run];
4181             run=0;
4182         }else if(level>1){
4183             return;
4184         }else{
4185             run++;
4186         }
4187     }
4188     if(score >= threshold) return;
4189     for(i=skip_dc; i<=last_index; i++){
4190         const int j = s->intra_scantable.permutated[i];
4191         block[j]=0;
4192     }
4193     if(block[0]) s->block_last_index[n]= 0;
4194     else         s->block_last_index[n]= -1;
4195 }
4196
4197 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
4198 {
4199     int i;
4200     const int maxlevel= s->max_qcoeff;
4201     const int minlevel= s->min_qcoeff;
4202     int overflow=0;
4203
4204     if(s->mb_intra){
4205         i=1; //skip clipping of intra dc
4206     }else
4207         i=0;
4208
4209     for(;i<=last_index; i++){
4210         const int j= s->intra_scantable.permutated[i];
4211         int level = block[j];
4212
4213         if     (level>maxlevel){
4214             level=maxlevel;
4215             overflow++;
4216         }else if(level<minlevel){
4217             level=minlevel;
4218             overflow++;
4219         }
4220
4221         block[j]= level;
4222     }
4223
4224     if(overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
4225         av_log(s->avctx, AV_LOG_INFO, "warning, clipping %d dct coefficients to %d..%d\n", overflow, minlevel, maxlevel);
4226 }
4227
4228 #endif //CONFIG_ENCODERS
4229
4230 /**
4231  *
4232  * @param h is the normal height, this will be reduced automatically if needed for the last row
4233  */
4234 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
4235     if (s->avctx->draw_horiz_band) {
4236         AVFrame *src;
4237         int offset[4];
4238
4239         if(s->picture_structure != PICT_FRAME){
4240             h <<= 1;
4241             y <<= 1;
4242             if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
4243         }
4244
4245         h= FFMIN(h, s->avctx->height - y);
4246
4247         if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
4248             src= (AVFrame*)s->current_picture_ptr;
4249         else if(s->last_picture_ptr)
4250             src= (AVFrame*)s->last_picture_ptr;
4251         else
4252             return;
4253
4254         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
4255             offset[0]=
4256             offset[1]=
4257             offset[2]=
4258             offset[3]= 0;
4259         }else{
4260             offset[0]= y * s->linesize;;
4261             offset[1]=
4262             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
4263             offset[3]= 0;
4264         }
4265
4266         emms_c();
4267
4268         s->avctx->draw_horiz_band(s->avctx, src, offset,
4269                                   y, s->picture_structure, h);
4270     }
4271 }
4272
4273 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
4274     const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
4275     const int uvlinesize= s->current_picture.linesize[1];
4276     const int mb_size= 4 - s->avctx->lowres;
4277
4278     s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
4279     s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
4280     s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
4281     s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
4282     s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
4283     s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
4284     //block_index is not used by mpeg2, so it is not affected by chroma_format
4285
4286     s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
4287     s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
4288     s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
4289
4290     if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
4291     {
4292         s->dest[0] += s->mb_y *   linesize << mb_size;
4293         s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
4294         s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
4295     }
4296 }
4297
4298 #ifdef CONFIG_ENCODERS
4299
/**
 * Compute a weight for each pixel of an 8x8 block from the variation of
 * the pixel values in its neighbourhood.
 *
 * For every pixel the up to 3x3 neighbourhood (clipped to the block) is
 * scanned and the weight is 36*ff_sqrt(count*sum_of_squares - sum*sum)/count.
 *
 * @param weight receives the 64 weights, stored row by row
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance between two lines of ptr
 */
static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){
    int row, col;
//FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            int total    = 0;
            int total_sq = 0;
            int samples  = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                const uint8_t *line = ptr + ny * stride;

                for (nx = left; nx < right; nx++) {
                    const int v = line[nx];

                    total    += v;
                    total_sq += v * v;
                    samples++;
                }
            }

            weight[col + 8 * row] = (36 * ff_sqrt(samples * total_sq - total * total)) / samples;
        }
    }
}
4322
4323 static av_always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, int motion_y, int mb_block_height, int mb_block_count)
4324 {
4325     int16_t weight[8][64];
4326     DCTELEM orig[8][64];
4327     const int mb_x= s->mb_x;
4328     const int mb_y= s->mb_y;
4329     int i;
4330     int skip_dct[8];
4331     int dct_offset   = s->linesize*8; //default for progressive frames
4332     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
4333     int wrap_y, wrap_c;
4334
4335     for(i=0; i<mb_block_count; i++) skip_dct[i]=s->skipdct;
4336
4337     if(s->adaptive_quant){
4338         const int last_qp= s->qscale;
4339         const int mb_xy= mb_x + mb_y*s->mb_stride;
4340
4341         s->lambda= s->lambda_table[mb_xy];
4342         update_qscale(s);
4343
4344         if(!(s->flags&CODEC_FLAG_QP_RD)){
4345             s->qscale= s->current_picture_ptr->qscale_table[mb_xy];
4346             s->dquant= s->qscale - last_qp;
4347
4348             if(s->out_format==FMT_H263){
4349                 s->dquant= av_clip(s->dquant, -2, 2);
4350
4351                 if(s->codec_id==CODEC_ID_MPEG4){
4352                     if(!s->mb_intra){
4353                         if(s->pict_type == B_TYPE){
4354                             if(s->dquant&1 || s->mv_dir&MV_DIRECT)
4355                                 s->dquant= 0;
4356                         }
4357                         if(s->mv_type==MV_TYPE_8X8)
4358                             s->dquant=0;
4359                     }
4360                 }
4361             }
4362         }
4363         ff_set_qscale(s, last_qp + s->dquant);
4364     }else if(s->flags&CODEC_FLAG_QP_RD)
4365         ff_set_qscale(s, s->qscale + s->dquant);
4366
4367     wrap_y = s->linesize;
4368     wrap_c = s->uvlinesize;
4369     ptr_y = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
4370     ptr_cb = s->new_picture.data[1] + (mb_y * mb_block_height * wrap_c) + mb_x * 8;
4371     ptr_cr = s->new_picture.data[2] + (mb_y * mb_block_height * wrap_c) + mb_x * 8;
4372
4373     if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
4374         uint8_t *ebuf= s->edge_emu_buffer + 32;
4375         ff_emulated_edge_mc(ebuf            , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width   , s->height);
4376         ptr_y= ebuf;
4377         ff_emulated_edge_mc(ebuf+18*wrap_y  , ptr_cb, wrap_c, 8, mb_block_height, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
4378         ptr_cb= ebuf+18*wrap_y;
4379         ff_emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, mb_block_height, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
4380         ptr_cr= ebuf+18*wrap_y+8;
4381     }
4382
4383     if (s->mb_intra) {
4384         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
4385             int progressive_score, interlaced_score;
4386
4387             s->interlaced_dct=0;
4388             progressive_score= s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y, 8)
4389                               +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400;
4390
4391             if(progressive_score > 0){
4392                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y*2, 8)
4393                                   +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y  , NULL, wrap_y*2, 8);
4394                 if(progressive_score > interlaced_score){
4395                     s->interlaced_dct=1;
4396
4397                     dct_offset= wrap_y;
4398                     wrap_y<<=1;
4399                     if (s->chroma_format == CHROMA_422)
4400                         wrap_c<<=1;
4401                 }
4402             }
4403         }
4404
4405         s->dsp.get_pixels(s->block[0], ptr_y                 , wrap_y);
4406         s->dsp.get_pixels(s->block[1], ptr_y              + 8, wrap_y);
4407         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset    , wrap_y);
4408         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
4409
4410         if(s->flags&CODEC_FLAG_GRAY){
4411             skip_dct[4]= 1;
4412             skip_dct[5]= 1;
4413         }else{
4414             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
4415             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
4416             if(!s->chroma_y_shift){ /* 422 */
4417                 s->dsp.get_pixels(s->block[6], ptr_cb + (dct_offset>>1), wrap_c);
4418                 s->dsp.get_pixels(s->block[7], ptr_cr + (dct_offset>>1), wrap_c);
4419             }
4420         }
4421     }else{
4422         op_pixels_func (*op_pix)[4];
4423         qpel_mc_func (*op_qpix)[16];
4424         uint8_t *dest_y, *dest_cb, *dest_cr;
4425
4426         dest_y  = s->dest[0];
4427         dest_cb = s->dest[1];
4428         dest_cr = s->dest[2];
4429
4430         if ((!s->no_rounding) || s->pict_type==B_TYPE){
4431             op_pix = s->dsp.put_pixels_tab;
4432             op_qpix= s->dsp.put_qpel_pixels_tab;
4433         }else{
4434             op_pix = s->dsp.put_no_rnd_pixels_tab;
4435             op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
4436         }
4437
4438         if (s->mv_dir & MV_DIR_FORWARD) {
4439             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
4440             op_pix = s->dsp.avg_pixels_tab;
4441             op_qpix= s->dsp.avg_qpel_pixels_tab;
4442         }
4443         if (s->mv_dir & MV_DIR_BACKWARD) {
4444             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
4445         }
4446
4447         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
4448             int progressive_score, interlaced_score;
4449
4450             s->interlaced_dct=0;
4451             progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8)
4452                               +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;
4453
4454             if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;
4455
4456             if(progressive_score>0){
4457                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8)
4458                                   +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);
4459
4460                 if(progressive_score > interlaced_score){
4461                     s->interlaced_dct=1;
4462
4463                     dct_offset= wrap_y;
4464                     wrap_y<<=1;
4465                     if (s->chroma_format == CHROMA_422)
4466                         wrap_c<<=1;
4467                 }
4468             }
4469         }
4470
4471         s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
4472         s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
4473         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
4474         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
4475
4476         if(s->flags&CODEC_FLAG_GRAY){
4477             skip_dct[4]= 1;
4478             skip_dct[5]= 1;
4479         }else{
4480             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
4481             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
4482             if(!s->chroma_y_shift){ /* 422 */
4483                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset>>1), dest_cb + (dct_offset>>1), wrap_c);
4484                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset>>1), dest_cr + (dct_offset>>1), wrap_c);
4485             }
4486         }
4487         /* pre quantization */
4488         if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
4489             //FIXME optimize
4490             if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
4491             if(s->dsp.sad[1](NULL, ptr_y            + 8, dest_y            + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
4492             if(s->dsp.sad[1](NULL, ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
4493             if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
4494             if(s->dsp.sad[1](NULL, ptr_cb              , dest_cb              , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
4495             if(s->dsp.sad[1](NULL, ptr_cr              , dest_cr              , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
4496             if(!s->chroma_y_shift){ /* 422 */
4497                 if(s->dsp.sad[1](NULL, ptr_cb +(dct_offset>>1), dest_cb +(dct_offset>>1), wrap_c, 8) < 20*s->qscale) skip_dct[6]= 1;
4498                 if(s->dsp.sad[1](NULL, ptr_cr +(dct_offset>>1), dest_cr +(dct_offset>>1), wrap_c, 8) < 20*s->qscale) skip_dct[7]= 1;
4499             }
4500         }
4501     }
4502
4503     if(s->avctx->quantizer_noise_shaping){
4504         if(!skip_dct[0]) get_vissual_weight(weight[0], ptr_y                 , wrap_y);
4505         if(!skip_dct[1]) get_vissual_weight(weight[1], ptr_y              + 8, wrap_y);
4506         if(!skip_dct[2]) get_vissual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
4507         if(!skip_dct[3]) get_vissual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
4508         if(!skip_dct[4]) get_vissual_weight(weight[4], ptr_cb                , wrap_c);
4509         if(!skip_dct[5]) get_vissual_weight(weight[5], ptr_cr                , wrap_c);
4510         if(!s->chroma_y_shift){ /* 422 */
4511             if(!skip_dct[6]) get_vissual_weight(weight[6], ptr_cb + (dct_offset>>1), wrap_c);
4512             if(!skip_dct[7]) get_vissual_weight(weight[7], ptr_cr + (dct_offset>>1), wrap_c);
4513         }
4514         memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*mb_block_count);
4515     }
4516
4517     /* DCT & quantize */
4518     assert(s->out_format!=FMT_MJPEG || s->qscale==8);
4519     {
4520         for(i=0;i<mb_block_count;i++) {
4521             if(!skip_dct[i]){
4522                 int overflow;
4523                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
4524             // FIXME we could decide to change to quantizer instead of clipping
4525             // JS: I don't think that would be a good idea it could lower quality instead
4526             //     of improve it. Just INTRADC clipping deserves changes in quantizer
4527                 if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
4528             }else
4529                 s->block_last_index[i]= -1;
4530         }
4531         if(s->avctx->quantizer_noise_shaping){
4532             for(i=0;i<mb_block_count;i++) {
4533                 if(!skip_dct[i]){
4534                     s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale);
4535                 }
4536             }
4537         }
4538
4539         if(s->luma_elim_threshold && !s->mb_intra)
4540             for(i=0; i<4; i++)
4541                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
4542         if(s->chroma_elim_threshold && !s->mb_intra)
4543             for(i=4; i<mb_block_count; i++)
4544                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
4545
4546         if(s->flags & CODEC_FLAG_CBP_RD){
4547             for(i=0;i<mb_block_count;i++) {
4548                 if(s->block_last_index[i] == -1)
4549                     s->coded_score[i]= INT_MAX/256;
4550             }
4551         }
4552     }
4553
4554     if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
4555         s->block_last_index[4]=
4556         s->block_last_index[5]= 0;
4557         s->block[4][0]=
4558         s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
4559     }
4560
4561     //non c quantize code returns incorrect block_last_index FIXME
4562     if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
4563         for(i=0; i<mb_block_count; i++){
4564             int j;
4565             if(s->block_last_index[i]>0){
4566                 for(j=63; j>0; j--){
4567                     if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
4568                 }
4569                 s->block_last_index[i]= j;
4570             }
4571         }
4572     }
4573
4574     /* huffman encode */
4575     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
4576     case CODEC_ID_MPEG1VIDEO:
4577     case CODEC_ID_MPEG2VIDEO:
4578         mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
4579     case CODEC_ID_MPEG4:
4580         mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
4581     case CODEC_ID_MSMPEG4V2:
4582     case CODEC_ID_MSMPEG4V3:
4583     case CODEC_ID_WMV1:
4584         msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
4585     case CODEC_ID_WMV2:
4586          ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
4587 #ifdef CONFIG_H261_ENCODER
4588     case CODEC_ID_H261:
4589         ff_h261_encode_mb(s, s->block, motion_x, motion_y); break;
4590 #endif
4591     case CODEC_ID_H263:
4592     case CODEC_ID_H263P:
4593     case CODEC_ID_FLV1:
4594     case CODEC_ID_RV10:
4595     case CODEC_ID_RV20:
4596         h263_encode_mb(s, s->block, motion_x, motion_y); break;
4597     case CODEC_ID_MJPEG:
4598         mjpeg_encode_mb(s, s->block); break;
4599     default:
4600         assert(0);
4601     }
4602 }
4603
4604 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
4605 {
4606     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
4607     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
4608 }
4609
4610 #endif //CONFIG_ENCODERS
4611
4612 void ff_mpeg_flush(AVCodecContext *avctx){
4613     int i;
4614     MpegEncContext *s = avctx->priv_data;
4615
4616     if(s==NULL || s->picture==NULL)
4617         return;
4618
4619     for(i=0; i<MAX_PICTURE_COUNT; i++){
4620        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
4621                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
4622         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
4623     }
4624     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
4625
4626     s->mb_x= s->mb_y= 0;
4627
4628     s->parse_context.state= -1;
4629     s->parse_context.frame_start_found= 0;
4630     s->parse_context.overread= 0;
4631     s->parse_context.overread_index= 0;
4632     s->parse_context.index= 0;
4633     s->parse_context.last_index= 0;
4634     s->bitstream_buffer_size=0;
4635     s->pp_time=0;
4636 }
4637
4638 #ifdef CONFIG_ENCODERS
4639 void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
4640 {
4641     const uint16_t *srcw= (uint16_t*)src;
4642     int words= length>>4;
4643     int bits= length&15;
4644     int i;
4645
4646     if(length==0) return;
4647
4648     if(words < 16){
4649         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4650     }else if(put_bits_count(pb)&7){
4651         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4652     }else{
4653         for(i=0; put_bits_count(pb)&31; i++)
4654             put_bits(pb, 8, src[i]);
4655         flush_put_bits(pb);
4656         memcpy(pbBufPtr(pb), src+i, 2*words-i);
4657         skip_put_bytes(pb, 2*words-i);
4658     }
4659
4660     put_bits(pb, bits, be2me_16(srcw[words])>>(16-bits));
4661 }
4662
4663 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
4664     int i;
4665
4666     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
4667
4668     /* mpeg1 */
4669     d->mb_skip_run= s->mb_skip_run;
4670     for(i=0; i<3; i++)
4671         d->last_dc[i]= s->last_dc[i];
4672
4673     /* statistics */
4674     d->mv_bits= s->mv_bits;
4675     d->i_tex_bits= s->i_tex_bits;
4676     d->p_tex_bits= s->p_tex_bits;
4677     d->i_count= s->i_count;
4678     d->f_count= s->f_count;
4679     d->b_count= s->b_count;
4680     d->skip_count= s->skip_count;
4681     d->misc_bits= s->misc_bits;
4682     d->last_bits= 0;
4683
4684     d->mb_skipped= 0;
4685     d->qscale= s->qscale;
4686     d->dquant= s->dquant;
4687 }
4688
4689 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
4690     int i;
4691
4692     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
4693     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
4694
4695     /* mpeg1 */
4696     d->mb_skip_run= s->mb_skip_run;
4697     for(i=0; i<3; i++)
4698         d->last_dc[i]= s->last_dc[i];
4699
4700     /* statistics */
4701     d->mv_bits= s->mv_bits;
4702     d->i_tex_bits= s->i_tex_bits;
4703     d->p_tex_bits= s->p_tex_bits;
4704     d->i_count= s->i_count;
4705     d->f_count= s->f_count;
4706     d->b_count= s->b_count;
4707     d->skip_count= s->skip_count;
4708     d->misc_bits= s->misc_bits;
4709
4710     d->mb_intra= s->mb_intra;
4711     d->mb_skipped= s->mb_skipped;
4712     d->mv_type= s->mv_type;
4713     d->mv_dir= s->mv_dir;
4714     d->pb= s->pb;
4715     if(s->data_partitioning){
4716         d->pb2= s->pb2;
4717         d->tex_pb= s->tex_pb;
4718     }
4719     d->block= s->block;
4720     for(i=0; i<8; i++)
4721         d->block_last_index[i]= s->block_last_index[i];
4722     d->interlaced_dct= s->interlaced_dct;
4723     d->qscale= s->qscale;
4724 }
4725
4726 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
4727                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
4728                            int *dmin, int *next_block, int motion_x, int motion_y)
4729 {
4730     int score;
4731     uint8_t *dest_backup[3];
4732
4733     copy_context_before_encode(s, backup, type);
4734
4735     s->block= s->blocks[*next_block];
4736     s->pb= pb[*next_block];
4737     if(s->data_partitioning){
4738         s->pb2   = pb2   [*next_block];
4739         s->tex_pb= tex_pb[*next_block];
4740     }
4741
4742     if(*next_block){
4743         memcpy(dest_backup, s->dest, sizeof(s->dest));
4744         s->dest[0] = s->rd_scratchpad;
4745         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
4746         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
4747         assert(s->linesize >= 32); //FIXME
4748     }
4749
4750     encode_mb(s, motion_x, motion_y);
4751
4752     score= put_bits_count(&s->pb);
4753     if(s->data_partitioning){
4754         score+= put_bits_count(&s->pb2);
4755         score+= put_bits_count(&s->tex_pb);
4756     }
4757
4758     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
4759         MPV_decode_mb(s, s->block);
4760
4761         score *= s->lambda2;
4762         score += sse_mb(s) << FF_LAMBDA_SHIFT;
4763     }
4764
4765     if(*next_block){
4766         memcpy(s->dest, dest_backup, sizeof(s->dest));
4767     }
4768
4769     if(score<*dmin){
4770         *dmin= score;
4771         *next_block^=1;
4772
4773         copy_context_after_encode(best, s, type);
4774     }
4775 }
4776
4777 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
4778     uint32_t *sq = ff_squareTbl + 256;
4779     int acc=0;
4780     int x,y;
4781
4782     if(w==16 && h==16)
4783         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
4784     else if(w==8 && h==8)
4785         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
4786
4787     for(y=0; y<h; y++){
4788         for(x=0; x<w; x++){
4789             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
4790         }
4791     }
4792
4793     assert(acc>=0);
4794
4795     return acc;
4796 }
4797
4798 static int sse_mb(MpegEncContext *s){
4799     int w= 16;
4800     int h= 16;
4801
4802     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
4803     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
4804
4805     if(w==16 && h==16)
4806       if(s->avctx->mb_cmp == FF_CMP_NSSE){
4807         return  s->dsp.nsse[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4808                +s->dsp.nsse[1](s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4809                +s->dsp.nsse[1](s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4810       }else{
4811         return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4812                +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4813                +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4814       }
4815     else
4816         return  sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
4817                +sse(s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
4818                +sse(s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
4819 }
4820
4821 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
4822     MpegEncContext *s= arg;
4823
4824
4825     s->me.pre_pass=1;
4826     s->me.dia_size= s->avctx->pre_dia_size;
4827     s->first_slice_line=1;
4828     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
4829         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
4830             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4831         }
4832         s->first_slice_line=0;
4833     }
4834
4835     s->me.pre_pass=0;
4836
4837     return 0;
4838 }
4839
4840 static int estimate_motion_thread(AVCodecContext *c, void *arg){
4841     MpegEncContext *s= arg;
4842
4843     ff_check_alignment();
4844
4845     s->me.dia_size= s->avctx->dia_size;
4846     s->first_slice_line=1;
4847     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
4848         s->mb_x=0; //for block init below
4849         ff_init_block_index(s);
4850         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
4851             s->block_index[0]+=2;
4852             s->block_index[1]+=2;
4853             s->block_index[2]+=2;
4854             s->block_index[3]+=2;
4855
4856             /* compute motion vector & mb_type and store in context */
4857             if(s->pict_type==B_TYPE)
4858                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
4859             else
4860                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4861         }
4862         s->first_slice_line=0;
4863     }
4864     return 0;
4865 }
4866
4867 static int mb_var_thread(AVCodecContext *c, void *arg){
4868     MpegEncContext *s= arg;
4869     int mb_x, mb_y;
4870
4871     ff_check_alignment();
4872
4873     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4874         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4875             int xx = mb_x * 16;
4876             int yy = mb_y * 16;
4877             uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
4878             int varc;
4879             int sum = s->dsp.pix_sum(pix, s->linesize);
4880
4881             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
4882
4883             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
4884             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
4885             s->me.mb_var_sum_temp    += varc;
4886         }
4887     }
4888     return 0;
4889 }
4890
4891 static void write_slice_end(MpegEncContext *s){
4892     if(s->codec_id==CODEC_ID_MPEG4){
4893         if(s->partitioned_frame){
4894             ff_mpeg4_merge_partitions(s);
4895         }
4896
4897         ff_mpeg4_stuffing(&s->pb);
4898     }else if(s->out_format == FMT_MJPEG){
4899         ff_mjpeg_stuffing(&s->pb);
4900     }
4901
4902     align_put_bits(&s->pb);
4903     flush_put_bits(&s->pb);
4904
4905     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
4906         s->misc_bits+= get_bits_diff(s);
4907 }
4908
4909 static int encode_thread(AVCodecContext *c, void *arg){
4910     MpegEncContext *s= arg;
4911     int mb_x, mb_y, pdif = 0;
4912     int i, j;
4913     MpegEncContext best_s, backup_s;
4914     uint8_t bit_buf[2][MAX_MB_BYTES];
4915     uint8_t bit_buf2[2][MAX_MB_BYTES];
4916     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
4917     PutBitContext pb[2], pb2[2], tex_pb[2];
4918 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
4919
4920     ff_check_alignment();
4921
4922     for(i=0; i<2; i++){
4923         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
4924         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
4925         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
4926     }
4927
4928     s->last_bits= put_bits_count(&s->pb);
4929     s->mv_bits=0;
4930     s->misc_bits=0;
4931     s->i_tex_bits=0;
4932     s->p_tex_bits=0;
4933     s->i_count=0;
4934     s->f_count=0;
4935     s->b_count=0;
4936     s->skip_count=0;
4937
4938     for(i=0; i<3; i++){
4939         /* init last dc values */
4940         /* note: quant matrix value (8) is implied here */
4941         s->last_dc[i] = 128 << s->intra_dc_precision;
4942
4943         s->current_picture.error[i] = 0;
4944     }
4945     s->mb_skip_run = 0;
4946     memset(s->last_mv, 0, sizeof(s->last_mv));
4947
4948     s->last_mv_dir = 0;
4949
4950     switch(s->codec_id){
4951     case CODEC_ID_H263:
4952     case CODEC_ID_H263P:
4953     case CODEC_ID_FLV1:
4954         s->gob_index = ff_h263_get_gob_height(s);
4955         break;
4956     case CODEC_ID_MPEG4:
4957         if(s->partitioned_frame)
4958             ff_mpeg4_init_partitions(s);
4959         break;
4960     }
4961
4962     s->resync_mb_x=0;
4963     s->resync_mb_y=0;
4964     s->first_slice_line = 1;
4965     s->ptr_lastgob = s->pb.buf;
4966     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4967 //    printf("row %d at %X\n", s->mb_y, (int)s);
4968         s->mb_x=0;
4969         s->mb_y= mb_y;
4970
4971         ff_set_qscale(s, s->qscale);
4972         ff_init_block_index(s);
4973
4974         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4975             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
4976             int mb_type= s->mb_type[xy];
4977 //            int d;
4978             int dmin= INT_MAX;
4979             int dir;
4980
4981             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
4982                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4983                 return -1;
4984             }
4985             if(s->data_partitioning){
4986                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
4987                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
4988                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4989                     return -1;
4990                 }
4991             }
4992
4993             s->mb_x = mb_x;
4994             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
4995             ff_update_block_index(s);
4996
4997 #ifdef CONFIG_H261_ENCODER
4998             if(s->codec_id == CODEC_ID_H261){
4999                 ff_h261_reorder_mb_index(s);
5000                 xy= s->mb_y*s->mb_stride + s->mb_x;
5001                 mb_type= s->mb_type[xy];
5002             }
5003 #endif
5004
5005             /* write gob / video packet header  */
5006             if(s->rtp_mode){
5007                 int current_packet_size, is_gob_start;
5008
5009                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
5010
5011                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
5012
5013                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
5014
5015                 switch(s->codec_id){
5016                 case CODEC_ID_H263:
5017                 case CODEC_ID_H263P:
5018                     if(!s->h263_slice_structured)
5019                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
5020                     break;
5021                 case CODEC_ID_MPEG2VIDEO:
5022                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
5023                 case CODEC_ID_MPEG1VIDEO:
5024                     if(s->mb_skip_run) is_gob_start=0;
5025                     break;
5026                 }
5027
5028                 if(is_gob_start){
5029                     if(s->start_mb_y != mb_y || mb_x!=0){
5030                         write_slice_end(s);
5031
5032                         if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
5033                             ff_mpeg4_init_partitions(s);
5034                         }
5035                     }
5036
5037                     assert((put_bits_count(&s->pb)&7) == 0);
5038                     current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
5039
5040                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
5041                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
5042                         int d= 100 / s->avctx->error_rate;
5043                         if(r % d == 0){
5044                             current_packet_size=0;
5045 #ifndef ALT_BITSTREAM_WRITER
5046                             s->pb.buf_ptr= s->ptr_lastgob;
5047 #endif
5048                             assert(pbBufPtr(&s->pb) == s->ptr_lastgob);
5049                         }
5050                     }
5051
5052                     if (s->avctx->rtp_callback){
5053                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
5054                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
5055                     }
5056
5057                     switch(s->codec_id){
5058                     case CODEC_ID_MPEG4:
5059                         ff_mpeg4_encode_video_packet_header(s);
5060                         ff_mpeg4_clean_buffers(s);
5061                     break;
5062                     case CODEC_ID_MPEG1VIDEO:
5063                     case CODEC_ID_MPEG2VIDEO:
5064                         ff_mpeg1_encode_slice_header(s);
5065                         ff_mpeg1_clean_buffers(s);
5066                     break;
5067                     case CODEC_ID_H263:
5068                     case CODEC_ID_H263P:
5069                         h263_encode_gob_header(s, mb_y);
5070                     break;
5071                     }
5072
5073                     if(s->flags&CODEC_FLAG_PASS1){
5074                         int bits= put_bits_count(&s->pb);
5075                         s->misc_bits+= bits - s->last_bits;
5076                         s->last_bits= bits;
5077                     }
5078
5079                     s->ptr_lastgob += current_packet_size;
5080                     s->first_slice_line=1;
5081                     s->resync_mb_x=mb_x;
5082                     s->resync_mb_y=mb_y;
5083                 }
5084             }
5085
5086             if(  (s->resync_mb_x   == s->mb_x)
5087                && s->resync_mb_y+1 == s->mb_y){
5088                 s->first_slice_line=0;
5089             }
5090
5091             s->mb_skipped=0;
5092             s->dquant=0; //only for QP_RD
5093
5094             if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
5095                 int next_block=0;
5096                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
5097
5098                 copy_context_before_encode(&backup_s, s, -1);
5099                 backup_s.pb= s->pb;
5100                 best_s.data_partitioning= s->data_partitioning;
5101                 best_s.partitioned_frame= s->partitioned_frame;
5102                 if(s->data_partitioning){
5103                     backup_s.pb2= s->pb2;
5104                     backup_s.tex_pb= s->tex_pb;
5105                 }
5106
5107                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
5108                     s->mv_dir = MV_DIR_FORWARD;
5109                     s->mv_type = MV_TYPE_16X16;
5110                     s->mb_intra= 0;
5111                     s->mv[0][0][0] = s->p_mv_table[xy][0];
5112                     s->mv[0][0][1] = s->p_mv_table[xy][1];
5113                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
5114                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5115                 }
5116                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
5117                     s->mv_dir = MV_DIR_FORWARD;
5118                     s->mv_type = MV_TYPE_FIELD;
5119                     s->mb_intra= 0;
5120                     for(i=0; i<2; i++){
5121                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
5122                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
5123                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
5124                     }
5125                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
5126                                  &dmin, &next_block, 0, 0);
5127                 }
5128                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
5129                     s->mv_dir = MV_DIR_FORWARD;
5130                     s->mv_type = MV_TYPE_16X16;
5131                     s->mb_intra= 0;
5132                     s->mv[0][0][0] = 0;
5133                     s->mv[0][0][1] = 0;
5134                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
5135                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5136                 }
5137                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
5138                     s->mv_dir = MV_DIR_FORWARD;
5139                     s->mv_type = MV_TYPE_8X8;
5140                     s->mb_intra= 0;
5141                     for(i=0; i<4; i++){
5142                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5143                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5144                     }
5145                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
5146                                  &dmin, &next_block, 0, 0);
5147                 }
5148                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
5149                     s->mv_dir = MV_DIR_FORWARD;
5150                     s->mv_type = MV_TYPE_16X16;
5151                     s->mb_intra= 0;
5152                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5153                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5154                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
5155                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5156                 }
5157                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
5158                     s->mv_dir = MV_DIR_BACKWARD;
5159                     s->mv_type = MV_TYPE_16X16;
5160                     s->mb_intra= 0;
5161                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5162                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5163                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
5164                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
5165                 }
5166                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
5167                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5168                     s->mv_type = MV_TYPE_16X16;
5169                     s->mb_intra= 0;
5170                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5171                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5172                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5173                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5174                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
5175                                  &dmin, &next_block, 0, 0);
5176                 }
5177                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
5178                     s->mv_dir = MV_DIR_FORWARD;
5179                     s->mv_type = MV_TYPE_FIELD;
5180                     s->mb_intra= 0;
5181                     for(i=0; i<2; i++){
5182                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5183                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5184                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5185                     }
5186                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
5187                                  &dmin, &next_block, 0, 0);
5188                 }
5189                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
5190                     s->mv_dir = MV_DIR_BACKWARD;
5191                     s->mv_type = MV_TYPE_FIELD;
5192                     s->mb_intra= 0;
5193                     for(i=0; i<2; i++){
5194                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5195                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5196                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5197                     }
5198                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
5199                                  &dmin, &next_block, 0, 0);
5200                 }
5201                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
5202                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5203                     s->mv_type = MV_TYPE_FIELD;
5204                     s->mb_intra= 0;
5205                     for(dir=0; dir<2; dir++){
5206                         for(i=0; i<2; i++){
5207                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5208                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5209                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5210                         }
5211                     }
5212                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
5213                                  &dmin, &next_block, 0, 0);
5214                 }
5215                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
5216                     s->mv_dir = 0;
5217                     s->mv_type = MV_TYPE_16X16;
5218                     s->mb_intra= 1;
5219                     s->mv[0][0][0] = 0;
5220                     s->mv[0][0][1] = 0;
5221                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
5222                                  &dmin, &next_block, 0, 0);
5223                     if(s->h263_pred || s->h263_aic){
5224                         if(best_s.mb_intra)
5225                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
5226                         else
5227                             ff_clean_intra_table_entries(s); //old mode?
5228                     }
5229                 }
5230
5231                 if((s->flags & CODEC_FLAG_QP_RD) && dmin < INT_MAX){
5232                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
5233                         const int last_qp= backup_s.qscale;
5234                         int qpi, qp, dc[6];
5235                         DCTELEM ac[6][16];
5236                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
5237                         static const int dquant_tab[4]={-1,1,-2,2};
5238
5239                         assert(backup_s.dquant == 0);
5240
5241                         //FIXME intra
5242                         s->mv_dir= best_s.mv_dir;
5243                         s->mv_type = MV_TYPE_16X16;
5244                         s->mb_intra= best_s.mb_intra;
5245                         s->mv[0][0][0] = best_s.mv[0][0][0];
5246                         s->mv[0][0][1] = best_s.mv[0][0][1];
5247                         s->mv[1][0][0] = best_s.mv[1][0][0];
5248                         s->mv[1][0][1] = best_s.mv[1][0][1];
5249
5250                         qpi = s->pict_type == B_TYPE ? 2 : 0;
5251                         for(; qpi<4; qpi++){
5252                             int dquant= dquant_tab[qpi];
5253                             qp= last_qp + dquant;
5254                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
5255                                 continue;
5256                             backup_s.dquant= dquant;
5257                             if(s->mb_intra && s->dc_val[0]){
5258                                 for(i=0; i<6; i++){
5259                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
5260                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
5261                                 }
5262                             }
5263
5264                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
5265                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
5266                             if(best_s.qscale != qp){
5267                                 if(s->mb_intra && s->dc_val[0]){
5268                                     for(i=0; i<6; i++){
5269                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
5270                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
5271                                     }
5272                                 }
5273                             }
5274                         }
5275                     }
5276                 }
5277                 if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
5278                     int mx= s->b_direct_mv_table[xy][0];
5279                     int my= s->b_direct_mv_table[xy][1];
5280
5281                     backup_s.dquant = 0;
5282                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5283                     s->mb_intra= 0;
5284                     ff_mpeg4_set_direct_mv(s, mx, my);
5285                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
5286                                  &dmin, &next_block, mx, my);
5287                 }
5288                 if(mb_type&CANDIDATE_MB_TYPE_DIRECT0){
5289                     backup_s.dquant = 0;
5290                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5291                     s->mb_intra= 0;
5292                     ff_mpeg4_set_direct_mv(s, 0, 0);
5293                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
5294                                  &dmin, &next_block, 0, 0);
5295                 }
5296                 if(!best_s.mb_intra && s->flags2&CODEC_FLAG2_SKIP_RD){
5297                     int coded=0;
5298                     for(i=0; i<6; i++)
5299                         coded |= s->block_last_index[i];
5300                     if(coded){
5301                         int mx,my;
5302                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
5303                         if(best_s.mv_dir & MV_DIRECT){
5304                             mx=my=0; //FIXME find the one we actually used
5305                             ff_mpeg4_set_direct_mv(s, mx, my);
5306                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
5307                             mx= s->mv[1][0][0];
5308                             my= s->mv[1][0][1];
5309                         }else{
5310                             mx= s->mv[0][0][0];
5311                             my= s->mv[0][0][1];
5312                         }
5313
5314                         s->mv_dir= best_s.mv_dir;
5315                         s->mv_type = best_s.mv_type;
5316                         s->mb_intra= 0;
5317 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
5318                         s->mv[0][0][1] = best_s.mv[0][0][1];
5319                         s->mv[1][0][0] = best_s.mv[1][0][0];
5320                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
5321                         backup_s.dquant= 0;
5322                         s->skipdct=1;
5323                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
5324                                         &dmin, &next_block, mx, my);
5325                         s->skipdct=0;
5326                     }
5327                 }
5328
5329                 s->current_picture.qscale_table[xy]= best_s.qscale;
5330
5331                 copy_context_after_encode(s, &best_s, -1);
5332
5333                 pb_bits_count= put_bits_count(&s->pb);
5334                 flush_put_bits(&s->pb);
5335                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
5336                 s->pb= backup_s.pb;
5337
5338                 if(s->data_partitioning){
5339                     pb2_bits_count= put_bits_count(&s->pb2);
5340                     flush_put_bits(&s->pb2);
5341                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
5342                     s->pb2= backup_s.pb2;
5343
5344                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
5345                     flush_put_bits(&s->tex_pb);
5346                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
5347                     s->tex_pb= backup_s.tex_pb;
5348                 }
5349                 s->last_bits= put_bits_count(&s->pb);
5350
5351                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5352                     ff_h263_update_motion_val(s);
5353
5354                 if(next_block==0){ //FIXME 16 vs linesize16
5355                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
5356                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
5357                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
5358                 }
5359
5360                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
5361                     MPV_decode_mb(s, s->block);
5362             } else {
5363                 int motion_x = 0, motion_y = 0;
5364                 s->mv_type=MV_TYPE_16X16;
5365                 // only one MB-Type possible
5366
5367                 switch(mb_type){
5368                 case CANDIDATE_MB_TYPE_INTRA:
5369                     s->mv_dir = 0;
5370                     s->mb_intra= 1;
5371                     motion_x= s->mv[0][0][0] = 0;
5372                     motion_y= s->mv[0][0][1] = 0;
5373                     break;
5374                 case CANDIDATE_MB_TYPE_INTER:
5375                     s->mv_dir = MV_DIR_FORWARD;
5376                     s->mb_intra= 0;
5377                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
5378                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
5379                     break;
5380                 case CANDIDATE_MB_TYPE_INTER_I:
5381                     s->mv_dir = MV_DIR_FORWARD;
5382                     s->mv_type = MV_TYPE_FIELD;
5383                     s->mb_intra= 0;
5384                     for(i=0; i<2; i++){
5385                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
5386                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
5387                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
5388                     }
5389                     break;
5390                 case CANDIDATE_MB_TYPE_INTER4V:
5391                     s->mv_dir = MV_DIR_FORWARD;
5392                     s->mv_type = MV_TYPE_8X8;
5393                     s->mb_intra= 0;
5394                     for(i=0; i<4; i++){
5395                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5396                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5397                     }
5398                     break;
5399                 case CANDIDATE_MB_TYPE_DIRECT:
5400                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5401                     s->mb_intra= 0;
5402                     motion_x=s->b_direct_mv_table[xy][0];
5403                     motion_y=s->b_direct_mv_table[xy][1];
5404                     ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
5405                     break;
5406                 case CANDIDATE_MB_TYPE_DIRECT0:
5407                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5408                     s->mb_intra= 0;
5409                     ff_mpeg4_set_direct_mv(s, 0, 0);
5410                     break;
5411                 case CANDIDATE_MB_TYPE_BIDIR:
5412                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5413                     s->mb_intra= 0;
5414                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5415                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5416                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5417                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5418                     break;
5419                 case CANDIDATE_MB_TYPE_BACKWARD:
5420                     s->mv_dir = MV_DIR_BACKWARD;
5421                     s->mb_intra= 0;
5422                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5423                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5424                     break;
5425                 case CANDIDATE_MB_TYPE_FORWARD:
5426                     s->mv_dir = MV_DIR_FORWARD;
5427                     s->mb_intra= 0;
5428                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5429                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5430 //                    printf(" %d %d ", motion_x, motion_y);
5431                     break;
5432                 case CANDIDATE_MB_TYPE_FORWARD_I:
5433                     s->mv_dir = MV_DIR_FORWARD;
5434                     s->mv_type = MV_TYPE_FIELD;
5435                     s->mb_intra= 0;
5436                     for(i=0; i<2; i++){
5437                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5438                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5439                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5440                     }
5441                     break;
5442                 case CANDIDATE_MB_TYPE_BACKWARD_I:
5443                     s->mv_dir = MV_DIR_BACKWARD;
5444                     s->mv_type = MV_TYPE_FIELD;
5445                     s->mb_intra= 0;
5446                     for(i=0; i<2; i++){
5447                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5448                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5449                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5450                     }
5451                     break;
5452                 case CANDIDATE_MB_TYPE_BIDIR_I:
5453                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5454                     s->mv_type = MV_TYPE_FIELD;
5455                     s->mb_intra= 0;
5456                     for(dir=0; dir<2; dir++){
5457                         for(i=0; i<2; i++){
5458                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5459                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5460                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5461                         }
5462                     }
5463                     break;
5464                 default:
5465                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
5466                 }
5467
5468                 encode_mb(s, motion_x, motion_y);
5469
5470                 // RAL: Update last macroblock type
5471                 s->last_mv_dir = s->mv_dir;
5472
5473                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5474                     ff_h263_update_motion_val(s);
5475
5476                 MPV_decode_mb(s, s->block);
5477             }
5478
5479             /* clean the MV table in IPS frames for direct mode in B frames */
5480             if(s->mb_intra /* && I,P,S_TYPE */){
5481                 s->p_mv_table[xy][0]=0;
5482                 s->p_mv_table[xy][1]=0;
5483             }
5484
5485             if(s->flags&CODEC_FLAG_PSNR){
5486                 int w= 16;
5487                 int h= 16;
5488
5489                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
5490                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
5491
5492                 s->current_picture.error[0] += sse(
5493                     s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
5494                     s->dest[0], w, h, s->linesize);
5495                 s->current_picture.error[1] += sse(
5496                     s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5497                     s->dest[1], w>>1, h>>1, s->uvlinesize);
5498                 s->current_picture.error[2] += sse(
5499                     s, s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5500                     s->dest[2], w>>1, h>>1, s->uvlinesize);
5501             }
5502             if(s->loop_filter){
5503                 if(s->out_format == FMT_H263)
5504                     ff_h263_loop_filter(s);
5505             }
5506 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
5507         }
5508     }
5509
5510     //not beautiful here but we must write it before flushing so it has to be here
5511     if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
5512         msmpeg4_encode_ext_header(s);
5513
5514     write_slice_end(s);
5515
5516     /* Send the last GOB if RTP */
5517     if (s->avctx->rtp_callback) {
5518         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
5519         pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
5520         /* Call the RTP callback to send the last GOB */
5521         emms_c();
5522         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
5523     }
5524
5525     return 0;
5526 }
5527
/*
 * Add src->field into dst->field and reset src->field to zero.
 * Expects pointers named `dst` and `src` to be in scope at the point of use.
 * Wrapped in do { } while (0) so that the two statements always act as a
 * single statement, e.g. when used as the unbraced body of an if/for.
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
5529 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
5530     MERGE(me.scene_change_score);
5531     MERGE(me.mc_mb_var_sum_temp);
5532     MERGE(me.mb_var_sum_temp);
5533 }
5534
5535 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
5536     int i;
5537
5538     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
5539     MERGE(dct_count[1]);
5540     MERGE(mv_bits);
5541     MERGE(i_tex_bits);
5542     MERGE(p_tex_bits);
5543     MERGE(i_count);
5544     MERGE(f_count);
5545     MERGE(b_count);
5546     MERGE(skip_count);
5547     MERGE(misc_bits);
5548     MERGE(error_count);
5549     MERGE(padding_bug_score);
5550     MERGE(current_picture.error[0]);
5551     MERGE(current_picture.error[1]);
5552     MERGE(current_picture.error[2]);
5553
5554     if(dst->avctx->noise_reduction){
5555         for(i=0; i<64; i++){
5556             MERGE(dct_error_sum[0][i]);
5557             MERGE(dct_error_sum[1][i]);
5558         }
5559     }
5560
5561     assert(put_bits_count(&src->pb) % 8 ==0);
5562     assert(put_bits_count(&dst->pb) % 8 ==0);
5563     ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
5564     flush_put_bits(&dst->pb);
5565 }
5566
5567 static int estimate_qp(MpegEncContext *s, int dry_run){
5568     if (s->next_lambda){
5569         s->current_picture_ptr->quality=
5570         s->current_picture.quality = s->next_lambda;
5571         if(!dry_run) s->next_lambda= 0;
5572     } else if (!s->fixed_qscale) {
5573         s->current_picture_ptr->quality=
5574         s->current_picture.quality = ff_rate_estimate_qscale(s, dry_run);
5575         if (s->current_picture.quality < 0)
5576             return -1;
5577     }
5578
5579     if(s->adaptive_quant){
5580         switch(s->codec_id){
5581         case CODEC_ID_MPEG4:
5582             ff_clean_mpeg4_qscales(s);
5583             break;
5584         case CODEC_ID_H263:
5585         case CODEC_ID_H263P:
5586         case CODEC_ID_FLV1:
5587             ff_clean_h263_qscales(s);
5588             break;
5589         }
5590
5591         s->lambda= s->lambda_table[0];
5592         //FIXME broken
5593     }else
5594         s->lambda= s->current_picture.quality;
5595 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
5596     update_qscale(s);
5597     return 0;
5598 }
5599
5600 static int encode_picture(MpegEncContext *s, int picture_number)
5601 {
5602     int i;
5603     int bits;
5604
5605     s->picture_number = picture_number;
5606
5607     /* Reset the average MB variance */
5608     s->me.mb_var_sum_temp    =
5609     s->me.mc_mb_var_sum_temp = 0;
5610
5611     /* we need to initialize some time vars before we can encode b-frames */
5612     // RAL: Condition added for MPEG1VIDEO
5613     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
5614         ff_set_mpeg4_time(s, s->picture_number);  //FIXME rename and use has_b_frames or similar
5615
5616     s->me.scene_change_score=0;
5617
5618 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME ratedistoration
5619
5620     if(s->pict_type==I_TYPE){
5621         if(s->msmpeg4_version >= 3) s->no_rounding=1;
5622         else                        s->no_rounding=0;
5623     }else if(s->pict_type!=B_TYPE){
5624         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
5625             s->no_rounding ^= 1;
5626     }
5627
5628     if(s->flags & CODEC_FLAG_PASS2){
5629         if (estimate_qp(s,1) < 0)
5630             return -1;
5631         ff_get_2pass_fcode(s);
5632     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
5633         if(s->pict_type==B_TYPE)
5634             s->lambda= s->last_lambda_for[s->pict_type];
5635         else
5636             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
5637         update_qscale(s);
5638     }
5639
5640     s->mb_intra=0; //for the rate distortion & bit compare functions
5641     for(i=1; i<s->avctx->thread_count; i++){
5642         ff_update_duplicate_context(s->thread_context[i], s);
5643     }
5644
5645     ff_init_me(s);
5646
5647     /* Estimate motion for every MB */
5648     if(s->pict_type != I_TYPE){
5649         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
5650         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
5651         if(s->pict_type != B_TYPE && s->avctx->me_threshold==0){
5652             if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
5653                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5654             }
5655         }
5656
5657         s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5658     }else /* if(s->pict_type == I_TYPE) */{
5659         /* I-Frame */
5660         for(i=0; i<s->mb_stride*s->mb_height; i++)
5661             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
5662
5663         if(!s->fixed_qscale){
5664             /* finding spatial complexity for I-frame rate control */
5665             s->avctx->execute(s->avctx, mb_var_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5666         }
5667     }
5668     for(i=1; i<s->avctx->thread_count; i++){
5669         merge_context_after_me(s, s->thread_context[i]);
5670     }
5671     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
5672     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
5673     emms_c();
5674
5675     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){
5676         s->pict_type= I_TYPE;
5677         for(i=0; i<s->mb_stride*s->mb_height; i++)
5678             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
5679 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
5680     }
5681
5682     if(!s->umvplus){
5683         if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
5684             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
5685
5686             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5687                 int a,b;
5688                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
5689                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
5690                 s->f_code= FFMAX(s->f_code, FFMAX(a,b));
5691             }
5692
5693             ff_fix_long_p_mvs(s);
5694             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
5695             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5696                 int j;
5697                 for(i=0; i<2; i++){
5698                     for(j=0; j<2; j++)
5699                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
5700                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
5701                 }
5702             }
5703         }
5704
5705         if(s->pict_type==B_TYPE){
5706             int a, b;
5707
5708             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
5709             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
5710             s->f_code = FFMAX(a, b);
5711
5712             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
5713             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
5714             s->b_code = FFMAX(a, b);
5715
5716             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
5717             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
5718             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
5719             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
5720             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5721                 int dir, j;
5722                 for(dir=0; dir<2; dir++){
5723                     for(i=0; i<2; i++){
5724                         for(j=0; j<2; j++){
5725                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
5726                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
5727                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
5728                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
5729                         }
5730                     }
5731                 }
5732             }
5733         }
5734     }
5735
5736     if (estimate_qp(s, 0) < 0)
5737         return -1;
5738
5739     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE))
5740         s->qscale= 3; //reduce clipping problems
5741
5742     if (s->out_format == FMT_MJPEG) {
5743         /* for mjpeg, we do include qscale in the matrix */
5744         s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
5745         for(i=1;i<64;i++){
5746             int j= s->dsp.idct_permutation[i];
5747
5748             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
5749         }
5750         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
5751                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
5752         s->qscale= 8;
5753     }
5754
5755     //FIXME var duplication
5756     s->current_picture_ptr->key_frame=
5757     s->current_picture.key_frame= s->pict_type == I_TYPE; //FIXME pic_ptr
5758     s->current_picture_ptr->pict_type=
5759     s->current_picture.pict_type= s->pict_type;
5760
5761     if(s->current_picture.key_frame)
5762         s->picture_in_gop_number=0;
5763
5764     s->last_bits= put_bits_count(&s->pb);
5765     switch(s->out_format) {
5766     case FMT_MJPEG:
5767         mjpeg_picture_header(s);
5768         break;
5769 #ifdef CONFIG_H261_ENCODER
5770     case FMT_H261:
5771         ff_h261_encode_picture_header(s, picture_number);
5772         break;
5773 #endif
5774     case FMT_H263:
5775         if (s->codec_id == CODEC_ID_WMV2)
5776             ff_wmv2_encode_picture_header(s, picture_number);
5777         else if (s->h263_msmpeg4)
5778             msmpeg4_encode_picture_header(s, picture_number);
5779         else if (s->h263_pred)
5780             mpeg4_encode_picture_header(s, picture_number);
5781 #ifdef CONFIG_RV10_ENCODER
5782         else if (s->codec_id == CODEC_ID_RV10)
5783             rv10_encode_picture_header(s, picture_number);
5784 #endif
5785 #ifdef CONFIG_RV20_ENCODER
5786         else if (s->codec_id == CODEC_ID_RV20)
5787             rv20_encode_picture_header(s, picture_number);
5788 #endif
5789         else if (s->codec_id == CODEC_ID_FLV1)
5790             ff_flv_encode_picture_header(s, picture_number);
5791         else
5792             h263_encode_picture_header(s, picture_number);
5793         break;
5794     case FMT_MPEG1:
5795         mpeg1_encode_picture_header(s, picture_number);
5796         break;
5797     case FMT_H264:
5798         break;
5799     default:
5800         assert(0);
5801     }
5802     bits= put_bits_count(&s->pb);
5803     s->header_bits= bits - s->last_bits;
5804
5805     for(i=1; i<s->avctx->thread_count; i++){
5806         update_duplicate_context_after_me(s->thread_context[i], s);
5807     }
5808     s->avctx->execute(s->avctx, encode_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5809     for(i=1; i<s->avctx->thread_count; i++){
5810         merge_context_after_encode(s, s->thread_context[i]);
5811     }
5812     emms_c();
5813     return 0;
5814 }
5815
5816 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block){
5817     const int intra= s->mb_intra;
5818     int i;
5819
5820     s->dct_count[intra]++;
5821
5822     for(i=0; i<64; i++){
5823         int level= block[i];
5824
5825         if(level){
5826             if(level>0){
5827                 s->dct_error_sum[intra][i] += level;
5828                 level -= s->dct_offset[intra][i];
5829                 if(level<0) level=0;
5830             }else{
5831                 s->dct_error_sum[intra][i] -= level;
5832                 level += s->dct_offset[intra][i];
5833                 if(level>0) level=0;
5834             }
5835             block[i]= level;
5836         }
5837     }
5838 }
5839
5840 static int dct_quantize_trellis_c(MpegEncContext *s,
5841                         DCTELEM *block, int n,
5842                         int qscale, int *overflow){
5843     const int *qmat;
5844     const uint8_t *scantable= s->intra_scantable.scantable;
5845     const uint8_t *perm_scantable= s->intra_scantable.permutated;
5846     int max=0;
5847     unsigned int threshold1, threshold2;
5848     int bias=0;
5849     int run_tab[65];
5850     int level_tab[65];
5851     int score_tab[65];
5852     int survivor[65];
5853     int survivor_count;
5854     int last_run=0;
5855     int last_level=0;
5856     int last_score= 0;
5857     int last_i;
5858     int coeff[2][64];
5859     int coeff_count[64];
5860     int qmul, qadd, start_i, last_non_zero, i, dc;
5861     const int esc_length= s->ac_esc_length;
5862     uint8_t * length;
5863     uint8_t * last_length;
5864     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
5865
5866     s->dsp.fdct (block);
5867
5868     if(s->dct_error_sum)
5869         s->denoise_dct(s, block);
5870     qmul= qscale*16;
5871     qadd= ((qscale-1)|1)*8;
5872
5873     if (s->mb_intra) {
5874         int q;
5875         if (!s->h263_aic) {
5876             if (n < 4)
5877                 q = s->y_dc_scale;
5878             else
5879                 q = s->c_dc_scale;
5880             q = q << 3;
5881         } else{
5882             /* For AIC we skip quant/dequant of INTRADC */
5883             q = 1 << 3;
5884             qadd=0;
5885         }
5886
5887         /* note: block[0] is assumed to be positive */
5888         block[0] = (block[0] + (q >> 1)) / q;
5889         start_i = 1;
5890         last_non_zero = 0;
5891         qmat = s->q_intra_matrix[qscale];
5892         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
5893             bias= 1<<(QMAT_SHIFT-1);
5894         length     = s->intra_ac_vlc_length;
5895         last_length= s->intra_ac_vlc_last_length;
5896     } else {
5897         start_i = 0;
5898         last_non_zero = -1;
5899         qmat = s->q_inter_matrix[qscale];
5900         length     = s->inter_ac_vlc_length;
5901         last_length= s->inter_ac_vlc_last_length;
5902     }
5903     last_i= start_i;
5904
5905     threshold1= (1<<QMAT_SHIFT) - bias - 1;
5906     threshold2= (threshold1<<1);
5907
5908     for(i=63; i>=start_i; i--) {
5909         const int j = scantable[i];
5910         int level = block[j] * qmat[j];
5911
5912         if(((unsigned)(level+threshold1))>threshold2){
5913             last_non_zero = i;
5914             break;
5915         }
5916     }
5917
5918     for(i=start_i; i<=last_non_zero; i++) {
5919         const int j = scantable[i];
5920         int level = block[j] * qmat[j];
5921
5922 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
5923 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
5924         if(((unsigned)(level+threshold1))>threshold2){
5925             if(level>0){
5926                 level= (bias + level)>>QMAT_SHIFT;
5927                 coeff[0][i]= level;
5928                 coeff[1][i]= level-1;
5929 //                coeff[2][k]= level-2;
5930             }else{
5931                 level= (bias - level)>>QMAT_SHIFT;
5932                 coeff[0][i]= -level;
5933                 coeff[1][i]= -level+1;
5934 //                coeff[2][k]= -level+2;
5935             }
5936             coeff_count[i]= FFMIN(level, 2);
5937             assert(coeff_count[i]);
5938             max |=level;
5939         }else{
5940             coeff[0][i]= (level>>31)|1;
5941             coeff_count[i]= 1;
5942         }
5943     }
5944
5945     *overflow= s->max_qcoeff < max; //overflow might have happened
5946
5947     if(last_non_zero < start_i){
5948         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
5949         return last_non_zero;
5950     }
5951
5952     score_tab[start_i]= 0;
5953     survivor[0]= start_i;
5954     survivor_count= 1;
5955
5956     for(i=start_i; i<=last_non_zero; i++){
5957         int level_index, j;
5958         const int dct_coeff= FFABS(block[ scantable[i] ]);
5959         const int zero_distoration= dct_coeff*dct_coeff;
5960         int best_score=256*256*256*120;
5961         for(level_index=0; level_index < coeff_count[i]; level_index++){
5962             int distoration;
5963             int level= coeff[level_index][i];
5964             const int alevel= FFABS(level);
5965             int unquant_coeff;
5966
5967             assert(level);
5968
5969             if(s->out_format == FMT_H263){
5970                 unquant_coeff= alevel*qmul + qadd;
5971             }else{ //MPEG1
5972                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
5973                 if(s->mb_intra){
5974                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
5975                         unquant_coeff =   (unquant_coeff - 1) | 1;
5976                 }else{
5977                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
5978                         unquant_coeff =   (unquant_coeff - 1) | 1;
5979                 }
5980                 unquant_coeff<<= 3;
5981             }
5982
5983             distoration= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distoration;
5984             level+=64;
5985             if((level&(~127)) == 0){
5986                 for(j=survivor_count-1; j>=0; j--){
5987                     int run= i - survivor[j];
5988                     int score= distoration + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
5989                     score += score_tab[i-run];
5990
5991                     if(score < best_score){
5992                         best_score= score;
5993                         run_tab[i+1]= run;
5994                         level_tab[i+1]= level-64;
5995                     }
5996                 }
5997
5998                 if(s->out_format == FMT_H263){
5999                     for(j=survivor_count-1; j>=0; j--){
6000                         int run= i - survivor[j];
6001                         int score= distoration + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
6002                         score += score_tab[i-run];
6003                         if(score < last_score){
6004                             last_score= score;
6005                             last_run= run;
6006                             last_level= level-64;
6007                             last_i= i+1;
6008                         }
6009                     }
6010                 }
6011             }else{
6012                 distoration += esc_length*lambda;
6013                 for(j=survivor_count-1; j>=0; j--){
6014                     int run= i - survivor[j];
6015                     int score= distoration + score_tab[i-run];
6016
6017                     if(score < best_score){
6018                         best_score= score;
6019                         run_tab[i+1]= run;
6020                         level_tab[i+1]= level-64;
6021                     }
6022                 }
6023
6024                 if(s->out_format == FMT_H263){
6025                   for(j=survivor_count-1; j>=0; j--){
6026                         int run= i - survivor[j];
6027                         int score= distoration + score_tab[i-run];
6028                         if(score < last_score){
6029                             last_score= score;
6030                             last_run= run;
6031                             last_level= level-64;
6032                             last_i= i+1;
6033                         }
6034                     }
6035                 }
6036             }
6037         }
6038
6039         score_tab[i+1]= best_score;
6040
6041         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
6042         if(last_non_zero <= 27){
6043             for(; survivor_count; survivor_count--){
6044                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
6045                     break;
6046             }
6047         }else{
6048             for(; survivor_count; survivor_count--){
6049                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
6050                     break;
6051             }
6052         }
6053
6054         survivor[ survivor_count++ ]= i+1;
6055     }
6056
6057     if(s->out_format != FMT_H263){
6058         last_score= 256*256*256*120;
6059         for(i= survivor[0]; i<=last_non_zero + 1; i++){
6060             int score= score_tab[i];
6061             if(i) score += lambda*2; //FIXME exacter?
6062
6063             if(score < last_score){
6064                 last_score= score;
6065                 last_i= i;
6066                 last_level= level_tab[i];
6067                 last_run= run_tab[i];
6068             }
6069         }
6070     }
6071
6072     s->coded_score[n] = last_score;
6073
6074     dc= FFABS(block[0]);
6075     last_non_zero= last_i - 1;
6076     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
6077
6078     if(last_non_zero < start_i)
6079         return last_non_zero;
6080
6081     if(last_non_zero == 0 && start_i == 0){
6082         int best_level= 0;
6083         int best_score= dc * dc;
6084
6085         for(i=0; i<coeff_count[0]; i++){
6086             int level= coeff[i][0];
6087             int alevel= FFABS(level);
6088             int unquant_coeff, score, distortion;
6089
6090             if(s->out_format == FMT_H263){
6091                     unquant_coeff= (alevel*qmul + qadd)>>3;
6092             }else{ //MPEG1
6093                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
6094                     unquant_coeff =   (unquant_coeff - 1) | 1;
6095             }
6096             unquant_coeff = (unquant_coeff + 4) >> 3;
6097             unquant_coeff<<= 3 + 3;
6098
6099             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
6100             level+=64;
6101             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
6102             else                    score= distortion + esc_length*lambda;
6103
6104             if(score < best_score){
6105                 best_score= score;
6106                 best_level= level - 64;
6107             }
6108         }
6109         block[0]= best_level;
6110         s->coded_score[n] = best_score - dc*dc;
6111         if(best_level == 0) return -1;
6112         else                return last_non_zero;
6113     }
6114
6115     i= last_i;
6116     assert(last_level);
6117
6118     block[ perm_scantable[last_non_zero] ]= last_level;
6119     i -= last_run + 1;
6120
6121     for(; i>start_i; i -= run_tab[i] + 1){
6122         block[ perm_scantable[i-1] ]= level_tab[i];
6123     }
6124
6125     return last_non_zero;
6126 }
6127
/* Uncomment to compile in the REFINE_STATS timers and counters that are
 * guarded by #ifdef REFINE_STATS inside dct_quantize_refine(). */
6128 //#define REFINE_STATS 1
/* Table filled by build_basis(): the first index is a (permuted) coefficient
 * index, the second is 8*x + y within the 8x8 block. dct_quantize_refine()
 * treats basis[0][0] == 0 as "table not built yet". */
6129 static int16_t basis[64][64];
6130
/**
 * Fill the file-static basis[][] table with scaled cosine products (the form
 * of the 8x8 DCT basis functions).
 *
 * For every frequency pair (i, j) and position (x, y), all in 0..7, the entry
 * basis[perm[8*i + j]][8*x + y] receives
 *   lrintf(s * cos((M_PI/8)*i*(x+0.5)) * cos((M_PI/8)*j*(y+0.5)))
 * where s starts at 0.25*(1<<BASIS_SHIFT) and is multiplied by sqrt(0.5) once
 * when i==0 and once when j==0.
 *
 * @param perm table mapping the index 8*i + j to the row of basis[][] that is
 *             written; the caller in this file passes s->dsp.idct_permutation
 */
6131 static void build_basis(uint8_t *perm){
6132     int i, j, x, y;
         /* NOTE(review): presumably leaves MMX state before the floating-point
          * math below -- confirm against the definition of emms_c() */
6133     emms_c();
6134     for(i=0; i<8; i++){
6135         for(j=0; j<8; j++){
6136             for(y=0; y<8; y++){
6137                 for(x=0; x<8; x++){
6138                     double s= 0.25*(1<<BASIS_SHIFT);
6139                     int index= 8*i + j;
6140                     int perm_index= perm[index];
                         /* extra sqrt(0.5) factor for each zero frequency index */
6141                     if(i==0) s*= sqrt(0.5);
6142                     if(j==0) s*= sqrt(0.5);
6143                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
6144                 }
6145             }
6146         }
6147     }
6148 }
6149
/**
 * Iteratively refine an already quantized block by trying +1/-1 changes to
 * single coefficients and keeping, per pass, the change with the lowest score.
 *
 * rem[] is initialised from the DC term (intra only), a rounding offset and
 * -(orig[i]<<RECON_SHIFT); the dequantized value of every non-zero coefficient
 * is then added through s->dsp.add_8x8basis(). Each pass of the main loop
 * scores every candidate as lambda * (difference of VLC length table entries)
 * plus s->dsp.try_8x8basis(), applies the best candidate if it scores lower
 * than leaving the block unchanged, and stops as soon as none does.
 * NOTE(review): try_8x8basis()/add_8x8basis() are defined elsewhere; they
 * presumably evaluate/apply "rem += basis * scale" -- confirm.
 * qmat is assigned below but never read in this function.
 *
 * @param s      encoder context; dsp function pointers, scan tables, dc
 *               scales, VLC length tables, lambda2 and
 *               avctx->quantizer_noise_shaping are read
 * @param block  quantized coefficients, modified in place
 * @param weight 64 weights; overwritten here with values derived from their
 *               absolute value (16 .. 63 according to the inline note)
 * @param orig   64 values that are shifted by RECON_SHIFT and subtracted when
 *               rem[] is set up (presumably the source samples -- confirm);
 *               only read
 * @param n      block number; for intra blocks n < 4 selects y_dc_scale,
 *               otherwise c_dc_scale; also indexes s->block_last_index[]
 * @param qscale quantizer scale; qmul = 2*qscale, qadd = (qscale-1)|1
 * @return scan-order index of the last non-zero coefficient after refinement
 *         (start_i - 1 when no coefficient from start_i on is non-zero)
 */
6150 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
6151                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
6152                         int n, int qscale){
6153     int16_t rem[64];
6154     DECLARE_ALIGNED_16(DCTELEM, d1[64]);
6155     const int *qmat;
6156     const uint8_t *scantable= s->intra_scantable.scantable;
6157     const uint8_t *perm_scantable= s->intra_scantable.permutated;
6158 //    unsigned int threshold1, threshold2;
6159 //    int bias=0;
6160     int run_tab[65];
6161     int prev_run=0;
6162     int prev_level=0;
6163     int qmul, qadd, start_i, last_non_zero, i, dc;
6164     uint8_t * length;
6165     uint8_t * last_length;
6166     int lambda;
6167     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
6168 #ifdef REFINE_STATS
6169 static int count=0;
6170 static int after_last=0;
6171 static int to_zero=0;
6172 static int from_zero=0;
6173 static int raise=0;
6174 static int lower=0;
6175 static int messed_sign=0;
6176 #endif
6177
         /* basis[0][0] == 0 is used as the "table not built yet" test */
6178     if(basis[0][0] == 0)
6179         build_basis(s->dsp.idct_permutation);
6180
6181     qmul= qscale*2;
6182     qadd= (qscale-1)|1;
6183     if (s->mb_intra) {
6184         if (!s->h263_aic) {
6185             if (n < 4)
6186                 q = s->y_dc_scale;
6187             else
6188                 q = s->c_dc_scale;
6189         } else{
6190             /* For AIC we skip quant/dequant of INTRADC */
6191             q = 1;
6192             qadd=0;
6193         }
6194         q <<= RECON_SHIFT-3;
6195         /* note: block[0] is assumed to be positive */
6196         dc= block[0]*q;
6197 //        block[0] = (block[0] + (q >> 1)) / q;
             /* intra: coefficient 0 (DC) is handled separately from the AC loop */
6198         start_i = 1;
6199         qmat = s->q_intra_matrix[qscale];
6200 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
6201 //            bias= 1<<(QMAT_SHIFT-1);
6202         length     = s->intra_ac_vlc_length;
6203         last_length= s->intra_ac_vlc_last_length;
6204     } else {
6205         dc= 0;
6206         start_i = 0;
6207         qmat = s->q_inter_matrix[qscale];
6208         length     = s->inter_ac_vlc_length;
6209         last_length= s->inter_ac_vlc_last_length;
6210     }
6211     last_non_zero = s->block_last_index[n];
6212
6213 #ifdef REFINE_STATS
6214 {START_TIMER
6215 #endif
         /* add half of 1<<RECON_SHIFT as a rounding offset, then start rem[]
          * as (dc + offset) minus the RECON_SHIFT-scaled orig[] values */
6216     dc += (1<<(RECON_SHIFT-1));
6217     for(i=0; i<64; i++){
6218         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig directly instead of copying to rem[]
6219     }
6220 #ifdef REFINE_STATS
6221 STOP_TIMER("memset rem[]")}
6222 #endif
         /* replace every weight by a value derived from its magnitude (larger
          * |weight| gives a smaller result) and accumulate the sum of squares */
6223     sum=0;
6224     for(i=0; i<64; i++){
6225         int one= 36;
6226         int qns=4;
6227         int w;
6228
6229         w= FFABS(weight[i]) + qns*one;
6230         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
6231
6232         weight[i] = w;
6233 //        w=weight[i] = (63*qns + (w/2)) / w;
6234
6235         assert(w>0);
6236         assert(w<(1<<6));
6237         sum += w*w;
6238     }
         /* lambda scales the VLC length differences in the scores below */
6239     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
6240 #ifdef REFINE_STATS
6241 {START_TIMER
6242 #endif
         /* initial pass in scan order: record the zero run before each non-zero
          * level in run_tab[] and add its dequantized value to rem[] */
6243     run=0;
6244     rle_index=0;
6245     for(i=start_i; i<=last_non_zero; i++){
6246         int j= perm_scantable[i];
6247         const int level= block[j];
6248         int coeff;
6249
6250         if(level){
6251             if(level<0) coeff= qmul*level - qadd;
6252             else        coeff= qmul*level + qadd;
6253             run_tab[rle_index++]=run;
6254             run=0;
6255
6256             s->dsp.add_8x8basis(rem, basis[j], coeff);
6257         }else{
6258             run++;
6259         }
6260     }
6261 #ifdef REFINE_STATS
6262 if(last_non_zero>0){
6263 STOP_TIMER("init rem[]")
6264 }
6265 }
6266
6267 {START_TIMER
6268 #endif
         /* one pass per applied change; ends when no candidate beats "no change" */
6269     for(;;){
             /* baseline: score of rem[] with a zero-scaled basis added */
6270         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
6271         int best_coeff=0;
6272         int best_change=0;
6273         int run2, best_unquant_change=0, analyze_gradient;
6274 #ifdef REFINE_STATS
6275 {START_TIMER
6276 #endif
6277         analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;
6278
6279         if(analyze_gradient){
6280 #ifdef REFINE_STATS
6281 {START_TIMER
6282 #endif
                 /* d1[] = fdct of the rounded, w*w weighted rem[]; its signs are
                  * used below to reject some zero -> +-1 candidates */
6283             for(i=0; i<64; i++){
6284                 int w= weight[i];
6285
6286                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
6287             }
6288 #ifdef REFINE_STATS
6289 STOP_TIMER("rem*w*w")}
6290 {START_TIMER
6291 #endif
6292             s->dsp.fdct(d1);
6293 #ifdef REFINE_STATS
6294 STOP_TIMER("dct")}
6295 #endif
6296         }
6297
             /* intra only: try DC-1 and DC+1; no VLC length term is added here */
6298         if(start_i){
6299             const int level= block[0];
6300             int change, old_coeff;
6301
6302             assert(s->mb_intra);
6303
6304             old_coeff= q*level;
6305
6306             for(change=-1; change<=1; change+=2){
6307                 int new_level= level + change;
6308                 int score, new_coeff;
6309
6310                 new_coeff= q*new_level;
6311                 if(new_coeff >= 2048 || new_coeff < 0)
6312                     continue;
6313
6314                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
6315                 if(score<best_score){
6316                     best_score= score;
6317                     best_coeff= 0;
6318                     best_change= change;
6319                     best_unquant_change= new_coeff - old_coeff;
6320                 }
6321             }
6322         }
6323
             /* run: zeros seen since the previous non-zero level;
              * run2: zeros remaining until the next non-zero level (from run_tab) */
6324         run=0;
6325         rle_index=0;
6326         run2= run_tab[rle_index++];
6327         prev_level=0;
6328         prev_run=0;
6329
6330         for(i=start_i; i<64; i++){
6331             int j= perm_scantable[i];
6332             const int level= block[j];
6333             int change, old_coeff;
6334
                 /* below noise shaping level 3 only positions up to
                  * last_non_zero + 1 are considered */
6335             if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
6336                 break;
6337
6338             if(level){
6339                 if(level<0) old_coeff= qmul*level - qadd;
6340                 else        old_coeff= qmul*level + qadd;
6341                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
6342             }else{
6343                 old_coeff=0;
6344                 run2--;
6345                 assert(run2>=0 || i >= last_non_zero );
6346             }
6347
6348             for(change=-1; change<=1; change+=2){
6349                 int new_level= level + change;
6350                 int score, new_coeff, unquant_change;
6351
6352                 score=0;
                     /* below noise shaping level 2 magnitudes may only shrink */
6353                 if(s->avctx->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
6354                    continue;
6355
6356                 if(new_level){
6357                     if(new_level<0) new_coeff= qmul*new_level - qadd;
6358                     else            new_coeff= qmul*new_level + qadd;
6359                     if(new_coeff >= 2048 || new_coeff <= -2048)
6360                         continue;
6361                     //FIXME check for overflow
6362
6363                     if(level){
                             /* non-zero -> non-zero: same run, different level */
6364                         if(level < 63 && level > -63){
6365                             if(i < last_non_zero)
6366                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
6367                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
6368                             else
6369                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
6370                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
6371                         }
6372                     }else{
                             /* zero -> +-1: a new (run, level) pair is inserted */
6373                         assert(FFABS(new_level)==1);
6374
6375                         if(analyze_gradient){
6376                             int g= d1[ scantable[i] ];
                                 /* skip when g is non-zero and has the same sign as new_level */
6377                             if(g && (g^new_level) >= 0)
6378                                 continue;
6379                         }
6380
6381                         if(i < last_non_zero){
6382                             int next_i= i + run2 + 1;
6383                             int next_level= block[ perm_scantable[next_i] ] + 64;
6384
                                 /* levels outside 0..127 after the +64 offset use index 0 */
6385                             if(next_level&(~127))
6386                                 next_level= 0;
6387
6388                             if(next_i < last_non_zero)
6389                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
6390                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
6391                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6392                             else
6393                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
6394                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6395                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6396                         }else{
                                 /* new coefficient becomes the last one; the previous
                                  * last coefficient switches from last_length to length */
6397                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
6398                             if(prev_level){
6399                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6400                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6401                             }
6402                         }
6403                     }
6404                 }else{
                         /* +-1 -> zero: a (run, level) pair is removed */
6405                     new_coeff=0;
6406                     assert(FFABS(level)==1);
6407
6408                     if(i < last_non_zero){
6409                         int next_i= i + run2 + 1;
6410                         int next_level= block[ perm_scantable[next_i] ] + 64;
6411
6412                         if(next_level&(~127))
6413                             next_level= 0;
6414
6415                         if(next_i < last_non_zero)
6416                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6417                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
6418                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6419                         else
6420                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6421                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6422                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6423                     }else{
6424                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
6425                         if(prev_level){
6426                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6427                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6428                         }
6429                     }
6430                 }
6431
                     /* weight the length difference by lambda, then add the
                      * try_8x8basis() result for the dequantized change */
6432                 score *= lambda;
6433
6434                 unquant_change= new_coeff - old_coeff;
6435                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
6436
6437                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
6438                 if(score<best_score){
6439                     best_score= score;
6440                     best_coeff= i;
6441                     best_change= change;
6442                     best_unquant_change= unquant_change;
6443                 }
6444             }
6445             if(level){
                     /* remember this pair as the "previous" one, offset by 64 */
6446                 prev_level= level + 64;
6447                 if(prev_level&(~127))
6448                     prev_level= 0;
6449                 prev_run= run;
6450                 run=0;
6451             }else{
6452                 run++;
6453             }
6454         }
6455 #ifdef REFINE_STATS
6456 STOP_TIMER("iterative step")}
6457 #endif
6458
             /* best_change stays 0 when nothing scored below the baseline */
6459         if(best_change){
6460             int j= perm_scantable[ best_coeff ];
6461
6462             block[j] += best_change;
6463
6464             if(best_coeff > last_non_zero){
6465                 last_non_zero= best_coeff;
6466                 assert(block[j]);
6467 #ifdef REFINE_STATS
6468 after_last++;
6469 #endif
6470             }else{
6471 #ifdef REFINE_STATS
6472 if(block[j]){
6473     if(block[j] - best_change){
6474         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
6475             raise++;
6476         }else{
6477             lower++;
6478         }
6479     }else{
6480         from_zero++;
6481     }
6482 }else{
6483     to_zero++;
6484 }
6485 #endif
                     /* the change may have zeroed the last coefficient: walk back
                      * to the new last non-zero position */
6486                 for(; last_non_zero>=start_i; last_non_zero--){
6487                     if(block[perm_scantable[last_non_zero]])
6488                         break;
6489                 }
6490             }
6491 #ifdef REFINE_STATS
6492 count++;
6493 if(256*256*256*64 % count == 0){
6494     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
6495 }
6496 #endif
                 /* rebuild run_tab[] for the modified block */
6497             run=0;
6498             rle_index=0;
6499             for(i=start_i; i<=last_non_zero; i++){
6500                 int j= perm_scantable[i];
6501                 const int level= block[j];
6502
6503                  if(level){
6504                      run_tab[rle_index++]=run;
6505                      run=0;
6506                  }else{
6507                      run++;
6508                  }
6509             }
6510
                 /* fold the accepted change into rem[] for the next pass */
6511             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
6512         }else{
6513             break;
6514         }
6515     }
6516 #ifdef REFINE_STATS
6517 if(last_non_zero>0){
6518 STOP_TIMER("iterative search")
6519 }
6520 }
6521 #endif
6522
6523     return last_non_zero;
6524 }
6525
/**
 * Forward-transform and quantize one block in place.
 *
 * Runs s->dsp.fdct() on block, optionally s->denoise_dct() (when
 * s->dct_error_sum is set), quantizes the intra DC coefficient by a rounded
 * division, then quantizes the remaining coefficients with the per-qscale
 * matrix and bias. Finally the non-zero coefficients are permuted unless
 * s->dsp.idct_permutation_type is FF_NO_IDCT_PERM.
 *
 * @param s        encoder context
 * @param block    input samples on entry, quantized coefficients on return
 * @param n        block number; for intra blocks n < 4 selects y_dc_scale,
 *                 otherwise c_dc_scale
 * @param qscale   index into s->q_intra_matrix / s->q_inter_matrix
 * @param overflow set to non-zero when s->max_qcoeff is smaller than the
 *                 bitwise OR of all quantized magnitudes
 * @return scan-order index of the last non-zero coefficient; starts at 0 for
 *         intra and -1 for inter blocks when no coefficient passes the threshold
 */
6526 static int dct_quantize_c(MpegEncContext *s,
6527                         DCTELEM *block, int n,
6528                         int qscale, int *overflow)
6529 {
6530     int i, j, level, last_non_zero, q, start_i;
6531     const int *qmat;
6532     const uint8_t *scantable= s->intra_scantable.scantable;
6533     int bias;
6534     int max=0;
6535     unsigned int threshold1, threshold2;
6536
6537     s->dsp.fdct (block);
6538
6539     if(s->dct_error_sum)
6540         s->denoise_dct(s, block);
6541
6542     if (s->mb_intra) {
6543         if (!s->h263_aic) {
6544             if (n < 4)
6545                 q = s->y_dc_scale;
6546             else
6547                 q = s->c_dc_scale;
6548             q = q << 3;
6549         } else
6550             /* For AIC we skip quant/dequant of INTRADC */
6551             q = 1 << 3;
6552
6553         /* note: block[0] is assumed to be positive */
             /* adding q/2 before dividing rounds to nearest */
6554         block[0] = (block[0] + (q >> 1)) / q;
6555         start_i = 1;
6556         last_non_zero = 0;
6557         qmat = s->q_intra_matrix[qscale];
6558         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6559     } else {
6560         start_i = 0;
6561         last_non_zero = -1;
6562         qmat = s->q_inter_matrix[qscale];
6563         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6564     }
         /* with threshold2 = 2*threshold1 the single unsigned comparison
          * (level+threshold1) > threshold2 is true when level > threshold1 or
          * level < -threshold1 (a negative sum wraps to a large unsigned value) */
6565     threshold1= (1<<QMAT_SHIFT) - bias - 1;
6566     threshold2= (threshold1<<1);
         /* scan backwards: zero trailing coefficients until the first one that
          * passes the threshold, which becomes last_non_zero */
6567     for(i=63;i>=start_i;i--) {
6568         j = scantable[i];
6569         level = block[j] * qmat[j];
6570
6571         if(((unsigned)(level+threshold1))>threshold2){
6572             last_non_zero = i;
6573             break;
6574         }else{
6575             block[j]=0;
6576         }
6577     }
         /* quantize everything up to last_non_zero; level holds the magnitude
          * in both branches, the sign is re-applied when storing */
6578     for(i=start_i; i<=last_non_zero; i++) {
6579         j = scantable[i];
6580         level = block[j] * qmat[j];
6581
6582 //        if(   bias+level >= (1<<QMAT_SHIFT)
6583 //           || bias-level >= (1<<QMAT_SHIFT)){
6584         if(((unsigned)(level+threshold1))>threshold2){
6585             if(level>0){
6586                 level= (bias + level)>>QMAT_SHIFT;
6587                 block[j]= level;
6588             }else{
6589                 level= (bias - level)>>QMAT_SHIFT;
6590                 block[j]= -level;
6591             }
                 /* OR instead of a true maximum: the result is >= every
                  * magnitude, so the check below can only over-report */
6592             max |=level;
6593         }else{
6594             block[j]=0;
6595         }
6596     }
6597     *overflow= s->max_qcoeff < max; //overflow might have happened
6598
6599     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
6600     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
6601         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
6602
6603     return last_non_zero;
6604 }
6605
6606 #endif //CONFIG_ENCODERS
6607
/**
 * Dequantize an intra block in place using s->intra_matrix.
 *
 * block[0] is multiplied by s->y_dc_scale (n < 4) or s->c_dc_scale. Every
 * non-zero coefficient at scan positions 1..s->block_last_index[n] becomes
 * (|level| * qscale * quant_matrix[j]) >> 3, forced to an odd value, with the
 * original sign restored.
 *
 * @param s      context providing the scan table, matrices and dc scales
 * @param block  coefficients, modified in place
 * @param n      block number; selects the dc scale and block_last_index entry
 * @param qscale quantizer scale
 */
6608 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
6609                                    DCTELEM *block, int n, int qscale)
6610 {
6611     int i, level, nCoeffs;
6612     const uint16_t *quant_matrix;
6613
6614     nCoeffs= s->block_last_index[n];
6615
6616     if (n < 4)
6617         block[0] = block[0] * s->y_dc_scale;
6618     else
6619         block[0] = block[0] * s->c_dc_scale;
6620     /* XXX: only mpeg1 */
6621     quant_matrix = s->intra_matrix;
6622     for(i=1;i<=nCoeffs;i++) {
6623         int j= s->intra_scantable.permutated[i];
6624         level = block[j];
6625         if (level) {
6626             if (level < 0) {
6627                 level = -level;
6628                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
                     /* (x - 1) | 1 keeps odd x and turns even x into x - 1 */
6629                 level = (level - 1) | 1;
6630                 level = -level;
6631             } else {
6632                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6633                 level = (level - 1) | 1;
6634             }
6635             block[j] = level;
6636         }
6637     }
6638 }
6639
/**
 * Dequantize an inter block in place using s->inter_matrix.
 *
 * Every non-zero coefficient at scan positions 0..s->block_last_index[n]
 * becomes ((2*|level| + 1) * qscale * quant_matrix[j]) >> 4, forced to an odd
 * value, with the original sign restored. Positions are looked up through
 * s->intra_scantable.permutated.
 *
 * @param s      context providing the scan table and matrix
 * @param block  coefficients, modified in place
 * @param n      block number; selects the block_last_index entry
 * @param qscale quantizer scale
 */
6640 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
6641                                    DCTELEM *block, int n, int qscale)
6642 {
6643     int i, level, nCoeffs;
6644     const uint16_t *quant_matrix;
6645
6646     nCoeffs= s->block_last_index[n];
6647
6648     quant_matrix = s->inter_matrix;
6649     for(i=0; i<=nCoeffs; i++) {
6650         int j= s->intra_scantable.permutated[i];
6651         level = block[j];
6652         if (level) {
6653             if (level < 0) {
6654                 level = -level;
6655                 level = (((level << 1) + 1) * qscale *
6656                          ((int) (quant_matrix[j]))) >> 4;
                     /* (x - 1) | 1 keeps odd x and turns even x into x - 1 */
6657                 level = (level - 1) | 1;
6658                 level = -level;
6659             } else {
6660                 level = (((level << 1) + 1) * qscale *
6661                          ((int) (quant_matrix[j]))) >> 4;
6662                 level = (level - 1) | 1;
6663             }
6664             block[j] = level;
6665         }
6666     }
6667 }
6668
/**
 * Dequantize an intra block in place using s->intra_matrix, without the
 * odd-forcing step of the mpeg1 variant.
 *
 * block[0] is multiplied by s->y_dc_scale (n < 4) or s->c_dc_scale. Non-zero
 * coefficients at scan positions 1..nCoeffs become
 * (|level| * qscale * quant_matrix[j]) >> 3 with the original sign restored.
 * nCoeffs is 63 when s->alternate_scan is set, else s->block_last_index[n].
 *
 * @param s      context providing the scan table, matrices and dc scales
 * @param block  coefficients, modified in place
 * @param n      block number; selects the dc scale and block_last_index entry
 * @param qscale quantizer scale
 */
6669 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
6670                                    DCTELEM *block, int n, int qscale)
6671 {
6672     int i, level, nCoeffs;
6673     const uint16_t *quant_matrix;
6674
6675     if(s->alternate_scan) nCoeffs= 63;
6676     else nCoeffs= s->block_last_index[n];
6677
6678     if (n < 4)
6679         block[0] = block[0] * s->y_dc_scale;
6680     else
6681         block[0] = block[0] * s->c_dc_scale;
6682     quant_matrix = s->intra_matrix;
6683     for(i=1;i<=nCoeffs;i++) {
6684         int j= s->intra_scantable.permutated[i];
6685         level = block[j];
6686         if (level) {
                 /* scale the magnitude, then put the sign back */
6687             if (level < 0) {
6688                 level = -level;
6689                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6690                 level = -level;
6691             } else {
6692                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6693             }
6694             block[j] = level;
6695         }
6696     }
6697 }
6698
/**
 * Same dequantization as dct_unquantize_mpeg2_intra_c(), plus a parity
 * adjustment of block[63].
 *
 * sum starts at -1 and accumulates every dequantized non-zero coefficient of
 * scan positions 1..nCoeffs (block[0] is not added); afterwards the lowest
 * bit of block[63] is toggled when sum is odd.
 * NOTE(review): this looks like MPEG-2 mismatch control -- confirm against the
 * specification, in particular whether the DC term should take part in sum.
 *
 * @param s      context providing the scan table, matrices and dc scales
 * @param block  coefficients, modified in place
 * @param n      block number; selects the dc scale and block_last_index entry
 * @param qscale quantizer scale
 */
6699 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
6700                                    DCTELEM *block, int n, int qscale)
6701 {
6702     int i, level, nCoeffs;
6703     const uint16_t *quant_matrix;
6704     int sum=-1;
6705
6706     if(s->alternate_scan) nCoeffs= 63;
6707     else nCoeffs= s->block_last_index[n];
6708
6709     if (n < 4)
6710         block[0] = block[0] * s->y_dc_scale;
6711     else
6712         block[0] = block[0] * s->c_dc_scale;
6713     quant_matrix = s->intra_matrix;
6714     for(i=1;i<=nCoeffs;i++) {
6715         int j= s->intra_scantable.permutated[i];
6716         level = block[j];
6717         if (level) {
6718             if (level < 0) {
6719                 level = -level;
6720                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6721                 level = -level;
6722             } else {
6723                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6724             }
6725             block[j] = level;
6726             sum+=level;
6727         }
6728     }
         /* toggle the lowest bit of block[63] when sum is odd */
6729     block[63]^=sum&1;
6730 }
6731
/**
 * Dequantize an inter block in place using s->inter_matrix, followed by a
 * parity adjustment of block[63].
 *
 * Non-zero coefficients at scan positions 0..nCoeffs become
 * ((2*|level| + 1) * qscale * quant_matrix[j]) >> 4 with the original sign
 * restored; nCoeffs is 63 when s->alternate_scan is set, else
 * s->block_last_index[n]. sum starts at -1 and accumulates the dequantized
 * values; the lowest bit of block[63] is toggled when sum is odd.
 * NOTE(review): this looks like MPEG-2 mismatch control -- confirm.
 *
 * @param s      context providing the scan table and matrix
 * @param block  coefficients, modified in place
 * @param n      block number; selects the block_last_index entry
 * @param qscale quantizer scale
 */
6732 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
6733                                    DCTELEM *block, int n, int qscale)
6734 {
6735     int i, level, nCoeffs;
6736     const uint16_t *quant_matrix;
6737     int sum=-1;
6738
6739     if(s->alternate_scan) nCoeffs= 63;
6740     else nCoeffs= s->block_last_index[n];
6741
6742     quant_matrix = s->inter_matrix;
6743     for(i=0; i<=nCoeffs; i++) {
6744         int j= s->intra_scantable.permutated[i];
6745         level = block[j];
6746         if (level) {
6747             if (level < 0) {
6748                 level = -level;
6749                 level = (((level << 1) + 1) * qscale *
6750                          ((int) (quant_matrix[j]))) >> 4;
6751                 level = -level;
6752             } else {
6753                 level = (((level << 1) + 1) * qscale *
6754                          ((int) (quant_matrix[j]))) >> 4;
6755             }
6756             block[j] = level;
6757             sum+=level;
6758         }
6759     }
         /* toggle the lowest bit of block[63] when sum is odd */
6760     block[63]^=sum&1;
6761 }
6762
/**
 * Dequantize an H.263-style intra block in place.
 *
 * Without s->h263_aic, block[0] is multiplied by s->y_dc_scale (n < 4) or
 * s->c_dc_scale and qadd is (qscale - 1) | 1; with s->h263_aic the DC value is
 * left alone and qadd is 0. Every non-zero block[i] for i in 1..nCoeffs
 * becomes level * qmul + qadd (positive) or level * qmul - qadd (negative),
 * with qmul = 2 * qscale. The loop walks block[] by plain index, not through a
 * scan table; nCoeffs is 63 when s->ac_pred is set, otherwise
 * s->inter_scantable.raster_end[ s->block_last_index[n] ].
 *
 * @param s      context providing dc scales, flags and raster_end
 * @param block  coefficients, modified in place
 * @param n      block number; s->block_last_index[n] is asserted to be >= 0
 * @param qscale quantizer scale
 */
6763 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
6764                                   DCTELEM *block, int n, int qscale)
6765 {
6766     int i, level, qmul, qadd;
6767     int nCoeffs;
6768
6769     assert(s->block_last_index[n]>=0);
6770
6771     qmul = qscale << 1;
6772
6773     if (!s->h263_aic) {
6774         if (n < 4)
6775             block[0] = block[0] * s->y_dc_scale;
6776         else
6777             block[0] = block[0] * s->c_dc_scale;
             /* (qscale - 1) | 1 is qscale when odd, qscale - 1 when even */
6778         qadd = (qscale - 1) | 1;
6779     }else{
6780         qadd = 0;
6781     }
6782     if(s->ac_pred)
6783         nCoeffs=63;
6784     else
6785         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6786
6787     for(i=1; i<=nCoeffs; i++) {
6788         level = block[i];
6789         if (level) {
6790             if (level < 0) {
6791                 level = level * qmul - qadd;
6792             } else {
6793                 level = level * qmul + qadd;
6794             }
6795             block[i] = level;
6796         }
6797     }
6798 }
6799
/**
 * Dequantize an H.263-style inter block in place.
 *
 * With qmul = 2 * qscale and qadd = (qscale - 1) | 1, every non-zero block[i]
 * for i in 0..nCoeffs becomes level * qmul + qadd (positive) or
 * level * qmul - qadd (negative). The loop walks block[] by plain index;
 * nCoeffs is s->inter_scantable.raster_end[ s->block_last_index[n] ].
 *
 * @param s      context providing raster_end and block_last_index
 * @param block  coefficients, modified in place
 * @param n      block number; s->block_last_index[n] is asserted to be >= 0
 * @param qscale quantizer scale
 */
6800 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
6801                                   DCTELEM *block, int n, int qscale)
6802 {
6803     int i, level, qmul, qadd;
6804     int nCoeffs;
6805
6806     assert(s->block_last_index[n]>=0);
6807
         /* (qscale - 1) | 1 is qscale when odd, qscale - 1 when even */
6808     qadd = (qscale - 1) | 1;
6809     qmul = qscale << 1;
6810
6811     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6812
6813     for(i=0; i<=nCoeffs; i++) {
6814         level = block[i];
6815         if (level) {
6816             if (level < 0) {
6817                 level = level * qmul - qadd;
6818             } else {
6819                 level = level * qmul + qadd;
6820             }
6821             block[i] = level;
6822         }
6823     }
6824 }
6825
6826 #ifdef CONFIG_ENCODERS
/* AVCodec entry "h263" for CODEC_ID_H263: sized by MpegEncContext and wired to
 * the shared MPV_encode_init / MPV_encode_picture / MPV_encode_end functions;
 * pix_fmts lists PIX_FMT_YUV420P followed by -1. */
6827 AVCodec h263_encoder = {
6828     "h263",
6829     CODEC_TYPE_VIDEO,
6830     CODEC_ID_H263,
6831     sizeof(MpegEncContext),
6832     MPV_encode_init,
6833     MPV_encode_picture,
6834     MPV_encode_end,
6835     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6836 };
6837
/* AVCodec entry "h263p" for CODEC_ID_H263P: sized by MpegEncContext and wired
 * to the shared MPV_encode_init / MPV_encode_picture / MPV_encode_end
 * functions; pix_fmts lists PIX_FMT_YUV420P followed by -1. */
6838 AVCodec h263p_encoder = {
6839     "h263p",
6840     CODEC_TYPE_VIDEO,
6841     CODEC_ID_H263P,
6842     sizeof(MpegEncContext),
6843     MPV_encode_init,
6844     MPV_encode_picture,
6845     MPV_encode_end,
6846     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6847 };
6848
/* AVCodec entry "flv" for CODEC_ID_FLV1: sized by MpegEncContext and wired to
 * the shared MPV_encode_init / MPV_encode_picture / MPV_encode_end functions;
 * pix_fmts lists PIX_FMT_YUV420P followed by -1. */
6849 AVCodec flv_encoder = {
6850     "flv",
6851     CODEC_TYPE_VIDEO,
6852     CODEC_ID_FLV1,
6853     sizeof(MpegEncContext),
6854     MPV_encode_init,
6855     MPV_encode_picture,
6856     MPV_encode_end,
6857     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6858 };
6859
/* AVCodec entry "rv10" for CODEC_ID_RV10: sized by MpegEncContext and wired to
 * the shared MPV_encode_init / MPV_encode_picture / MPV_encode_end functions;
 * pix_fmts lists PIX_FMT_YUV420P followed by -1. */
6860 AVCodec rv10_encoder = {
6861     "rv10",
6862     CODEC_TYPE_VIDEO,
6863     CODEC_ID_RV10,
6864     sizeof(MpegEncContext),
6865     MPV_encode_init,
6866     MPV_encode_picture,
6867     MPV_encode_end,
6868     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6869 };
6870
/* AVCodec entry "rv20" for CODEC_ID_RV20: sized by MpegEncContext and wired to
 * the shared MPV_encode_init / MPV_encode_picture / MPV_encode_end functions;
 * pix_fmts lists PIX_FMT_YUV420P followed by -1. */
6871 AVCodec rv20_encoder = {
6872     "rv20",
6873     CODEC_TYPE_VIDEO,
6874     CODEC_ID_RV20,
6875     sizeof(MpegEncContext),
6876     MPV_encode_init,
6877     MPV_encode_picture,
6878     MPV_encode_end,
6879     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6880 };
6881
6882 AVCodec mpeg4_encoder = {
6883     "mpeg4",
6884     CODEC_TYPE_VIDEO,
6885     CODEC_ID_MPEG4,
6886     sizeof(MpegEncContext),
6887     MPV_encode_init,
6888     MPV_encode_picture,
6889     MPV_encode_end,
6890     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6891     .capabilities= CODEC_CAP_DELAY,
6892 };
6893
6894 AVCodec msmpeg4v1_encoder = {
6895     "msmpeg4v1",
6896     CODEC_TYPE_VIDEO,
6897     CODEC_ID_MSMPEG4V1,
6898     sizeof(MpegEncContext),
6899     MPV_encode_init,
6900     MPV_encode_picture,
6901     MPV_encode_end,
6902     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6903 };
6904
6905 AVCodec msmpeg4v2_encoder = {
6906     "msmpeg4v2",
6907     CODEC_TYPE_VIDEO,
6908     CODEC_ID_MSMPEG4V2,
6909     sizeof(MpegEncContext),
6910     MPV_encode_init,
6911     MPV_encode_picture,
6912     MPV_encode_end,
6913     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6914 };
6915
6916 AVCodec msmpeg4v3_encoder = {
6917     "msmpeg4",
6918     CODEC_TYPE_VIDEO,
6919     CODEC_ID_MSMPEG4V3,
6920     sizeof(MpegEncContext),
6921     MPV_encode_init,
6922     MPV_encode_picture,
6923     MPV_encode_end,
6924     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6925 };
6926
6927 AVCodec wmv1_encoder = {
6928     "wmv1",
6929     CODEC_TYPE_VIDEO,
6930     CODEC_ID_WMV1,
6931     sizeof(MpegEncContext),
6932     MPV_encode_init,
6933     MPV_encode_picture,
6934     MPV_encode_end,
6935     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6936 };
6937
6938 AVCodec mjpeg_encoder = {
6939     "mjpeg",
6940     CODEC_TYPE_VIDEO,
6941     CODEC_ID_MJPEG,
6942     sizeof(MpegEncContext),
6943     MPV_encode_init,
6944     MPV_encode_picture,
6945     MPV_encode_end,
6946     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUVJ420P, PIX_FMT_YUVJ422P, -1},
6947 };
6948
6949 #endif //CONFIG_ENCODERS