/*
 * libavcodec/mpegvideo.c
 * (note: the mjpeg_encoder struct was moved from mpegvideo.c to mjpegenc.c)
 */
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  *
22  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
23  */
24
25 /**
26  * @file mpegvideo.c
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "avcodec.h"
31 #include "dsputil.h"
32 #include "mpegvideo.h"
33 #include "mjpegenc.h"
34 #include "msmpeg4.h"
35 #include "faandct.h"
36 #include <limits.h>
37
38 #ifdef USE_FASTMEMCPY
39 #include "libvo/fastmemcpy.h"
40 #endif
41
42 //#undef NDEBUG
43 //#include <assert.h>
44
45 #ifdef CONFIG_ENCODERS
46 static int encode_picture(MpegEncContext *s, int picture_number);
47 #endif //CONFIG_ENCODERS
48 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
49                                    DCTELEM *block, int n, int qscale);
50 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
51                                    DCTELEM *block, int n, int qscale);
52 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
53                                    DCTELEM *block, int n, int qscale);
54 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
55                                    DCTELEM *block, int n, int qscale);
56 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
57                                    DCTELEM *block, int n, int qscale);
58 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
59                                   DCTELEM *block, int n, int qscale);
60 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
61                                   DCTELEM *block, int n, int qscale);
62 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
63 #ifdef CONFIG_ENCODERS
64 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
65 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
66 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
67 static int sse_mb(MpegEncContext *s);
68 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block);
69 #endif //CONFIG_ENCODERS
70
71 #ifdef HAVE_XVMC
72 extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
73 extern void XVMC_field_end(MpegEncContext *s);
74 extern void XVMC_decode_mb(MpegEncContext *s);
75 #endif
76
77 void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
78
79
80 /* enable all paranoid tests for rounding, overflows, etc... */
81 //#define PARANOID
82
83 //#define DEBUG
84
85
/* for jpeg fast DCT */
#define CONST_BITS 14

/* AAN (Arai/Agui/Nakajima) fast-DCT post-scale factors as a 2D (8x8)
   table; used by convert_matrix() to fold the scaling that fdct_ifast
   omits into the quantization multipliers. */
static const uint16_t aanscales[64] = {
    /* precomputed values scaled up by 14 bits */
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
    8867 , 12299, 11585, 10426,  8867,  6967,  4799,  2446,
    4520 ,  6270,  5906,  5315,  4520,  3552,  2446,  1247
};

/* rounding table indexed by a luma MV remainder — presumably used when
   deriving h263-style chroma motion vectors; TODO confirm against callers */
static const uint8_t h263_chroma_roundtab[16] = {
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
    0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
};

/* identity mapping: by default the chroma qscale equals the luma qscale */
static const uint8_t ff_default_chroma_qscale_table[32]={
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
};
110
111 #ifdef CONFIG_ENCODERS
112 static uint8_t default_mv_penalty[MAX_FCODE+1][MAX_MV*2+1];
113 static uint8_t default_fcode_tab[MAX_MV*2+1];
114
115 enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
116
/**
 * Builds the per-qscale quantization multiplier tables for the encoder.
 * qmat[qscale][i] approximates (1<<QMAT_SHIFT)/(qscale*quant_matrix[i]),
 * with the AAN post-scale factor folded in when an ifast-style FDCT is
 * used; qmat16 is the 16-bit factor+bias variant used by the MMX
 * quantizer. Also warns if QMAT_SHIFT is large enough that
 * coefficient*qmat products could overflow 32 bits.
 */
static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
                           const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra)
{
    int qscale;
    int shift=0;

    for(qscale=qmin; qscale<=qmax; qscale++){
        int i;
        if (dsp->fdct == ff_jpeg_fdct_islow
#ifdef FAAN_POSTSCALE
            || dsp->fdct == ff_faandct
#endif
            ) {
            for(i=0;i<64;i++) {
                const int j= dsp->idct_permutation[i];
                /* 16 <= qscale * quant_matrix[i] <= 7905;
                   the islow (and postscaled faandct) FDCT output is
                   already correctly scaled, so no aanscales factor is
                   needed here */

                qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
                                (qscale * quant_matrix[j]));
            }
        } else if (dsp->fdct == fdct_ifast
#ifndef FAAN_POSTSCALE
                   || dsp->fdct == ff_faandct
#endif
                   ) {
            for(i=0;i<64;i++) {
                const int j= dsp->idct_permutation[i];
                /* 16 <= qscale * quant_matrix[i] <= 7905 */
                /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
                /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
                /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */

                qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
                                (aanscales[i] * qscale * quant_matrix[j]));
            }
        } else {
            for(i=0;i<64;i++) {
                const int j= dsp->idct_permutation[i];
                /* We can safely suppose that 16 <= quant_matrix[i] <= 255
                   So 16           <= qscale * quant_matrix[i]             <= 7905
                   so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
                   so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
                */
                qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
//                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
                qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);

                /* keep the 16-bit factor nonzero and below 2^15 so the MMX
                   multiply cannot produce 0 or overflow */
                if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
                qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
            }
        }

        /* determine how many extra shift bits would be needed so that
           max_coefficient * qmat never exceeds INT_MAX */
        for(i=intra; i<64; i++){
            int64_t max= 8191;
            if (dsp->fdct == fdct_ifast
#ifndef FAAN_POSTSCALE
                   || dsp->fdct == ff_faandct
#endif
                   ) {
                max= (8191LL*aanscales[i]) >> 14;
            }
            while(((max * qmat[qscale][i]) >> shift) > INT_MAX){
                shift++;
            }
        }
    }
    if(shift){
        av_log(NULL, AV_LOG_INFO, "Warning, QMAT_SHIFT is larger than %d, overflows possible\n", QMAT_SHIFT - shift);
    }
}
190
191 static inline void update_qscale(MpegEncContext *s){
192     s->qscale= (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
193     s->qscale= av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
194
195     s->lambda2= (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
196 }
197 #endif //CONFIG_ENCODERS
198
199 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
200     int i;
201     int end;
202
203     st->scantable= src_scantable;
204
205     for(i=0; i<64; i++){
206         int j;
207         j = src_scantable[i];
208         st->permutated[i] = permutation[j];
209 #ifdef ARCH_POWERPC
210         st->inverse[j] = i;
211 #endif
212     }
213
214     end=-1;
215     for(i=0; i<64; i++){
216         int j;
217         j = st->permutated[i];
218         if(j>end) end=j;
219         st->raster_end[i]= end;
220     }
221 }
222
223 #ifdef CONFIG_ENCODERS
224 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix){
225     int i;
226
227     if(matrix){
228         put_bits(pb, 1, 1);
229         for(i=0;i<64;i++) {
230             put_bits(pb, 8, matrix[ ff_zigzag_direct[i] ]);
231         }
232     }else
233         put_bits(pb, 1, 0);
234 }
235 #endif //CONFIG_ENCODERS
236
/**
 * Scans [p, end) for an MPEG-style start code (00 00 01 xx).
 * *state carries the last bytes seen across calls, so a start code that
 * straddles two buffers is still found; on return *state holds the last
 * 4 bytes consumed.
 * @return pointer just past the start code, or end if none was found
 */
const uint8_t *ff_find_start_code(const uint8_t * restrict p, const uint8_t *end, uint32_t * restrict state){
    int i;

    assert(p<=end);
    if(p>=end)
        return end;

    /* first feed up to 3 bytes through *state to catch a start code that
       began in the previous buffer */
    for(i=0; i<3; i++){
        uint32_t tmp= *state << 8;
        *state= tmp + *(p++);
        if(tmp == 0x100 || p==end)
            return p;
    }

    /* fast scan: examine 3 bytes at a time and skip as far as the byte
       values allow; terminates with p just past a 00 00 01 pattern */
    while(p<end){
        if     (p[-1] > 1      ) p+= 3;
        else if(p[-2]          ) p+= 2;
        else if(p[-3]|(p[-1]-1)) p++;
        else{
            p++;
            break;
        }
    }

    /* reload *state with the last 4 bytes so the next call can resume */
    p= FFMIN(p, end)-4;
    *state=  be2me_32(unaligned32(p));

    return p+4;
}
266
267 /* init common dct for both encoder and decoder */
/* init common dct for both encoder and decoder */
int DCT_common_init(MpegEncContext *s)
{
    /* install the portable C (de)quantizers first; the per-arch init
       calls below may replace them with optimized versions */
    s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
    s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
    s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
    s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
    s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
    if(s->flags & CODEC_FLAG_BITEXACT)
        s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_bitexact;
    s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;

#ifdef CONFIG_ENCODERS
    s->dct_quantize= dct_quantize_c;
    s->denoise_dct= denoise_dct_c;
#endif //CONFIG_ENCODERS

#ifdef HAVE_MMX
    MPV_common_init_mmx(s);
#endif
#ifdef ARCH_ALPHA
    MPV_common_init_axp(s);
#endif
#ifdef HAVE_MLIB
    MPV_common_init_mlib(s);
#endif
#ifdef HAVE_MMI
    MPV_common_init_mmi(s);
#endif
#ifdef ARCH_ARMV4L
    MPV_common_init_armv4l(s);
#endif
#ifdef ARCH_POWERPC
    MPV_common_init_ppc(s);
#endif

#ifdef CONFIG_ENCODERS
    /* keep the (possibly arch-optimized) quantizer as the fast variant;
       trellis quantization always uses the C implementation */
    s->fast_dct_quantize= s->dct_quantize;

    if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
        s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
    }

#endif //CONFIG_ENCODERS

    /* load & permutate scantables
       note: only wmv uses different ones
    */
    if(s->alternate_scan){
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
    }else{
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
    }
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);

    return 0;
}
327
/* Shallow-copies src into dst and marks dst as a copy so that the
   underlying frame buffers are never released through it. */
static void copy_picture(Picture *dst, Picture *src){
    *dst = *src;
    dst->type= FF_BUFFER_TYPE_COPY;
}
332
333 #ifdef CONFIG_ENCODERS
/* Copies frame metadata (and, when me_threshold is set, the motion
   information tables) from src to dst; logs errors for missing or
   mismatching motion side data instead of failing. */
static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *src){
    int i;

    dst->pict_type              = src->pict_type;
    dst->quality                = src->quality;
    dst->coded_picture_number   = src->coded_picture_number;
    dst->display_picture_number = src->display_picture_number;
//    dst->reference              = src->reference;
    dst->pts                    = src->pts;
    dst->interlaced_frame       = src->interlaced_frame;
    dst->top_field_first        = src->top_field_first;

    if(s->avctx->me_threshold){
        /* motion-estimation reuse needs the caller-provided side data */
        if(!src->motion_val[0])
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
        if(!src->mb_type)
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
        if(!src->ref_index[0])
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
        if(src->motion_subsample_log2 != dst->motion_subsample_log2)
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
            src->motion_subsample_log2, dst->motion_subsample_log2);

        memcpy(dst->mb_type, src->mb_type, s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));

        /* per-direction (forward/backward) motion vectors and ref indices */
        for(i=0; i<2; i++){
            int stride= ((16*s->mb_width )>>src->motion_subsample_log2) + 1;
            int height= ((16*s->mb_height)>>src->motion_subsample_log2);

            if(src->motion_val[i] && src->motion_val[i] != dst->motion_val[i]){
                memcpy(dst->motion_val[i], src->motion_val[i], 2*stride*height*sizeof(int16_t));
            }
            if(src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]){
                memcpy(dst->ref_index[i], src->ref_index[i], s->b8_stride*2*s->mb_height*sizeof(int8_t));
            }
        }
    }
}
372 #endif
373
/**
 * allocates a Picture
 * The pixels are allocated/set by calling get_buffer() if shared=0.
 * @param shared if nonzero, pic->data[] is supplied by the caller and only
 *               the side-data tables are (re)allocated
 * @return 0 on success, -1 on failure
 */
static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
    const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesnt sig11
    const int mb_array_size= s->mb_stride*s->mb_height;
    const int b8_array_size= s->b8_stride*s->mb_height*2;
    const int b4_array_size= s->b4_stride*s->mb_height*4;
    int i;

    if(shared){
        assert(pic->data[0]);
        assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
        pic->type= FF_BUFFER_TYPE_SHARED;
    }else{
        int r;

        assert(!pic->data[0]);

        r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);

        if(r<0 || !pic->age || !pic->type || !pic->data[0]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
            return -1;
        }

        /* all pictures of a sequence must keep the strides of the first */
        if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
            return -1;
        }

        if(pic->linesize[1] != pic->linesize[2]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
            return -1;
        }

        s->linesize  = pic->linesize[0];
        s->uvlinesize= pic->linesize[1];
    }

    /* qscale_table doubles as the "side tables already allocated" marker */
    if(pic->qscale_table==NULL){
        if (s->encoding) {
            CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
        }

        CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
        CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
        CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(uint32_t))
        pic->mb_type= pic->mb_type_base + s->mb_stride+1;
        if(s->out_format == FMT_H264){
            /* h264 stores motion data at 4x4-block granularity */
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+4)  * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 2;
        }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
            /* 8x8-block granularity for the other cases that need MVs */
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 3;
        }
        if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
            CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
        }
        pic->qstride= s->mb_stride;
        CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
    }

    //it might be nicer if the application would keep track of these but it would require a API change
    memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
    s->prev_pict_types[0]= s->pict_type;
    if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
        pic->age= INT_MAX; // skipped MBs in b frames are quite rare in mpeg1/2 and its a bit tricky to skip them anyway

    return 0;
fail: //for the CHECKED_ALLOCZ macro
    return -1;
}
458
459 /**
460  * deallocates a picture
461  */
462 static void free_picture(MpegEncContext *s, Picture *pic){
463     int i;
464
465     if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
466         s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
467     }
468
469     av_freep(&pic->mb_var);
470     av_freep(&pic->mc_mb_var);
471     av_freep(&pic->mb_mean);
472     av_freep(&pic->mbskip_table);
473     av_freep(&pic->qscale_table);
474     av_freep(&pic->mb_type_base);
475     av_freep(&pic->dct_coeff);
476     av_freep(&pic->pan_scan);
477     pic->mb_type= NULL;
478     for(i=0; i<2; i++){
479         av_freep(&pic->motion_val_base[i]);
480         av_freep(&pic->ref_index[i]);
481     }
482
483     if(pic->type == FF_BUFFER_TYPE_SHARED){
484         for(i=0; i<4; i++){
485             pic->base[i]=
486             pic->data[i]= NULL;
487         }
488         pic->type= 0;
489     }
490 }
491
/* Allocates the per-thread scratch buffers of a (possibly duplicated)
   context: edge emulation buffer, ME scratchpads/maps, optional noise
   reduction accumulator, and the DCT block storage.
   Returns 0 on success, -1 on allocation failure (partially allocated
   buffers are freed later through MPV_common_end()). */
static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
    int i;

    // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
    CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*21*2); //(width + edge + align)*interlaced*MBsize*tolerance
    s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*21;

     //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer()
    CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t))
    /* the rd/b/obmc scratchpads alias the ME scratchpad (never live at
       the same time) */
    s->rd_scratchpad=   s->me.scratchpad;
    s->b_scratchpad=    s->me.scratchpad;
    s->obmc_scratchpad= s->me.scratchpad + 16;
    if (s->encoding) {
        CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
        CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
        if(s->avctx->noise_reduction){
            CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
        }
    }
    CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
    s->block= s->blocks[0];

    /* per-block aliases into the contiguous block storage */
    for(i=0;i<12;i++){
        s->pblocks[i] = (short *)(&s->block[i]);
    }
    return 0;
fail:
    return -1; //free() through MPV_common_end()
}
521
522 static void free_duplicate_context(MpegEncContext *s){
523     if(s==NULL) return;
524
525     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
526     av_freep(&s->me.scratchpad);
527     s->rd_scratchpad=
528     s->b_scratchpad=
529     s->obmc_scratchpad= NULL;
530
531     av_freep(&s->dct_error_sum);
532     av_freep(&s->me.map);
533     av_freep(&s->me.score_map);
534     av_freep(&s->blocks);
535     s->block= NULL;
536 }
537
/* Saves the per-thread fields of src into bak so that a wholesale
   memcpy of the whole context can be undone for exactly these fields;
   counterpart of ff_update_duplicate_context(). */
static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
#define COPY(a) bak->a= src->a
    COPY(allocated_edge_emu_buffer);
    COPY(edge_emu_buffer);
    COPY(me.scratchpad);
    COPY(rd_scratchpad);
    COPY(b_scratchpad);
    COPY(obmc_scratchpad);
    COPY(me.map);
    COPY(me.score_map);
    COPY(blocks);
    COPY(block);
    COPY(start_mb_y);
    COPY(end_mb_y);
    COPY(me.map_generation);
    COPY(pb);
    COPY(dct_error_sum);
    COPY(dct_count[0]);
    COPY(dct_count[1]);
#undef COPY
}
559
/* Synchronizes a per-thread context with src: copies the whole context,
   but restores dst's own per-thread buffers and slice range afterwards,
   and rebuilds the pblocks aliases so they point into dst, not src. */
void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
    MpegEncContext bak;
    int i;
    //FIXME copy only needed parts
//START_TIMER
    backup_duplicate_context(&bak, dst);
    memcpy(dst, src, sizeof(MpegEncContext));
    backup_duplicate_context(dst, &bak);
    for(i=0;i<12;i++){
        dst->pblocks[i] = (short *)(&dst->block[i]);
    }
//STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
}
573
574 #ifdef CONFIG_ENCODERS
/* Propagates the per-picture fields decided during motion estimation /
   header encoding from the main context into a per-thread one
   (encoder only). */
static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){
#define COPY(a) dst->a= src->a
    COPY(pict_type);
    COPY(current_picture);
    COPY(f_code);
    COPY(b_code);
    COPY(qscale);
    COPY(lambda);
    COPY(lambda2);
    COPY(picture_in_gop_number);
    COPY(gop_picture_number);
    COPY(frame_pred_frame_dct); //FIXME don't set in encode_header
    COPY(progressive_frame); //FIXME don't set in encode_header
    COPY(partitioned_frame); //FIXME don't set in encode_header
#undef COPY
}
591 #endif
592
593 /**
594  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
595  * the changed fields will not depend upon the prior state of the MpegEncContext.
596  */
597 static void MPV_common_defaults(MpegEncContext *s){
598     s->y_dc_scale_table=
599     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
600     s->chroma_qscale_table= ff_default_chroma_qscale_table;
601     s->progressive_frame= 1;
602     s->progressive_sequence= 1;
603     s->picture_structure= PICT_FRAME;
604
605     s->coded_picture_number = 0;
606     s->picture_number = 0;
607     s->input_picture_number = 0;
608
609     s->picture_in_gop_number = 0;
610
611     s->f_code = 1;
612     s->b_code = 1;
613 }
614
/**
 * sets the given MpegEncContext to defaults for decoding.
 * the changed fields will not depend upon the prior state of the MpegEncContext.
 */
void MPV_decode_defaults(MpegEncContext *s){
    /* decoding currently needs no defaults beyond the common ones */
    MPV_common_defaults(s);
}
622
623 /**
624  * sets the given MpegEncContext to defaults for encoding.
625  * the changed fields will not depend upon the prior state of the MpegEncContext.
626  */
627
628 #ifdef CONFIG_ENCODERS
629 static void MPV_encode_defaults(MpegEncContext *s){
630     int i;
631     MPV_common_defaults(s);
632
633     for(i=-16; i<16; i++){
634         default_fcode_tab[i + MAX_MV]= 1;
635     }
636     s->me.mv_penalty= default_mv_penalty;
637     s->fcode_tab= default_fcode_tab;
638 }
639 #endif //CONFIG_ENCODERS
640
641 /**
642  * init common structure for both encoder and decoder.
643  * this assumes that some variables like width/height are already set
644  */
645 int MPV_common_init(MpegEncContext *s)
646 {
647     int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;
648
649     s->mb_height = (s->height + 15) / 16;
650
651     if(s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height)){
652         av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
653         return -1;
654     }
655
656     if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height))
657         return -1;
658
659     dsputil_init(&s->dsp, s->avctx);
660     DCT_common_init(s);
661
662     s->flags= s->avctx->flags;
663     s->flags2= s->avctx->flags2;
664
665     s->mb_width  = (s->width  + 15) / 16;
666     s->mb_stride = s->mb_width + 1;
667     s->b8_stride = s->mb_width*2 + 1;
668     s->b4_stride = s->mb_width*4 + 1;
669     mb_array_size= s->mb_height * s->mb_stride;
670     mv_table_size= (s->mb_height+2) * s->mb_stride + 1;
671
672     /* set chroma shifts */
673     avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
674                                                     &(s->chroma_y_shift) );
675
676     /* set default edge pos, will be overriden in decode_header if needed */
677     s->h_edge_pos= s->mb_width*16;
678     s->v_edge_pos= s->mb_height*16;
679
680     s->mb_num = s->mb_width * s->mb_height;
681
682     s->block_wrap[0]=
683     s->block_wrap[1]=
684     s->block_wrap[2]=
685     s->block_wrap[3]= s->b8_stride;
686     s->block_wrap[4]=
687     s->block_wrap[5]= s->mb_stride;
688
689     y_size = s->b8_stride * (2 * s->mb_height + 1);
690     c_size = s->mb_stride * (s->mb_height + 1);
691     yc_size = y_size + 2 * c_size;
692
693     /* convert fourcc to upper case */
694     s->codec_tag=          toupper( s->avctx->codec_tag     &0xFF)
695                         + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
696                         + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16)
697                         + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);
698
699     s->stream_codec_tag=          toupper( s->avctx->stream_codec_tag     &0xFF)
700                                + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
701                                + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16)
702                                + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);
703
704     s->avctx->coded_frame= (AVFrame*)&s->current_picture;
705
706     CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error ressilience code looks cleaner with this
707     for(y=0; y<s->mb_height; y++){
708         for(x=0; x<s->mb_width; x++){
709             s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
710         }
711     }
712     s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
713
714     if (s->encoding) {
715         /* Allocate MV tables */
716         CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
717         CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
718         CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
719         CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
720         CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
721         CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
722         s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
723         s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
724         s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
725         s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
726         s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
727         s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;
728
729         if(s->msmpeg4_version){
730             CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
731         }
732         CHECKED_ALLOCZ(s->avctx->stats_out, 256);
733
734         /* Allocate MB type table */
735         CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding
736
737         CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))
738
739         CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
740         CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
741         CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
742         CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
743         CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
744         CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
745
746         if(s->avctx->noise_reduction){
747             CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
748         }
749     }
750     CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))
751
752     CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
753
754     if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
755         /* interlaced direct mode decoding tables */
756             for(i=0; i<2; i++){
757                 int j, k;
758                 for(j=0; j<2; j++){
759                     for(k=0; k<2; k++){
760                         CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
761                         s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
762                     }
763                     CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
764                     CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
765                     s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
766                 }
767                 CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
768             }
769     }
770     if (s->out_format == FMT_H263) {
771         /* ac values */
772         CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
773         s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
774         s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
775         s->ac_val[2] = s->ac_val[1] + c_size;
776
777         /* cbp values */
778         CHECKED_ALLOCZ(s->coded_block_base, y_size);
779         s->coded_block= s->coded_block_base + s->b8_stride + 1;
780
781         /* cbp, ac_pred, pred_dir */
782         CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
783         CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
784     }
785
786     if (s->h263_pred || s->h263_plus || !s->encoding) {
787         /* dc values */
788         //MN: we need these for error resilience of intra-frames
789         CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
790         s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
791         s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
792         s->dc_val[2] = s->dc_val[1] + c_size;
793         for(i=0;i<yc_size;i++)
794             s->dc_val_base[i] = 1024;
795     }
796
797     /* which mb is a intra block */
798     CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
799     memset(s->mbintra_table, 1, mb_array_size);
800
801     /* init macroblock skip table */
802     CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
803     //Note the +1 is for a quicker mpeg4 slice_end detection
804     CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
805
806     s->parse_context.state= -1;
807     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
808        s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
809        s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
810        s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
811     }
812
813     s->context_initialized = 1;
814
815     s->thread_context[0]= s;
816     for(i=1; i<s->avctx->thread_count; i++){
817         s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
818         memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
819     }
820
821     for(i=0; i<s->avctx->thread_count; i++){
822         if(init_duplicate_context(s->thread_context[i], s) < 0)
823            goto fail;
824         s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
825         s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
826     }
827
828     return 0;
829  fail:
830     MPV_common_end(s);
831     return -1;
832 }
833
/* free everything allocated by MPV_common_init(); common to encoder and decoder */
void MPV_common_end(MpegEncContext *s)
{
    int i, j, k;

    /* Free the per-thread duplicate state first; note thread_context[0] is s
       itself, so only the pointers for i>=1 are heap allocated and freed below. */
    for(i=0; i<s->avctx->thread_count; i++){
        free_duplicate_context(s->thread_context[i]);
    }
    for(i=1; i<s->avctx->thread_count; i++){
        av_freep(&s->thread_context[i]);
    }

    av_freep(&s->parse_context.buffer);
    s->parse_context.buffer_size=0;

    /* motion vector tables: only the *_base pointers own memory, the
       non-base pointers are offsets into them and are just cleared */
    av_freep(&s->mb_type);
    av_freep(&s->p_mv_table_base);
    av_freep(&s->b_forw_mv_table_base);
    av_freep(&s->b_back_mv_table_base);
    av_freep(&s->b_bidir_forw_mv_table_base);
    av_freep(&s->b_bidir_back_mv_table_base);
    av_freep(&s->b_direct_mv_table_base);
    s->p_mv_table= NULL;
    s->b_forw_mv_table= NULL;
    s->b_back_mv_table= NULL;
    s->b_bidir_forw_mv_table= NULL;
    s->b_bidir_back_mv_table= NULL;
    s->b_direct_mv_table= NULL;
    /* interlaced (field based) motion vector / select tables */
    for(i=0; i<2; i++){
        for(j=0; j<2; j++){
            for(k=0; k<2; k++){
                av_freep(&s->b_field_mv_table_base[i][j][k]);
                s->b_field_mv_table[i][j][k]=NULL;
            }
            av_freep(&s->b_field_select_table[i][j]);
            av_freep(&s->p_field_mv_table_base[i][j]);
            s->p_field_mv_table[i][j]=NULL;
        }
        av_freep(&s->p_field_select_table[i]);
    }

    /* DC/AC prediction and coded-block state (H.263/MPEG4 style codecs) */
    av_freep(&s->dc_val_base);
    av_freep(&s->ac_val_base);
    av_freep(&s->coded_block_base);
    av_freep(&s->mbintra_table);
    av_freep(&s->cbp_table);
    av_freep(&s->pred_dir_table);

    av_freep(&s->mbskip_table);
    av_freep(&s->prev_pict_types);
    av_freep(&s->bitstream_buffer);
    s->allocated_bitstream_buffer_size=0;

    av_freep(&s->avctx->stats_out);
    av_freep(&s->ac_stats);
    av_freep(&s->error_status_table);
    av_freep(&s->mb_index2xy);
    av_freep(&s->lambda_table);
    av_freep(&s->q_intra_matrix);
    av_freep(&s->q_inter_matrix);
    av_freep(&s->q_intra_matrix16);
    av_freep(&s->q_inter_matrix16);
    av_freep(&s->input_picture);
    av_freep(&s->reordered_input_picture);
    av_freep(&s->dct_offset);

    /* release the contents of every picture slot before freeing the array itself */
    if(s->picture){
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            free_picture(s, &s->picture[i]);
        }
    }
    av_freep(&s->picture);
    s->context_initialized = 0;
    s->last_picture_ptr=
    s->next_picture_ptr=
    s->current_picture_ptr= NULL;
    s->linesize= s->uvlinesize= 0;

    for(i=0; i<3; i++)
        av_freep(&s->visualization_buffer[i]);

    avcodec_default_free_buffers(s->avctx);
}
917
918 #ifdef CONFIG_ENCODERS
919
920 /* init video encoder */
921 int MPV_encode_init(AVCodecContext *avctx)
922 {
923     MpegEncContext *s = avctx->priv_data;
924     int i;
925     int chroma_h_shift, chroma_v_shift;
926
927     MPV_encode_defaults(s);
928
929     switch (avctx->codec_id) {
930     case CODEC_ID_MPEG2VIDEO:
931         if(avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUV422P){
932             av_log(avctx, AV_LOG_ERROR, "only YUV420 and YUV422 are supported\n");
933             return -1;
934         }
935         break;
936     case CODEC_ID_LJPEG:
937     case CODEC_ID_MJPEG:
938         if(avctx->pix_fmt != PIX_FMT_YUVJ420P && avctx->pix_fmt != PIX_FMT_YUVJ422P &&
939            ((avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUV422P) || avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL)){
940             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
941             return -1;
942         }
943         break;
944     default:
945         if(avctx->pix_fmt != PIX_FMT_YUV420P){
946             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
947             return -1;
948         }
949     }
950
951     switch (avctx->pix_fmt) {
952     case PIX_FMT_YUVJ422P:
953     case PIX_FMT_YUV422P:
954         s->chroma_format = CHROMA_422;
955         break;
956     case PIX_FMT_YUVJ420P:
957     case PIX_FMT_YUV420P:
958     default:
959         s->chroma_format = CHROMA_420;
960         break;
961     }
962
963     s->bit_rate = avctx->bit_rate;
964     s->width = avctx->width;
965     s->height = avctx->height;
966     if(avctx->gop_size > 600 && avctx->strict_std_compliance>FF_COMPLIANCE_EXPERIMENTAL){
967         av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
968         avctx->gop_size=600;
969     }
970     s->gop_size = avctx->gop_size;
971     s->avctx = avctx;
972     s->flags= avctx->flags;
973     s->flags2= avctx->flags2;
974     s->max_b_frames= avctx->max_b_frames;
975     s->codec_id= avctx->codec->id;
976     s->luma_elim_threshold  = avctx->luma_elim_threshold;
977     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
978     s->strict_std_compliance= avctx->strict_std_compliance;
979     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
980     s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
981     s->mpeg_quant= avctx->mpeg_quant;
982     s->rtp_mode= !!avctx->rtp_payload_size;
983     s->intra_dc_precision= avctx->intra_dc_precision;
984     s->user_specified_pts = AV_NOPTS_VALUE;
985
986     if (s->gop_size <= 1) {
987         s->intra_only = 1;
988         s->gop_size = 12;
989     } else {
990         s->intra_only = 0;
991     }
992
993     s->me_method = avctx->me_method;
994
995     /* Fixed QSCALE */
996     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
997
998     s->adaptive_quant= (   s->avctx->lumi_masking
999                         || s->avctx->dark_masking
1000                         || s->avctx->temporal_cplx_masking
1001                         || s->avctx->spatial_cplx_masking
1002                         || s->avctx->p_masking
1003                         || s->avctx->border_masking
1004                         || (s->flags&CODEC_FLAG_QP_RD))
1005                        && !s->fixed_qscale;
1006
1007     s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
1008     s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
1009     s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
1010     s->intra_vlc_format= !!(s->flags2 & CODEC_FLAG2_INTRA_VLC);
1011     s->q_scale_type= !!(s->flags2 & CODEC_FLAG2_NON_LINEAR_QUANT);
1012
1013     if(avctx->rc_max_rate && !avctx->rc_buffer_size){
1014         av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed, for encoding with a maximum bitrate\n");
1015         return -1;
1016     }
1017
1018     if(avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate){
1019         av_log(avctx, AV_LOG_INFO, "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
1020     }
1021
1022     if(avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate){
1023         av_log(avctx, AV_LOG_INFO, "bitrate below min bitrate\n");
1024         return -1;
1025     }
1026
1027     if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){
1028         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
1029         return -1;
1030     }
1031
1032     if(   s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate
1033        && (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO)
1034        && 90000LL * (avctx->rc_buffer_size-1) > s->avctx->rc_max_rate*0xFFFFLL){
1035
1036         av_log(avctx, AV_LOG_INFO, "Warning vbv_delay will be set to 0xFFFF (=VBR) as the specified vbv buffer is too large for the given bitrate!\n");
1037     }
1038
1039     if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4
1040        && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P && s->codec_id != CODEC_ID_FLV1){
1041         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
1042         return -1;
1043     }
1044
1045     if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
1046         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decision\n");
1047         return -1;
1048     }
1049
1050     if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
1051         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
1052         return -1;
1053     }
1054
1055     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
1056         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
1057         return -1;
1058     }
1059
1060     if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
1061         av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
1062         return -1;
1063     }
1064
1065     if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
1066         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
1067         return -1;
1068     }
1069
1070     if((s->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN))
1071        && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO){
1072         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
1073         return -1;
1074     }
1075
1076     if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
1077         av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supported by codec\n");
1078         return -1;
1079     }
1080
1081     if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
1082         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
1083         return -1;
1084     }
1085
1086     if((s->flags & CODEC_FLAG_QP_RD) && s->avctx->mb_decision != FF_MB_DECISION_RD){
1087         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
1088         return -1;
1089     }
1090
1091     if(s->avctx->scenechange_threshold < 1000000000 && (s->flags & CODEC_FLAG_CLOSED_GOP)){
1092         av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection arent supported yet, set threshold to 1000000000\n");
1093         return -1;
1094     }
1095
1096     if((s->flags2 & CODEC_FLAG2_INTRA_VLC) && s->codec_id != CODEC_ID_MPEG2VIDEO){
1097         av_log(avctx, AV_LOG_ERROR, "intra vlc table not supported by codec\n");
1098         return -1;
1099     }
1100
1101     if(s->flags & CODEC_FLAG_LOW_DELAY){
1102         if (s->codec_id != CODEC_ID_MPEG2VIDEO && s->codec_id != CODEC_ID_MPEG1VIDEO){
1103             av_log(avctx, AV_LOG_ERROR, "low delay forcing is only available for mpeg1/2\n");
1104             return -1;
1105         }
1106         if (s->max_b_frames != 0){
1107             av_log(avctx, AV_LOG_ERROR, "b frames cannot be used with low delay\n");
1108             return -1;
1109         }
1110     }
1111
1112     if(s->q_scale_type == 1){
1113         if(s->codec_id != CODEC_ID_MPEG2VIDEO){
1114             av_log(avctx, AV_LOG_ERROR, "non linear quant is only available for mpeg2\n");
1115             return -1;
1116         }
1117         if(avctx->qmax > 12){
1118             av_log(avctx, AV_LOG_ERROR, "non linear quant only supports qmax <= 12 currently\n");
1119             return -1;
1120         }
1121     }
1122
1123     if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4
1124        && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO
1125        && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){
1126         av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n");
1127         return -1;
1128     }
1129
1130     if(s->avctx->thread_count > 1)
1131         s->rtp_mode= 1;
1132
1133     if(!avctx->time_base.den || !avctx->time_base.num){
1134         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
1135         return -1;
1136     }
1137
1138     i= (INT_MAX/2+128)>>8;
1139     if(avctx->me_threshold >= i){
1140         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n", i - 1);
1141         return -1;
1142     }
1143     if(avctx->mb_threshold >= i){
1144         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n", i - 1);
1145         return -1;
1146     }
1147
1148     if(avctx->b_frame_strategy && (avctx->flags&CODEC_FLAG_PASS2)){
1149         av_log(avctx, AV_LOG_INFO, "notice: b_frame_strategy only affects the first pass\n");
1150         avctx->b_frame_strategy = 0;
1151     }
1152
1153     i= ff_gcd(avctx->time_base.den, avctx->time_base.num);
1154     if(i > 1){
1155         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
1156         avctx->time_base.den /= i;
1157         avctx->time_base.num /= i;
1158 //        return -1;
1159     }
1160
1161     if(s->codec_id==CODEC_ID_MJPEG){
1162         s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
1163         s->inter_quant_bias= 0;
1164     }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
1165         s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
1166         s->inter_quant_bias= 0;
1167     }else{
1168         s->intra_quant_bias=0;
1169         s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
1170     }
1171
1172     if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
1173         s->intra_quant_bias= avctx->intra_quant_bias;
1174     if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
1175         s->inter_quant_bias= avctx->inter_quant_bias;
1176
1177     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
1178
1179     if(avctx->codec_id == CODEC_ID_MPEG4 && s->avctx->time_base.den > (1<<16)-1){
1180         av_log(avctx, AV_LOG_ERROR, "timebase not supported by mpeg 4 standard\n");
1181         return -1;
1182     }
1183     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
1184
1185     switch(avctx->codec->id) {
1186     case CODEC_ID_MPEG1VIDEO:
1187         s->out_format = FMT_MPEG1;
1188         s->low_delay= !!(s->flags & CODEC_FLAG_LOW_DELAY);
1189         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1190         break;
1191     case CODEC_ID_MPEG2VIDEO:
1192         s->out_format = FMT_MPEG1;
1193         s->low_delay= !!(s->flags & CODEC_FLAG_LOW_DELAY);
1194         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1195         s->rtp_mode= 1;
1196         break;
1197     case CODEC_ID_LJPEG:
1198     case CODEC_ID_MJPEG:
1199         s->out_format = FMT_MJPEG;
1200         s->intra_only = 1; /* force intra only for jpeg */
1201         s->mjpeg_vsample[0] = 2;
1202         s->mjpeg_vsample[1] = 2>>chroma_v_shift;
1203         s->mjpeg_vsample[2] = 2>>chroma_v_shift;
1204         s->mjpeg_hsample[0] = 2;
1205         s->mjpeg_hsample[1] = 2>>chroma_h_shift;
1206         s->mjpeg_hsample[2] = 2>>chroma_h_shift;
1207         if (!(ENABLE_MJPEG_ENCODER || ENABLE_LJPEG_ENCODER)
1208             || ff_mjpeg_encode_init(s) < 0)
1209             return -1;
1210         avctx->delay=0;
1211         s->low_delay=1;
1212         break;
1213     case CODEC_ID_H261:
1214         if (!ENABLE_H261_ENCODER)  return -1;
1215         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
1216             av_log(avctx, AV_LOG_ERROR, "The specified picture size of %dx%d is not valid for the H.261 codec.\nValid sizes are 176x144, 352x288\n", s->width, s->height);
1217             return -1;
1218         }
1219         s->out_format = FMT_H261;
1220         avctx->delay=0;
1221         s->low_delay=1;
1222         break;
1223     case CODEC_ID_H263:
1224         if (h263_get_picture_format(s->width, s->height) == 7) {
1225             av_log(avctx, AV_LOG_INFO, "The specified picture size of %dx%d is not valid for the H.263 codec.\nValid sizes are 128x96, 176x144, 352x288, 704x576, and 1408x1152. Try H.263+.\n", s->width, s->height);
1226             return -1;
1227         }
1228         s->out_format = FMT_H263;
1229         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1230         avctx->delay=0;
1231         s->low_delay=1;
1232         break;
1233     case CODEC_ID_H263P:
1234         s->out_format = FMT_H263;
1235         s->h263_plus = 1;
1236         /* Fx */
1237         s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
1238         s->h263_aic= (avctx->flags & CODEC_FLAG_AC_PRED) ? 1:0;
1239         s->modified_quant= s->h263_aic;
1240         s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
1241         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1242         s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
1243         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1244         s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
1245
1246         /* /Fx */
1247         /* These are just to be sure */
1248         avctx->delay=0;
1249         s->low_delay=1;
1250         break;
1251     case CODEC_ID_FLV1:
1252         s->out_format = FMT_H263;
1253         s->h263_flv = 2; /* format = 1; 11-bit codes */
1254         s->unrestricted_mv = 1;
1255         s->rtp_mode=0; /* don't allow GOB */
1256         avctx->delay=0;
1257         s->low_delay=1;
1258         break;
1259     case CODEC_ID_RV10:
1260         s->out_format = FMT_H263;
1261         avctx->delay=0;
1262         s->low_delay=1;
1263         break;
1264     case CODEC_ID_RV20:
1265         s->out_format = FMT_H263;
1266         avctx->delay=0;
1267         s->low_delay=1;
1268         s->modified_quant=1;
1269         s->h263_aic=1;
1270         s->h263_plus=1;
1271         s->loop_filter=1;
1272         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1273         break;
1274     case CODEC_ID_MPEG4:
1275         s->out_format = FMT_H263;
1276         s->h263_pred = 1;
1277         s->unrestricted_mv = 1;
1278         s->low_delay= s->max_b_frames ? 0 : 1;
1279         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1280         break;
1281     case CODEC_ID_MSMPEG4V1:
1282         s->out_format = FMT_H263;
1283         s->h263_msmpeg4 = 1;
1284         s->h263_pred = 1;
1285         s->unrestricted_mv = 1;
1286         s->msmpeg4_version= 1;
1287         avctx->delay=0;
1288         s->low_delay=1;
1289         break;
1290     case CODEC_ID_MSMPEG4V2:
1291         s->out_format = FMT_H263;
1292         s->h263_msmpeg4 = 1;
1293         s->h263_pred = 1;
1294         s->unrestricted_mv = 1;
1295         s->msmpeg4_version= 2;
1296         avctx->delay=0;
1297         s->low_delay=1;
1298         break;
1299     case CODEC_ID_MSMPEG4V3:
1300         s->out_format = FMT_H263;
1301         s->h263_msmpeg4 = 1;
1302         s->h263_pred = 1;
1303         s->unrestricted_mv = 1;
1304         s->msmpeg4_version= 3;
1305         s->flipflop_rounding=1;
1306         avctx->delay=0;
1307         s->low_delay=1;
1308         break;
1309     case CODEC_ID_WMV1:
1310         s->out_format = FMT_H263;
1311         s->h263_msmpeg4 = 1;
1312         s->h263_pred = 1;
1313         s->unrestricted_mv = 1;
1314         s->msmpeg4_version= 4;
1315         s->flipflop_rounding=1;
1316         avctx->delay=0;
1317         s->low_delay=1;
1318         break;
1319     case CODEC_ID_WMV2:
1320         s->out_format = FMT_H263;
1321         s->h263_msmpeg4 = 1;
1322         s->h263_pred = 1;
1323         s->unrestricted_mv = 1;
1324         s->msmpeg4_version= 5;
1325         s->flipflop_rounding=1;
1326         avctx->delay=0;
1327         s->low_delay=1;
1328         break;
1329     default:
1330         return -1;
1331     }
1332
1333     avctx->has_b_frames= !s->low_delay;
1334
1335     s->encoding = 1;
1336
1337     /* init */
1338     if (MPV_common_init(s) < 0)
1339         return -1;
1340
1341     if(s->modified_quant)
1342         s->chroma_qscale_table= ff_h263_chroma_qscale_table;
1343     s->progressive_frame=
1344     s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN));
1345     s->quant_precision=5;
1346
1347     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
1348     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
1349
1350     if (ENABLE_H261_ENCODER && s->out_format == FMT_H261)
1351         ff_h261_encode_init(s);
1352     if (s->out_format == FMT_H263)
1353         h263_encode_init(s);
1354     if (ENABLE_MSMPEG4_ENCODER && s->msmpeg4_version)
1355         ff_msmpeg4_encode_init(s);
1356     if (s->out_format == FMT_MPEG1)
1357         ff_mpeg1_encode_init(s);
1358
1359     /* init q matrix */
1360     for(i=0;i<64;i++) {
1361         int j= s->dsp.idct_permutation[i];
1362         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
1363             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
1364             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
1365         }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1366             s->intra_matrix[j] =
1367             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1368         }else
1369         { /* mpeg1/2 */
1370             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
1371             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1372         }
1373         if(s->avctx->intra_matrix)
1374             s->intra_matrix[j] = s->avctx->intra_matrix[i];
1375         if(s->avctx->inter_matrix)
1376             s->inter_matrix[j] = s->avctx->inter_matrix[i];
1377     }
1378
1379     /* precompute matrix */
1380     /* for mjpeg, we do include qscale in the matrix */
1381     if (s->out_format != FMT_MJPEG) {
1382         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
1383                        s->intra_matrix, s->intra_quant_bias, avctx->qmin, 31, 1);
1384         convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
1385                        s->inter_matrix, s->inter_quant_bias, avctx->qmin, 31, 0);
1386     }
1387
1388     if(ff_rate_control_init(s) < 0)
1389         return -1;
1390
1391     return 0;
1392 }
1393
1394 int MPV_encode_end(AVCodecContext *avctx)
1395 {
1396     MpegEncContext *s = avctx->priv_data;
1397
1398     ff_rate_control_uninit(s);
1399
1400     MPV_common_end(s);
1401     if ((ENABLE_MJPEG_ENCODER || ENABLE_LJPEG_ENCODER) && s->out_format == FMT_MJPEG)
1402         ff_mjpeg_encode_close(s);
1403
1404     av_freep(&avctx->extradata);
1405
1406     return 0;
1407 }
1408
1409 #endif //CONFIG_ENCODERS
1410
1411 void init_rl(RLTable *rl, uint8_t static_store[2][2*MAX_RUN + MAX_LEVEL + 3])
1412 {
1413     int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
1414     uint8_t index_run[MAX_RUN+1];
1415     int last, run, level, start, end, i;
1416
1417     /* If table is static, we can quit if rl->max_level[0] is not NULL */
1418     if(static_store && rl->max_level[0])
1419         return;
1420
1421     /* compute max_level[], max_run[] and index_run[] */
1422     for(last=0;last<2;last++) {
1423         if (last == 0) {
1424             start = 0;
1425             end = rl->last;
1426         } else {
1427             start = rl->last;
1428             end = rl->n;
1429         }
1430
1431         memset(max_level, 0, MAX_RUN + 1);
1432         memset(max_run, 0, MAX_LEVEL + 1);
1433         memset(index_run, rl->n, MAX_RUN + 1);
1434         for(i=start;i<end;i++) {
1435             run = rl->table_run[i];
1436             level = rl->table_level[i];
1437             if (index_run[run] == rl->n)
1438                 index_run[run] = i;
1439             if (level > max_level[run])
1440                 max_level[run] = level;
1441             if (run > max_run[level])
1442                 max_run[level] = run;
1443         }
1444         if(static_store)
1445             rl->max_level[last] = static_store[last];
1446         else
1447             rl->max_level[last] = av_malloc(MAX_RUN + 1);
1448         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
1449         if(static_store)
1450             rl->max_run[last] = static_store[last] + MAX_RUN + 1;
1451         else
1452             rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
1453         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
1454         if(static_store)
1455             rl->index_run[last] = static_store[last] + MAX_RUN + MAX_LEVEL + 2;
1456         else
1457             rl->index_run[last] = av_malloc(MAX_RUN + 1);
1458         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
1459     }
1460 }
1461
/* Replicate the outermost pixels of a width x height image into a border of
 * 'w' pixels on every side (needed e.g. for unrestricted motion vectors).
 * 'wrap' is the line stride of the buffer. */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *bottom = buf + (height - 1) * wrap;
    uint8_t *row;
    int k;

    /* replicate the first/last rows above/below the image */
    for (k = 1; k <= w; k++) {
        memcpy(buf    - k * wrap, buf,    width);
        memcpy(bottom + k * wrap, bottom, width);
    }

    /* extend every row to the left and to the right */
    for (row = buf; row != buf + height * wrap; row += wrap) {
        memset(row - w,     row[0],       w);
        memset(row + width, row[width-1], w);
    }

    /* fill the four corner areas from the corner pixels */
    for (k = 1; k <= w; k++) {
        memset(buf    - k * wrap - w,     buf[0],          w); /* top left */
        memset(buf    - k * wrap + width, buf[width-1],    w); /* top right */
        memset(bottom + k * wrap - w,     bottom[0],       w); /* bottom left */
        memset(bottom + k * wrap + width, bottom[width-1], w); /* bottom right */
    }
}
1490
1491 int ff_find_unused_picture(MpegEncContext *s, int shared){
1492     int i;
1493
1494     if(shared){
1495         for(i=0; i<MAX_PICTURE_COUNT; i++){
1496             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
1497         }
1498     }else{
1499         for(i=0; i<MAX_PICTURE_COUNT; i++){
1500             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
1501         }
1502         for(i=0; i<MAX_PICTURE_COUNT; i++){
1503             if(s->picture[i].data[0]==NULL) return i;
1504         }
1505     }
1506
1507     assert(0);
1508     return -1;
1509 }
1510
1511 static void update_noise_reduction(MpegEncContext *s){
1512     int intra, i;
1513
1514     for(intra=0; intra<2; intra++){
1515         if(s->dct_count[intra] > (1<<16)){
1516             for(i=0; i<64; i++){
1517                 s->dct_error_sum[intra][i] >>=1;
1518             }
1519             s->dct_count[intra] >>= 1;
1520         }
1521
1522         for(i=0; i<64; i++){
1523             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
1524         }
1525     }
1526 }
1527
1528 /**
1529  * generic function for encode/decode called after coding/decoding the header and before a frame is coded/decoded
1530  */
1531 int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
1532 {
1533     int i;
1534     AVFrame *pic;
1535     s->mb_skipped = 0;
1536
1537     assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
1538
1539     /* mark&release old frames */
1540     if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
1541       if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
1542         avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);
1543
1544         /* release forgotten pictures */
1545         /* if(mpeg124/h263) */
1546         if(!s->encoding){
1547             for(i=0; i<MAX_PICTURE_COUNT; i++){
1548                 if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
1549                     av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
1550                     avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
1551                 }
1552             }
1553         }
1554       }
1555     }
1556 alloc:
1557     if(!s->encoding){
1558         /* release non reference frames */
1559         for(i=0; i<MAX_PICTURE_COUNT; i++){
1560             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1561                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1562             }
1563         }
1564
1565         if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
1566             pic= (AVFrame*)s->current_picture_ptr; //we allready have a unused image (maybe it was set before reading the header)
1567         else{
1568             i= ff_find_unused_picture(s, 0);
1569             pic= (AVFrame*)&s->picture[i];
1570         }
1571
1572         pic->reference= (s->pict_type != B_TYPE || s->codec_id == CODEC_ID_H264)
1573                         && !s->dropable ? 3 : 0;
1574
1575         pic->coded_picture_number= s->coded_picture_number++;
1576
1577         if( alloc_picture(s, (Picture*)pic, 0) < 0)
1578             return -1;
1579
1580         s->current_picture_ptr= (Picture*)pic;
1581         s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
1582         s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
1583     }
1584
1585     s->current_picture_ptr->pict_type= s->pict_type;
1586 //    if(s->flags && CODEC_FLAG_QSCALE)
1587   //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
1588     s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;
1589
1590     copy_picture(&s->current_picture, s->current_picture_ptr);
1591
1592     if (s->pict_type != B_TYPE) {
1593         s->last_picture_ptr= s->next_picture_ptr;
1594         if(!s->dropable)
1595             s->next_picture_ptr= s->current_picture_ptr;
1596     }
1597 /*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
1598         s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL,
1599         s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL,
1600         s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
1601         s->pict_type, s->dropable);*/
1602
1603     if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
1604     if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);
1605
1606     if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL) && !s->dropable){
1607         av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
1608         assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
1609         goto alloc;
1610     }
1611
1612     assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));
1613
1614     if(s->picture_structure!=PICT_FRAME){
1615         int i;
1616         for(i=0; i<4; i++){
1617             if(s->picture_structure == PICT_BOTTOM_FIELD){
1618                  s->current_picture.data[i] += s->current_picture.linesize[i];
1619             }
1620             s->current_picture.linesize[i] *= 2;
1621             s->last_picture.linesize[i] *=2;
1622             s->next_picture.linesize[i] *=2;
1623         }
1624     }
1625
1626     s->hurry_up= s->avctx->hurry_up;
1627     s->error_resilience= avctx->error_resilience;
1628
1629     /* set dequantizer, we can't do it during init as it might change for mpeg4
1630        and we can't do it in the header decode as init isnt called for mpeg4 there yet */
1631     if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
1632         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1633         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1634     }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1635         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1636         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1637     }else{
1638         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1639         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1640     }
1641
1642     if(s->dct_error_sum){
1643         assert(s->avctx->noise_reduction && s->encoding);
1644
1645         update_noise_reduction(s);
1646     }
1647
1648 #ifdef HAVE_XVMC
1649     if(s->avctx->xvmc_acceleration)
1650         return XVMC_field_start(s, avctx);
1651 #endif
1652     return 0;
1653 }
1654
/**
 * Generic function for encode/decode called after a frame has been
 * coded/decoded: pads the edges of reference frames for motion
 * prediction, updates last-picture statistics and (when encoding)
 * releases buffers of non-reference frames.
 */
void MPV_frame_end(MpegEncContext *s)
{
    int i;
    /* draw edge for correct motion prediction if outside */
#ifdef HAVE_XVMC
//just to make sure that all data is rendered.
    if(s->avctx->xvmc_acceleration){
        XVMC_field_end(s);
    }else
#endif
    if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
            draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
            draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
            draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
    }
    emms_c(); //leave the FPU/MMX state clean for whatever runs next

    s->last_pict_type    = s->pict_type;
    s->last_lambda_for[s->pict_type]= s->current_picture_ptr->quality;
    if(s->pict_type!=B_TYPE){
        s->last_non_b_pict_type= s->pict_type;
    }
#if 0
        /* copy back current_picture variables */
    for(i=0; i<MAX_PICTURE_COUNT; i++){
        if(s->picture[i].data[0] == s->current_picture.data[0]){
            s->picture[i]= s->current_picture;
            break;
        }
    }
    assert(i<MAX_PICTURE_COUNT);
#endif

    if(s->encoding){
        /* release non-reference frames */
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
            }
        }
    }
    // clear copies, to avoid confusion
#if 0
    memset(&s->last_picture, 0, sizeof(Picture));
    memset(&s->next_picture, 0, sizeof(Picture));
    memset(&s->current_picture, 0, sizeof(Picture));
#endif
    s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
}
1705
1706 /**
1707  * draws an line from (ex, ey) -> (sx, sy).
1708  * @param w width of the image
1709  * @param h height of the image
1710  * @param stride stride/linesize of the image
1711  * @param color color of the arrow
1712  */
1713 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1714     int x, y, fr, f;
1715
1716     sx= av_clip(sx, 0, w-1);
1717     sy= av_clip(sy, 0, h-1);
1718     ex= av_clip(ex, 0, w-1);
1719     ey= av_clip(ey, 0, h-1);
1720
1721     buf[sy*stride + sx]+= color;
1722
1723     if(FFABS(ex - sx) > FFABS(ey - sy)){
1724         if(sx > ex){
1725             FFSWAP(int, sx, ex);
1726             FFSWAP(int, sy, ey);
1727         }
1728         buf+= sx + sy*stride;
1729         ex-= sx;
1730         f= ((ey-sy)<<16)/ex;
1731         for(x= 0; x <= ex; x++){
1732             y = (x*f)>>16;
1733             fr= (x*f)&0xFFFF;
1734             buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
1735             buf[(y+1)*stride + x]+= (color*         fr )>>16;
1736         }
1737     }else{
1738         if(sy > ey){
1739             FFSWAP(int, sx, ex);
1740             FFSWAP(int, sy, ey);
1741         }
1742         buf+= sx + sy*stride;
1743         ey-= sy;
1744         if(ey) f= ((ex-sx)<<16)/ey;
1745         else   f= 0;
1746         for(y= 0; y <= ey; y++){
1747             x = (y*f)>>16;
1748             fr= (y*f)&0xFFFF;
1749             buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;;
1750             buf[y*stride + x+1]+= (color*         fr )>>16;;
1751         }
1752     }
1753 }
1754
1755 /**
1756  * draws an arrow from (ex, ey) -> (sx, sy).
1757  * @param w width of the image
1758  * @param h height of the image
1759  * @param stride stride/linesize of the image
1760  * @param color color of the arrow
1761  */
1762 static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1763     int dx,dy;
1764
1765     sx= av_clip(sx, -100, w+100);
1766     sy= av_clip(sy, -100, h+100);
1767     ex= av_clip(ex, -100, w+100);
1768     ey= av_clip(ey, -100, h+100);
1769
1770     dx= ex - sx;
1771     dy= ey - sy;
1772
1773     if(dx*dx + dy*dy > 3*3){
1774         int rx=  dx + dy;
1775         int ry= -dx + dy;
1776         int length= ff_sqrt((rx*rx + ry*ry)<<8);
1777
1778         //FIXME subpixel accuracy
1779         rx= ROUNDED_DIV(rx*3<<4, length);
1780         ry= ROUNDED_DIV(ry*3<<4, length);
1781
1782         draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
1783         draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
1784     }
1785     draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
1786 }
1787
/**
 * Prints debugging info for the given picture.
 * Depending on the bits set in s->avctx->debug and s->avctx->debug_mv
 * this writes a textual per-macroblock dump (skip count, quantizer,
 * macroblock type) through av_log() and/or paints motion vectors,
 * quantizer and macroblock-type visualizations into a copy of the frame.
 */
void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){

    if(!pict || !pict->mb_type) return;

    //textual per-macroblock dump
    if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
        int x,y;

        av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
        switch (pict->pict_type) {
            case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
            case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
            case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
            case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
            case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
            case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
        }
        for(y=0; y<s->mb_height; y++){
            for(x=0; x<s->mb_width; x++){
                if(s->avctx->debug&FF_DEBUG_SKIP){
                    int count= s->mbskip_table[x + y*s->mb_stride];
                    if(count>9) count=9; //clamp so the output stays one digit
                    av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
                }
                if(s->avctx->debug&FF_DEBUG_QP){
                    av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
                }
                if(s->avctx->debug&FF_DEBUG_MB_TYPE){
                    int mb_type= pict->mb_type[x + y*s->mb_stride];
                    //Type & MV direction
                    if(IS_PCM(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "P");
                    else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "A");
                    else if(IS_INTRA4x4(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "i");
                    else if(IS_INTRA16x16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "I");
                    else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "d");
                    else if(IS_DIRECT(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "D");
                    else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "g");
                    else if(IS_GMC(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "G");
                    else if(IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "S");
                    else if(!USES_LIST(mb_type, 1))
                        av_log(s->avctx, AV_LOG_DEBUG, ">");
                    else if(!USES_LIST(mb_type, 0))
                        av_log(s->avctx, AV_LOG_DEBUG, "<");
                    else{
                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
                        av_log(s->avctx, AV_LOG_DEBUG, "X");
                    }

                    //segmentation
                    if(IS_8X8(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "+");
                    else if(IS_16X8(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "-");
                    else if(IS_8X16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "|");
                    else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, " ");
                    else
                        av_log(s->avctx, AV_LOG_DEBUG, "?");


                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
                        av_log(s->avctx, AV_LOG_DEBUG, "=");
                    else
                        av_log(s->avctx, AV_LOG_DEBUG, " ");
                }
//                av_log(s->avctx, AV_LOG_DEBUG, " ");
            }
            av_log(s->avctx, AV_LOG_DEBUG, "\n");
        }
    }

    //visual debug: draw into a private copy of the picture
    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
        const int shift= 1 + s->quarter_sample;
        int mb_y;
        uint8_t *ptr;
        int i;
        int h_chroma_shift, v_chroma_shift;
        const int width = s->avctx->width;
        const int height= s->avctx->height;
        const int mv_sample_log2= 4 - pict->motion_subsample_log2;
        const int mv_stride= (s->mb_width << mv_sample_log2) + (s->codec_id == CODEC_ID_H264 ? 0 : 1);
        s->low_delay=0; //needed to see the vectors without trashing the buffers

        avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
        //NOTE(review): assumes visualization_buffer[] was allocated large enough elsewhere -- confirm
        for(i=0; i<3; i++){
            memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
            pict->data[i]= s->visualization_buffer[i];
        }
        pict->type= FF_BUFFER_TYPE_COPY;
        ptr= pict->data[0];

        for(mb_y=0; mb_y<s->mb_height; mb_y++){
            int mb_x;
            for(mb_x=0; mb_x<s->mb_width; mb_x++){
                const int mb_index= mb_x + mb_y*s->mb_stride;
                //motion vectors: type 0 = P forward, 1 = B forward, 2 = B backward
                if((s->avctx->debug_mv) && pict->motion_val){
                  int type;
                  for(type=0; type<3; type++){
                    int direction = 0;
                    switch (type) {
                      case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
                                continue;
                              direction = 0;
                              break;
                      case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
                                continue;
                              direction = 0;
                              break;
                      case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
                                continue;
                              direction = 1;
                              break;
                    }
                    if(!USES_LIST(pict->mb_type[mb_index], direction))
                        continue;

                    //one arrow per motion vector; the partitioning decides how many there are
                    if(IS_8X8(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<4; i++){
                        int sx= mb_x*16 + 4 + 8*(i&1);
                        int sy= mb_y*16 + 4 + 8*(i>>1);
                        int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
                        int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
                        int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
                        draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
                      }
                    }else if(IS_16X8(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<2; i++){
                        int sx=mb_x*16 + 8;
                        int sy=mb_y*16 + 4 + 8*i;
                        int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
                        int mx=(pict->motion_val[direction][xy][0]>>shift);
                        int my=(pict->motion_val[direction][xy][1]>>shift);

                        if(IS_INTERLACED(pict->mb_type[mb_index]))
                            my*=2; //field MVs are in field coordinates

                        draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
                      }
                    }else if(IS_8X16(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<2; i++){
                        int sx=mb_x*16 + 4 + 8*i;
                        int sy=mb_y*16 + 8;
                        int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
                        int mx=(pict->motion_val[direction][xy][0]>>shift);
                        int my=(pict->motion_val[direction][xy][1]>>shift);

                        if(IS_INTERLACED(pict->mb_type[mb_index]))
                            my*=2; //field MVs are in field coordinates

                        draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
                      }
                    }else{
                      int sx= mb_x*16 + 8;
                      int sy= mb_y*16 + 8;
                      int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
                      int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
                      int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
                      draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
                    }
                  }
                }
                //visualize the quantizer as chroma brightness
                if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
                    uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
                    int y;
                    for(y=0; y<8; y++){
                        *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
                        *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
                    }
                }
                //visualize the macroblock type as a chroma tint
                if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
                    int mb_type= pict->mb_type[mb_index];
                    uint64_t u,v;
                    int y;
//pick the chroma pair (u,v) at angle theta (degrees) and radius r around gray
#define COLOR(theta, r)\
u= (int)(128 + r*cos(theta*3.141592/180));\
v= (int)(128 + r*sin(theta*3.141592/180));


                    u=v=128;
                    if(IS_PCM(mb_type)){
                        COLOR(120,48)
                    }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
                        COLOR(30,48)
                    }else if(IS_INTRA4x4(mb_type)){
                        COLOR(90,48)
                    }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
//                        COLOR(120,48)
                    }else if(IS_DIRECT(mb_type)){
                        COLOR(150,48)
                    }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
                        COLOR(170,48)
                    }else if(IS_GMC(mb_type)){
                        COLOR(190,48)
                    }else if(IS_SKIP(mb_type)){
//                        COLOR(180,48)
                    }else if(!USES_LIST(mb_type, 1)){
                        COLOR(240,48)
                    }else if(!USES_LIST(mb_type, 0)){
                        COLOR(0,48)
                    }else{
                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
                        COLOR(300,48)
                    }

                    u*= 0x0101010101010101ULL; //replicate the byte over 8 pixels
                    v*= 0x0101010101010101ULL;
                    for(y=0; y<8; y++){
                        *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
                        *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
                    }

                    //segmentation
                    if(IS_8X8(mb_type) || IS_16X8(mb_type)){
                        *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
                        *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
                    }
                    if(IS_8X8(mb_type) || IS_8X16(mb_type)){
                        for(y=0; y<16; y++)
                            pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
                    }
                    if(IS_8X8(mb_type) && mv_sample_log2 >= 2){
                        int dm= 1 << (mv_sample_log2-2);
                        for(i=0; i<4; i++){
                            int sx= mb_x*16 + 8*(i&1);
                            int sy= mb_y*16 + 8*(i>>1);
                            int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
                            //FIXME bidir
                            int32_t *mv = (int32_t*)&pict->motion_val[0][xy];
                            if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)])
                                for(y=0; y<8; y++)
                                    pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80;
                            if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)])
                                *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
                        }
                    }

                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
                        // hmm
                    }
                }
                s->mbskip_table[mb_index]=0; //the skip info has been shown, reset it
            }
        }
    }
}
2048
2049 #ifdef CONFIG_ENCODERS
2050
/**
 * Returns the sum of absolute errors of a 16x16 block against a
 * constant reference value.
 * @param src top-left pixel of the 16x16 block
 * @param ref constant value each pixel is compared against
 * @param stride linesize of src
 */
static int get_sae(uint8_t *src, int ref, int stride){
    int i, j;
    int sum= 0;

    for(j=0; j<16; j++){
        uint8_t *row= src + j*stride;
        for(i=0; i<16; i++){
            int d= row[i] - ref;
            sum+= d >= 0 ? d : -d;
        }
    }

    return sum;
}
2063
2064 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
2065     int x, y, w, h;
2066     int acc=0;
2067
2068     w= s->width &~15;
2069     h= s->height&~15;
2070
2071     for(y=0; y<h; y+=16){
2072         for(x=0; x<w; x+=16){
2073             int offset= x + y*stride;
2074             int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
2075             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
2076             int sae = get_sae(src + offset, mean, stride);
2077
2078             acc+= sae + 500 < sad;
2079         }
2080     }
2081     return acc;
2082 }
2083
2084
/**
 * Queues a user-supplied frame for encoding.
 * Validates (or guesses) the pts, then either references the user
 * buffer directly or copies it into an internal Picture, and appends
 * the result to s->input_picture[] after shifting that array by one.
 * @param pic_arg the user frame, or NULL (flushing; only the shift happens)
 * @return 0 on success, -1 on non-monotone timestamps
 */
static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
    AVFrame *pic=NULL;
    int64_t pts;
    int i;
    const int encoding_delay= s->max_b_frames;
    int direct=1; //can we reference the user buffer without copying?

    if(pic_arg){
        pts= pic_arg->pts;
        pic_arg->display_picture_number= s->input_picture_number++;

        if(pts != AV_NOPTS_VALUE){
            if(s->user_specified_pts != AV_NOPTS_VALUE){
                int64_t time= pts;
                int64_t last= s->user_specified_pts;

                //timestamps must be strictly increasing
                if(time <= last){
                    av_log(s->avctx, AV_LOG_ERROR, "Error, Invalid timestamp=%"PRId64", last=%"PRId64"\n", pts, s->user_specified_pts);
                    return -1;
                }
            }
            s->user_specified_pts= pts;
        }else{
            if(s->user_specified_pts != AV_NOPTS_VALUE){
                //no pts given -> continue one past the previous one
                s->user_specified_pts=
                pts= s->user_specified_pts + 1;
                av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n", pts);
            }else{
                pts= pic_arg->display_picture_number;
            }
        }
    }

  if(pic_arg){
    //direct mode needs a preserved user buffer and matching strides
    if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
    if(pic_arg->linesize[0] != s->linesize) direct=0;
    if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
    if(pic_arg->linesize[2] != s->uvlinesize) direct=0;

//    av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);

    if(direct){
        i= ff_find_unused_picture(s, 1);

        pic= (AVFrame*)&s->picture[i];
        pic->reference= 3;

        //just reference the user planes, no copy
        for(i=0; i<4; i++){
            pic->data[i]= pic_arg->data[i];
            pic->linesize[i]= pic_arg->linesize[i];
        }
        alloc_picture(s, (Picture*)pic, 1);
    }else{
        i= ff_find_unused_picture(s, 0);

        pic= (AVFrame*)&s->picture[i];
        pic->reference= 3;

        alloc_picture(s, (Picture*)pic, 0);

        if(   pic->data[0] + INPLACE_OFFSET == pic_arg->data[0]
           && pic->data[1] + INPLACE_OFFSET == pic_arg->data[1]
           && pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]){
       // empty
        }else{
            //copy the user data plane by plane
            int h_chroma_shift, v_chroma_shift;
            avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);

            for(i=0; i<3; i++){
                int src_stride= pic_arg->linesize[i];
                int dst_stride= i ? s->uvlinesize : s->linesize;
                int h_shift= i ? h_chroma_shift : 0;
                int v_shift= i ? v_chroma_shift : 0;
                int w= s->width >>h_shift;
                int h= s->height>>v_shift;
                uint8_t *src= pic_arg->data[i];
                uint8_t *dst= pic->data[i];

                if(!s->avctx->rc_buffer_size)
                    dst +=INPLACE_OFFSET;

                if(src_stride==dst_stride)
                    memcpy(dst, src, src_stride*h);
                else{
                    while(h--){
                        memcpy(dst, src, w);
                        dst += dst_stride;
                        src += src_stride;
                    }
                }
            }
        }
    }
    copy_picture_attributes(s, pic, pic_arg);
    pic->pts= pts; //we set this here to avoid modifiying pic_arg
  }

    /* shift buffer entries */
    for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
        s->input_picture[i-1]= s->input_picture[i];

    s->input_picture[encoding_delay]= (Picture*)pic;

    return 0;
}
2190
2191 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref){
2192     int x, y, plane;
2193     int score=0;
2194     int64_t score64=0;
2195
2196     for(plane=0; plane<3; plane++){
2197         const int stride= p->linesize[plane];
2198         const int bw= plane ? 1 : 2;
2199         for(y=0; y<s->mb_height*bw; y++){
2200             for(x=0; x<s->mb_width*bw; x++){
2201                 int off= p->type == FF_BUFFER_TYPE_SHARED ? 0: 16;
2202                 int v= s->dsp.frame_skip_cmp[1](s, p->data[plane] + 8*(x + y*stride)+off, ref->data[plane] + 8*(x + y*stride), stride, 8);
2203
2204                 switch(s->avctx->frame_skip_exp){
2205                     case 0: score= FFMAX(score, v); break;
2206                     case 1: score+= FFABS(v);break;
2207                     case 2: score+= v*v;break;
2208                     case 3: score64+= FFABS(v*v*(int64_t)v);break;
2209                     case 4: score64+= v*v*(int64_t)(v*v);break;
2210                 }
2211             }
2212         }
2213     }
2214
2215     if(score) score64= score;
2216
2217     if(score64 < s->avctx->frame_skip_threshold)
2218         return 1;
2219     if(score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda)>>8))
2220         return 1;
2221     return 0;
2222 }
2223
2224 static int estimate_best_b_count(MpegEncContext *s){
2225     AVCodec *codec= avcodec_find_encoder(s->avctx->codec_id);
2226     AVCodecContext *c= avcodec_alloc_context();
2227     AVFrame input[FF_MAX_B_FRAMES+2];
2228     const int scale= s->avctx->brd_scale;
2229     int i, j, out_size, p_lambda, b_lambda, lambda2;
2230     int outbuf_size= s->width * s->height; //FIXME
2231     uint8_t *outbuf= av_malloc(outbuf_size);
2232     int64_t best_rd= INT64_MAX;
2233     int best_b_count= -1;
2234
2235     assert(scale>=0 && scale <=3);
2236
2237 //    emms_c();
2238     p_lambda= s->last_lambda_for[P_TYPE]; //s->next_picture_ptr->quality;
2239     b_lambda= s->last_lambda_for[B_TYPE]; //p_lambda *FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
2240     if(!b_lambda) b_lambda= p_lambda; //FIXME we should do this somewhere else
2241     lambda2= (b_lambda*b_lambda + (1<<FF_LAMBDA_SHIFT)/2 ) >> FF_LAMBDA_SHIFT;
2242
2243     c->width = s->width >> scale;
2244     c->height= s->height>> scale;
2245     c->flags= CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR | CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
2246     c->flags|= s->avctx->flags & CODEC_FLAG_QPEL;
2247     c->mb_decision= s->avctx->mb_decision;
2248     c->me_cmp= s->avctx->me_cmp;
2249     c->mb_cmp= s->avctx->mb_cmp;
2250     c->me_sub_cmp= s->avctx->me_sub_cmp;
2251     c->pix_fmt = PIX_FMT_YUV420P;
2252     c->time_base= s->avctx->time_base;
2253     c->max_b_frames= s->max_b_frames;
2254
2255     if (avcodec_open(c, codec) < 0)
2256         return -1;
2257
2258     for(i=0; i<s->max_b_frames+2; i++){
2259         int ysize= c->width*c->height;
2260         int csize= (c->width/2)*(c->height/2);
2261         Picture pre_input, *pre_input_ptr= i ? s->input_picture[i-1] : s->next_picture_ptr;
2262
2263         avcodec_get_frame_defaults(&input[i]);
2264         input[i].data[0]= av_malloc(ysize + 2*csize);
2265         input[i].data[1]= input[i].data[0] + ysize;
2266         input[i].data[2]= input[i].data[1] + csize;
2267         input[i].linesize[0]= c->width;
2268         input[i].linesize[1]=
2269         input[i].linesize[2]= c->width/2;
2270
2271         if(pre_input_ptr && (!i || s->input_picture[i-1])) {
2272             pre_input= *pre_input_ptr;
2273
2274             if(pre_input.type != FF_BUFFER_TYPE_SHARED && i) {
2275                 pre_input.data[0]+=INPLACE_OFFSET;
2276                 pre_input.data[1]+=INPLACE_OFFSET;
2277                 pre_input.data[2]+=INPLACE_OFFSET;
2278             }
2279
2280             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0], pre_input.data[0], pre_input.linesize[0], c->width, c->height);
2281             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1], pre_input.data[1], pre_input.linesize[1], c->width>>1, c->height>>1);
2282             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2], pre_input.data[2], pre_input.linesize[2], c->width>>1, c->height>>1);
2283         }
2284     }
2285
2286     for(j=0; j<s->max_b_frames+1; j++){
2287         int64_t rd=0;
2288
2289         if(!s->input_picture[j])
2290             break;
2291
2292         c->error[0]= c->error[1]= c->error[2]= 0;
2293
2294         input[0].pict_type= I_TYPE;
2295         input[0].quality= 1 * FF_QP2LAMBDA;
2296         out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[0]);
2297 //        rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
2298
2299         for(i=0; i<s->max_b_frames+1; i++){
2300             int is_p= i % (j+1) == j || i==s->max_b_frames;
2301
2302             input[i+1].pict_type= is_p ? P_TYPE : B_TYPE;
2303             input[i+1].quality= is_p ? p_lambda : b_lambda;
2304             out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[i+1]);
2305             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
2306         }
2307
2308         /* get the delayed frames */
2309         while(out_size){
2310             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
2311             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
2312         }
2313
2314         rd += c->error[0] + c->error[1] + c->error[2];
2315
2316         if(rd < best_rd){
2317             best_rd= rd;
2318             best_b_count= j;
2319         }
2320     }
2321
2322     av_freep(&outbuf);
2323     avcodec_close(c);
2324     av_freep(&c);
2325
2326     for(i=0; i<s->max_b_frames+2; i++){
2327         av_freep(&input[i].data[0]);
2328     }
2329
2330     return best_b_count;
2331 }
2332
/**
 * Select the next picture to encode from the buffered input pictures.
 *
 * Shifts the reorder queue by one, then — if no picture is pending — decides
 * the type of the next coded frame:
 *  - no reference yet, or intra-only codec: emit an I frame immediately;
 *  - otherwise optionally drop the input (frame_skip_* options), choose how
 *    many consecutive B frames to use according to avctx->b_frame_strategy,
 *    and queue the chosen reference frame followed by the re-flagged B frames
 *    in coded order.
 * On return the picture to code now (if any) is in s->new_picture and
 * s->current_picture_ptr; otherwise s->new_picture is zeroed.
 */
static void select_input_picture(MpegEncContext *s){
    int i;

    /* advance the reorder queue by one slot */
    for(i=1; i<MAX_PICTURE_COUNT; i++)
        s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
    s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;

    /* set next picture type & ordering */
    if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
        if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
            /* no reference frame available (or intra-only) -> code an I frame */
            s->reordered_input_picture[0]= s->input_picture[0];
            s->reordered_input_picture[0]->pict_type= I_TYPE;
            s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
        }else{
            int b_frames;

            if(s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor){
                /* frame skipping enabled: drop this input if it is close enough
                   to the last reference, but never skip across a GOP boundary */
                if(s->picture_in_gop_number < s->gop_size && skip_check(s, s->input_picture[0], s->next_picture_ptr)){
                //FIXME check that the gop check above is +-1 correct
//av_log(NULL, AV_LOG_DEBUG, "skip %p %"PRId64"\n", s->input_picture[0]->data[0], s->input_picture[0]->pts);

                    /* release / unshare the dropped picture's buffers */
                    if(s->input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
                        for(i=0; i<4; i++)
                            s->input_picture[0]->data[i]= NULL;
                        s->input_picture[0]->type= 0;
                    }else{
                        assert(   s->input_picture[0]->type==FF_BUFFER_TYPE_USER
                               || s->input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);

                        s->avctx->release_buffer(s->avctx, (AVFrame*)s->input_picture[0]);
                    }

                    emms_c();
                    ff_vbv_update(s, 0); /* account a zero-bit frame to the VBV model */

                    goto no_output_pic;
                }
            }

            if(s->flags&CODEC_FLAG_PASS2){
                /* 2nd pass: use the picture types recorded in the 1st-pass stats */
                for(i=0; i<s->max_b_frames+1; i++){
                    int pict_num= s->input_picture[0]->display_picture_number + i;

                    if(pict_num >= s->rc_context.num_entries)
                        break;
                    if(!s->input_picture[i]){
                        s->rc_context.entry[pict_num-1].new_pict_type = P_TYPE;
                        break;
                    }

                    s->input_picture[i]->pict_type=
                        s->rc_context.entry[pict_num].new_pict_type;
                }
            }

            if(s->avctx->b_frame_strategy==0){
                /* strategy 0: always use max_b_frames (fewer only near end of stream) */
                b_frames= s->max_b_frames;
                while(b_frames && !s->input_picture[b_frames]) b_frames--;
            }else if(s->avctx->b_frame_strategy==1){
                /* strategy 1: end the B run at the first frame whose
                   intra-MB count exceeds mb_num/b_sensitivity */
                for(i=1; i<s->max_b_frames+1; i++){
                    if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
                        s->input_picture[i]->b_frame_score=
                            get_intra_count(s, s->input_picture[i  ]->data[0],
                                               s->input_picture[i-1]->data[0], s->linesize) + 1;
                    }
                }
                for(i=0; i<s->max_b_frames+1; i++){
                    if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/s->avctx->b_sensitivity) break;
                }

                b_frames= FFMAX(0, i-1);

                /* reset scores */
                for(i=0; i<b_frames+1; i++){
                    s->input_picture[i]->b_frame_score=0;
                }
            }else if(s->avctx->b_frame_strategy==2){
                /* strategy 2: trial-encode every possible B count and keep the best */
                b_frames= estimate_best_b_count(s);
            }else{
                av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
                b_frames=0;
            }

            emms_c();
//static int b_count=0;
//b_count+= b_frames;
//av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);

            /* honor picture types already forced (pass-2 stats / user) */
            for(i= b_frames - 1; i>=0; i--){
                int type= s->input_picture[i]->pict_type;
                if(type && type != B_TYPE)
                    b_frames= i;
            }
            if(s->input_picture[b_frames]->pict_type == B_TYPE && b_frames == s->max_b_frames){
                av_log(s->avctx, AV_LOG_ERROR, "warning, too many b frames in a row\n");
            }

            /* force an I frame at the GOP boundary */
            if(s->picture_in_gop_number + b_frames >= s->gop_size){
              if((s->flags2 & CODEC_FLAG2_STRICT_GOP) && s->gop_size > s->picture_in_gop_number){
                    b_frames= s->gop_size - s->picture_in_gop_number - 1;
              }else{
                if(s->flags & CODEC_FLAG_CLOSED_GOP)
                    b_frames=0;
                s->input_picture[b_frames]->pict_type= I_TYPE;
              }
            }

            /* a closed GOP must not place B frames just before its I frame */
            if(   (s->flags & CODEC_FLAG_CLOSED_GOP)
               && b_frames
               && s->input_picture[b_frames]->pict_type== I_TYPE)
                b_frames--;

            /* emit the reference frame first (coded order), then the B run */
            s->reordered_input_picture[0]= s->input_picture[b_frames];
            if(s->reordered_input_picture[0]->pict_type != I_TYPE)
                s->reordered_input_picture[0]->pict_type= P_TYPE;
            s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
            for(i=0; i<b_frames; i++){
                s->reordered_input_picture[i+1]= s->input_picture[i];
                s->reordered_input_picture[i+1]->pict_type= B_TYPE;
                s->reordered_input_picture[i+1]->coded_picture_number= s->coded_picture_number++;
            }
        }
    }
no_output_pic:
    if(s->reordered_input_picture[0]){
        /* B frames are not used as references */
        s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;

        copy_picture(&s->new_picture, s->reordered_input_picture[0]);

        if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED || s->avctx->rc_buffer_size){
            // input is a shared pix, so we can't modify it -> alloc a new one & ensure that the shared one is reusable

            int i= ff_find_unused_picture(s, 0);
            Picture *pic= &s->picture[i];

            pic->reference              = s->reordered_input_picture[0]->reference;
            alloc_picture(s, pic, 0);

            /* mark us unused / free shared pic */
            if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_INTERNAL)
                s->avctx->release_buffer(s->avctx, (AVFrame*)s->reordered_input_picture[0]);
            for(i=0; i<4; i++)
                s->reordered_input_picture[0]->data[i]= NULL;
            s->reordered_input_picture[0]->type= 0;

            copy_picture_attributes(s, (AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);

            s->current_picture_ptr= pic;
        }else{
            // input is not a shared pix -> reuse buffer for current_pix

            assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER
                   || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);

            s->current_picture_ptr= s->reordered_input_picture[0];
            for(i=0; i<4; i++){
                /* NOTE(review): compensates the INPLACE_OFFSET shift —
                   presumably applied when the picture was loaded; confirm
                   against load_input_picture */
                s->new_picture.data[i]+= INPLACE_OFFSET;
            }
        }
        copy_picture(&s->current_picture, s->current_picture_ptr);

        s->picture_number= s->new_picture.display_picture_number;
//printf("dpn:%d\n", s->picture_number);
    }else{
       memset(&s->new_picture, 0, sizeof(Picture));
    }
}
2500
/**
 * Main encoding entry point: code one picture into buf.
 *
 * @param avctx codec context whose priv_data is an MpegEncContext
 * @param buf output buffer receiving the coded frame
 * @param buf_size size of buf in bytes
 * @param data input AVFrame (may be queued and coded later due to B-frame
 *             reordering), or NULL to drain delayed frames
 * @return number of bytes written to buf (0 if no frame was output yet),
 *         or -1 on error
 */
int MPV_encode_picture(AVCodecContext *avctx,
                       unsigned char *buf, int buf_size, void *data)
{
    MpegEncContext *s = avctx->priv_data;
    AVFrame *pic_arg = data;
    int i, stuffing_count;

    /* split the output buffer proportionally among the slice threads */
    for(i=0; i<avctx->thread_count; i++){
        int start_y= s->thread_context[i]->start_mb_y;
        int   end_y= s->thread_context[i]->  end_mb_y;
        int h= s->mb_height;
        uint8_t *start= buf + (size_t)(((int64_t) buf_size)*start_y/h);
        uint8_t *end  = buf + (size_t)(((int64_t) buf_size)*  end_y/h);

        init_put_bits(&s->thread_context[i]->pb, start, end - start);
    }

    s->picture_in_gop_number++;

    if(load_input_picture(s, pic_arg) < 0)
        return -1;

    /* pick the frame to code now (handles B-frame reordering / skipping) */
    select_input_picture(s);

    /* output? */
    if(s->new_picture.data[0]){
        s->pict_type= s->new_picture.pict_type;
//emms_c();
//printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
        MPV_frame_start(s, avctx);
vbv_retry:
        if (encode_picture(s, s->picture_number) < 0)
            return -1;

        /* export per-frame encoding statistics */
        avctx->real_pict_num  = s->picture_number;
        avctx->header_bits = s->header_bits;
        avctx->mv_bits     = s->mv_bits;
        avctx->misc_bits   = s->misc_bits;
        avctx->i_tex_bits  = s->i_tex_bits;
        avctx->p_tex_bits  = s->p_tex_bits;
        avctx->i_count     = s->i_count;
        avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
        avctx->skip_count  = s->skip_count;

        MPV_frame_end(s);

        if (ENABLE_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
            ff_mjpeg_encode_picture_trailer(s);

        if(avctx->rc_buffer_size){
            /* VBV compliance: if the frame came out too big, raise lambda and
               re-encode it, undoing the side effects of the failed attempt */
            RateControlContext *rcc= &s->rc_context;
            int max_size= rcc->buffer_index/3;

            if(put_bits_count(&s->pb) > max_size && s->lambda < s->avctx->lmax){
                s->next_lambda= FFMAX(s->lambda+1, s->lambda*(s->qscale+1) / s->qscale);
                if(s->adaptive_quant){
                    int i;
                    for(i=0; i<s->mb_height*s->mb_stride; i++)
                        s->lambda_table[i]= FFMAX(s->lambda_table[i]+1, s->lambda_table[i]*(s->qscale+1) / s->qscale);
                }
                s->mb_skipped = 0;        //done in MPV_frame_start()
                if(s->pict_type==P_TYPE){ //done in encode_picture() so we must undo it
                    if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
                        s->no_rounding ^= 1;
                }
                if(s->pict_type!=B_TYPE){
                    s->time_base= s->last_time_base;
                    s->last_non_b_time= s->time - s->pp_time;
                }
//                av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
                /* rewind all per-thread bit writers before re-encoding */
                for(i=0; i<avctx->thread_count; i++){
                    PutBitContext *pb= &s->thread_context[i]->pb;
                    init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
                }
                goto vbv_retry;
            }

            assert(s->avctx->rc_max_rate);
        }

        if(s->flags&CODEC_FLAG_PASS1)
            ff_write_pass1_stats(s);

        for(i=0; i<4; i++){
            s->current_picture_ptr->error[i]= s->current_picture.error[i];
            avctx->error[i] += s->current_picture_ptr->error[i];
        }

        if(s->flags&CODEC_FLAG_PASS1)
            assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits + avctx->i_tex_bits + avctx->p_tex_bits == put_bits_count(&s->pb));
        flush_put_bits(&s->pb);
        s->frame_bits  = put_bits_count(&s->pb);

        /* pad with stuffing if the rate control asks for it */
        stuffing_count= ff_vbv_update(s, s->frame_bits);
        if(stuffing_count){
            if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < stuffing_count + 50){
                av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
                return -1;
            }

            switch(s->codec_id){
            case CODEC_ID_MPEG1VIDEO:
            case CODEC_ID_MPEG2VIDEO:
                /* MPEG-1/2: zero bytes are legal stuffing */
                while(stuffing_count--){
                    put_bits(&s->pb, 8, 0);
                }
            break;
            case CODEC_ID_MPEG4:
                /* MPEG-4: stuffing startcode followed by 0xFF filler */
                put_bits(&s->pb, 16, 0);
                put_bits(&s->pb, 16, 0x1C3);
                stuffing_count -= 4;
                while(stuffing_count--){
                    put_bits(&s->pb, 8, 0xFF);
                }
            break;
            default:
                av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
            }
            flush_put_bits(&s->pb);
            s->frame_bits  = put_bits_count(&s->pb);
        }

        /* update mpeg1/2 vbv_delay for CBR */
        if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && s->out_format == FMT_MPEG1
           && 90000LL * (avctx->rc_buffer_size-1) <= s->avctx->rc_max_rate*0xFFFFLL){
            int vbv_delay;

            assert(s->repeat_first_field==0);

            vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate);
            assert(vbv_delay < 0xFFFF);

            /* patch the 16-bit vbv_delay field in the already-written picture header */
            s->vbv_delay_ptr[0] &= 0xF8;
            s->vbv_delay_ptr[0] |= vbv_delay>>13;
            s->vbv_delay_ptr[1]  = vbv_delay>>5;
            s->vbv_delay_ptr[2] &= 0x07;
            s->vbv_delay_ptr[2] |= vbv_delay<<3;
        }
        s->total_bits += s->frame_bits;
        avctx->frame_bits  = s->frame_bits;
    }else{
        /* nothing to output yet (input was only buffered or skipped) */
        assert((pbBufPtr(&s->pb) == s->pb.buf));
        s->frame_bits=0;
    }
    assert((s->frame_bits&7)==0);

    return s->frame_bits/8;
}
2649
2650 #endif //CONFIG_ENCODERS
2651
2652 static inline void gmc1_motion(MpegEncContext *s,
2653                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2654                                uint8_t **ref_picture)
2655 {
2656     uint8_t *ptr;
2657     int offset, src_x, src_y, linesize, uvlinesize;
2658     int motion_x, motion_y;
2659     int emu=0;
2660
2661     motion_x= s->sprite_offset[0][0];
2662     motion_y= s->sprite_offset[0][1];
2663     src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
2664     src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
2665     motion_x<<=(3-s->sprite_warping_accuracy);
2666     motion_y<<=(3-s->sprite_warping_accuracy);
2667     src_x = av_clip(src_x, -16, s->width);
2668     if (src_x == s->width)
2669         motion_x =0;
2670     src_y = av_clip(src_y, -16, s->height);
2671     if (src_y == s->height)
2672         motion_y =0;
2673
2674     linesize = s->linesize;
2675     uvlinesize = s->uvlinesize;
2676
2677     ptr = ref_picture[0] + (src_y * linesize) + src_x;
2678
2679     if(s->flags&CODEC_FLAG_EMU_EDGE){
2680         if(   (unsigned)src_x >= s->h_edge_pos - 17
2681            || (unsigned)src_y >= s->v_edge_pos - 17){
2682             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
2683             ptr= s->edge_emu_buffer;
2684         }
2685     }
2686
2687     if((motion_x|motion_y)&7){
2688         s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
2689         s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
2690     }else{
2691         int dxy;
2692
2693         dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
2694         if (s->no_rounding){
2695             s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
2696         }else{
2697             s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
2698         }
2699     }
2700
2701     if(s->flags&CODEC_FLAG_GRAY) return;
2702
2703     motion_x= s->sprite_offset[1][0];
2704     motion_y= s->sprite_offset[1][1];
2705     src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
2706     src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
2707     motion_x<<=(3-s->sprite_warping_accuracy);
2708     motion_y<<=(3-s->sprite_warping_accuracy);
2709     src_x = av_clip(src_x, -8, s->width>>1);
2710     if (src_x == s->width>>1)
2711         motion_x =0;
2712     src_y = av_clip(src_y, -8, s->height>>1);
2713     if (src_y == s->height>>1)
2714         motion_y =0;
2715
2716     offset = (src_y * uvlinesize) + src_x;
2717     ptr = ref_picture[1] + offset;
2718     if(s->flags&CODEC_FLAG_EMU_EDGE){
2719         if(   (unsigned)src_x >= (s->h_edge_pos>>1) - 9
2720            || (unsigned)src_y >= (s->v_edge_pos>>1) - 9){
2721             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2722             ptr= s->edge_emu_buffer;
2723             emu=1;
2724         }
2725     }
2726     s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
2727
2728     ptr = ref_picture[2] + offset;
2729     if(emu){
2730         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2731         ptr= s->edge_emu_buffer;
2732     }
2733     s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
2734
2735     return;
2736 }
2737
2738 static inline void gmc_motion(MpegEncContext *s,
2739                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2740                                uint8_t **ref_picture)
2741 {
2742     uint8_t *ptr;
2743     int linesize, uvlinesize;
2744     const int a= s->sprite_warping_accuracy;
2745     int ox, oy;
2746
2747     linesize = s->linesize;
2748     uvlinesize = s->uvlinesize;
2749
2750     ptr = ref_picture[0];
2751
2752     ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
2753     oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
2754
2755     s->dsp.gmc(dest_y, ptr, linesize, 16,
2756            ox,
2757            oy,
2758            s->sprite_delta[0][0], s->sprite_delta[0][1],
2759            s->sprite_delta[1][0], s->sprite_delta[1][1],
2760            a+1, (1<<(2*a+1)) - s->no_rounding,
2761            s->h_edge_pos, s->v_edge_pos);
2762     s->dsp.gmc(dest_y+8, ptr, linesize, 16,
2763            ox + s->sprite_delta[0][0]*8,
2764            oy + s->sprite_delta[1][0]*8,
2765            s->sprite_delta[0][0], s->sprite_delta[0][1],
2766            s->sprite_delta[1][0], s->sprite_delta[1][1],
2767            a+1, (1<<(2*a+1)) - s->no_rounding,
2768            s->h_edge_pos, s->v_edge_pos);
2769
2770     if(s->flags&CODEC_FLAG_GRAY) return;
2771
2772     ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
2773     oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
2774
2775     ptr = ref_picture[1];
2776     s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
2777            ox,
2778            oy,
2779            s->sprite_delta[0][0], s->sprite_delta[0][1],
2780            s->sprite_delta[1][0], s->sprite_delta[1][1],
2781            a+1, (1<<(2*a+1)) - s->no_rounding,
2782            s->h_edge_pos>>1, s->v_edge_pos>>1);
2783
2784     ptr = ref_picture[2];
2785     s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
2786            ox,
2787            oy,
2788            s->sprite_delta[0][0], s->sprite_delta[0][1],
2789            s->sprite_delta[1][0], s->sprite_delta[1][1],
2790            a+1, (1<<(2*a+1)) - s->no_rounding,
2791            s->h_edge_pos>>1, s->v_edge_pos>>1);
2792 }
2793
2794 /**
2795  * Copies a rectangular area of samples to a temporary buffer and replicates the boarder samples.
2796  * @param buf destination buffer
2797  * @param src source buffer
2798  * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
2799  * @param block_w width of block
2800  * @param block_h height of block
2801  * @param src_x x coordinate of the top left sample of the block in the source buffer
2802  * @param src_y y coordinate of the top left sample of the block in the source buffer
2803  * @param w width of the source buffer
2804  * @param h height of the source buffer
2805  */
2806 void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
2807                                     int src_x, int src_y, int w, int h){
2808     int x, y;
2809     int start_y, start_x, end_y, end_x;
2810
2811     if(src_y>= h){
2812         src+= (h-1-src_y)*linesize;
2813         src_y=h-1;
2814     }else if(src_y<=-block_h){
2815         src+= (1-block_h-src_y)*linesize;
2816         src_y=1-block_h;
2817     }
2818     if(src_x>= w){
2819         src+= (w-1-src_x);
2820         src_x=w-1;
2821     }else if(src_x<=-block_w){
2822         src+= (1-block_w-src_x);
2823         src_x=1-block_w;
2824     }
2825
2826     start_y= FFMAX(0, -src_y);
2827     start_x= FFMAX(0, -src_x);
2828     end_y= FFMIN(block_h, h-src_y);
2829     end_x= FFMIN(block_w, w-src_x);
2830
2831     // copy existing part
2832     for(y=start_y; y<end_y; y++){
2833         for(x=start_x; x<end_x; x++){
2834             buf[x + y*linesize]= src[x + y*linesize];
2835         }
2836     }
2837
2838     //top
2839     for(y=0; y<start_y; y++){
2840         for(x=start_x; x<end_x; x++){
2841             buf[x + y*linesize]= buf[x + start_y*linesize];
2842         }
2843     }
2844
2845     //bottom
2846     for(y=end_y; y<block_h; y++){
2847         for(x=start_x; x<end_x; x++){
2848             buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
2849         }
2850     }
2851
2852     for(y=0; y<block_h; y++){
2853        //left
2854         for(x=0; x<start_x; x++){
2855             buf[x + y*linesize]= buf[start_x + y*linesize];
2856         }
2857
2858        //right
2859         for(x=end_x; x<block_w; x++){
2860             buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
2861         }
2862     }
2863 }
2864
/**
 * Half-pel motion compensation for one block of a single component.
 *
 * @param dest destination block
 * @param src source plane (not yet offset by the motion vector)
 * @param field_based 1 when operating on a single field (halves vertical range)
 * @param field_select select the bottom field of the source when set
 * @param src_x,src_y integer position of the block in the source plane
 * @param width,height clipping bounds for the motion-compensated position
 * @param h_edge_pos,v_edge_pos last valid sample positions for edge emulation
 * @param w,h block dimensions
 * @param pix_op put/avg pixel function table indexed by hpel dxy
 * @param motion_x,motion_y motion vector in half-pel units
 * @return 1 if the edge emulation buffer was used, 0 otherwise
 */
static inline int hpel_motion(MpegEncContext *s,
                                  uint8_t *dest, uint8_t *src,
                                  int field_based, int field_select,
                                  int src_x, int src_y,
                                  int width, int height, int stride,
                                  int h_edge_pos, int v_edge_pos,
                                  int w, int h, op_pixels_func *pix_op,
                                  int motion_x, int motion_y)
{
    int dxy;
    int emu=0;

    /* low bits of the MV select the half-pel interpolation mode */
    dxy = ((motion_y & 1) << 1) | (motion_x & 1);
    src_x += motion_x >> 1;
    src_y += motion_y >> 1;

    /* WARNING: do not forget half pels */
    src_x = av_clip(src_x, -16, width); //FIXME unneeded for emu?
    if (src_x == width)
        dxy &= ~1;
    src_y = av_clip(src_y, -16, height);
    if (src_y == height)
        dxy &= ~2;
    src += src_y * stride + src_x;

    if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
        if(   (unsigned)src_x > h_edge_pos - (motion_x&1) - w
           || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){
            /* NOTE(review): passes the h_edge_pos parameter but s->v_edge_pos
               (not the v_edge_pos parameter) — the y arguments are scaled back
               to frame coordinates via <<field_based, which presumably makes
               the full-frame vertical edge the right one; confirm */
            ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
                             src_x, src_y<<field_based, h_edge_pos, s->v_edge_pos);
            src= s->edge_emu_buffer;
            emu=1;
        }
    }
    if(field_select)
        src += s->linesize; /* bottom field starts one line down */
    pix_op[dxy](dest, src, stride, h);
    return emu;
}
2904
/**
 * Half-pel motion compensation for one block of a single component, in
 * lowres (1/2^lowres resolution) decoding mode. The sub-pel fraction is
 * handled by the h264 chroma MC functions, which interpolate with
 * 1/8-pel weights (sx/sy scaled to 0..7 below).
 *
 * @return 1 if the edge emulation buffer was used, 0 otherwise
 */
static inline int hpel_motion_lowres(MpegEncContext *s,
                                  uint8_t *dest, uint8_t *src,
                                  int field_based, int field_select,
                                  int src_x, int src_y,
                                  int width, int height, int stride,
                                  int h_edge_pos, int v_edge_pos,
                                  int w, int h, h264_chroma_mc_func *pix_op,
                                  int motion_x, int motion_y)
{
    const int lowres= s->avctx->lowres;
    const int s_mask= (2<<lowres)-1; /* mask of the sub-pel fraction bits */
    int emu=0;
    int sx, sy;

    if(s->quarter_sample){
        /* qpel vectors are treated as hpel in lowres mode */
        motion_x/=2;
        motion_y/=2;
    }

    /* split the vector into integer position and sub-pel fraction */
    sx= motion_x & s_mask;
    sy= motion_y & s_mask;
    src_x += motion_x >> (lowres+1);
    src_y += motion_y >> (lowres+1);

    src += src_y * stride + src_x;

    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - w
       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
        ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
                            src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
        src= s->edge_emu_buffer;
        emu=1;
    }

    /* scale the fraction to the 1/8-pel weights expected by pix_op */
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    if(field_select)
        src += s->linesize; /* bottom field starts one line down */
    pix_op[lowres](dest, src, stride, h, sx, sy);
    return emu;
}
2946
/**
 * Apply one mpeg motion vector to the three components (Y, Cb, Cr).
 *
 * Derives the chroma vector/interpolation mode from the luma vector according
 * to the output format (H.263, H.261, or MPEG 4:2:0/4:2:2/4:4:4), performs
 * edge emulation when the block reaches outside the picture, and runs the
 * selected put/avg pixel operations.
 *
 * @param field_based 1 when compensating a single field (doubles the stride)
 * @param bottom_field write to the bottom field of the destination when set
 * @param field_select read from the bottom field of the reference when set
 * @param pix_op pixel op table [chroma_x_shift][dxy]
 * @param motion_x,motion_y luma motion vector in half-pel units
 * @param h height of the block in lines
 */
static av_always_inline void mpeg_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, uvlinesize, linesize;

#if 0
if(s->quarter_sample)
{
    motion_x>>=1;
    motion_y>>=1;
}
#endif

    v_edge_pos = s->v_edge_pos >> field_based;
    linesize   = s->current_picture.linesize[0] << field_based;
    uvlinesize = s->current_picture.linesize[1] << field_based;

    /* low bits of the MV select the half-pel interpolation mode */
    dxy = ((motion_y & 1) << 1) | (motion_x & 1);
    src_x = s->mb_x* 16               + (motion_x >> 1);
    src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1);

    if (s->out_format == FMT_H263) {
        if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){
            /* buggy-encoder workaround: chroma rounded like broken encoders do */
            mx = (motion_x>>1)|(motion_x&1);
            my = motion_y >>1;
            uvdxy = ((my & 1) << 1) | (mx & 1);
            uvsrc_x = s->mb_x* 8               + (mx >> 1);
            uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
        }else{
            uvdxy = dxy | (motion_y & 2) | ((motion_x & 2) >> 1);
            uvsrc_x = src_x>>1;
            uvsrc_y = src_y>>1;
        }
    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
        mx = motion_x / 4;
        my = motion_y / 4;
        uvdxy = 0;
        uvsrc_x = s->mb_x*8 + mx;
        uvsrc_y = s->mb_y*8 + my;
    } else {
        if(s->chroma_y_shift){
            /* 4:2:0: chroma vector is half the luma vector in both directions */
            mx = motion_x / 2;
            my = motion_y / 2;
            uvdxy = ((my & 1) << 1) | (mx & 1);
            uvsrc_x = s->mb_x* 8               + (mx >> 1);
            uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
        } else {
            if(s->chroma_x_shift){
            //Chroma422
                mx = motion_x / 2;
                uvdxy = ((motion_y & 1) << 1) | (mx & 1);
                uvsrc_x = s->mb_x* 8           + (mx >> 1);
                uvsrc_y = src_y;
            } else {
            //Chroma444
                uvdxy = dxy;
                uvsrc_x = src_x;
                uvsrc_y = src_y;
            }
        }
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
       || (unsigned)src_y >    v_edge_pos - (motion_y&1) - h){
            /* block reaches outside the picture */
            if(s->codec_id == CODEC_ID_MPEG2VIDEO ||
               s->codec_id == CODEC_ID_MPEG1VIDEO){
                /* MPEG-1/2 forbid such vectors: ignore instead of emulating */
                av_log(s->avctx,AV_LOG_DEBUG,"MPEG motion vector out of boundary\n");
                return ;
            }
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
            ptr_y = s->edge_emu_buffer;
            if(!(s->flags&CODEC_FLAG_GRAY)){
                /* chroma goes after the 18 emulated luma lines in the buffer */
                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
                ptr_cb= uvbuf;
                ptr_cr= uvbuf+16;
            }
    }

    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
        dest_y += s->linesize;
        dest_cb+= s->uvlinesize;
        dest_cr+= s->uvlinesize;
    }

    if(field_select){
        /* read from the bottom field of the reference */
        ptr_y += s->linesize;
        ptr_cb+= s->uvlinesize;
        ptr_cr+= s->uvlinesize;
    }

    pix_op[0][dxy](dest_y, ptr_y, linesize, h);

    if(!(s->flags&CODEC_FLAG_GRAY)){
        pix_op[s->chroma_x_shift][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
        pix_op[s->chroma_x_shift][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
    }
    if((ENABLE_H261_ENCODER || ENABLE_H261_DECODER) && s->out_format == FMT_H261){
        ff_h261_loop_filter(s);
    }
}
3061
3062 /* apply one mpeg motion vector to the three components */
/**
 * Apply one MPEG motion vector to the three components, lowres variant.
 * Like mpeg_motion() but works on a picture downscaled by 2^lowres and
 * uses the h264 chroma MC functions for the sub-pel interpolation.
 */
static av_always_inline void mpeg_motion_lowres(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;       // luma block size at this downscale (8/4/2)
    const int s_mask= (2<<lowres)-1;    // MV bits that become the sub-pel phase
    const int h_edge_pos = s->h_edge_pos >> lowres;
    const int v_edge_pos = s->v_edge_pos >> lowres;
    linesize   = s->current_picture.linesize[0] << field_based;
    uvlinesize = s->current_picture.linesize[1] << field_based;

    if(s->quarter_sample){ //FIXME obviously not perfect but qpel wont work in lowres anyway
        motion_x/=2;
        motion_y/=2;
    }

    if(field_based){
        // compensate for the vertical offset between the two fields
        motion_y += (bottom_field - field_select)*((1<<lowres)-1);
    }

    // split the MV into an integer source position and a sub-pel phase
    sx= motion_x & s_mask;
    sy= motion_y & s_mask;
    src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
    src_y =(s->mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));

    if (s->out_format == FMT_H263) {
        uvsx = ((motion_x>>1) & s_mask) | (sx&1);
        uvsy = ((motion_y>>1) & s_mask) | (sy&1);
        uvsrc_x = src_x>>1;
        uvsrc_y = src_y>>1;
    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
        mx = motion_x / 4;
        my = motion_y / 4;
        uvsx = (2*mx) & s_mask;
        uvsy = (2*my) & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
        uvsrc_y = s->mb_y*block_s               + (my >> lowres);
    } else {
        // MPEG-1/2 style: chroma MV is half the luma MV
        mx = motion_x / 2;
        my = motion_y / 2;
        uvsx = mx & s_mask;
        uvsy = my & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
        uvsrc_y =(s->mb_y*block_s>>field_based) + (my >> (lowres+1));
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    // source block sticks out of the picture -> replicate the edge pixels
    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
            ptr_y = s->edge_emu_buffer;
            if(!(s->flags&CODEC_FLAG_GRAY)){
                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ptr_cb= uvbuf;
                ptr_cr= uvbuf+16;
            }
    }

    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
        dest_y += s->linesize;
        dest_cb+= s->uvlinesize;
        dest_cr+= s->uvlinesize;
    }

    if(field_select){
        ptr_y += s->linesize;
        ptr_cb+= s->uvlinesize;
        ptr_cr+= s->uvlinesize;
    }

    // scale the sub-pel phase up to the units the chroma MC functions expect
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);

    if(!(s->flags&CODEC_FLAG_GRAY)){
        uvsx <<= 2 - lowres;
        uvsy <<= 2 - lowres;
        pix_op[lowres](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
        pix_op[lowres](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
    }
    //FIXME h261 lowres loop filter
}
3158
3159 //FIXME move to dsputil, avg variant, 16x16 version
/* Blend the five 8x8 OBMC predictions (mid/top/left/right/bottom) into dst
 * with per-pixel weights; the five weights of every pixel sum to 8. */
static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
    int x;
    uint8_t * const top   = src[1];
    uint8_t * const left  = src[2];
    uint8_t * const mid   = src[0];
    uint8_t * const right = src[3];
    uint8_t * const bottom= src[4];
/* weighted sum of the five predictions for a single pixel, rounded, /8 */
#define OBMC_FILTER(x, t, l, m, r, b)\
    dst[x]= (t*top[x] + l*left[x] + m*mid[x] + r*right[x] + b*bottom[x] + 4)>>3
/* same weights applied to a 2x2 group of pixels */
#define OBMC_FILTER4(x, t, l, m, r, b)\
    OBMC_FILTER(x         , t, l, m, r, b);\
    OBMC_FILTER(x+1       , t, l, m, r, b);\
    OBMC_FILTER(x  +stride, t, l, m, r, b);\
    OBMC_FILTER(x+1+stride, t, l, m, r, b);

    /* the weight tables below walk the 8x8 block row by row; neighbour
     * weights grow towards the corresponding block edge */
    x=0;
    OBMC_FILTER (x  , 2, 2, 4, 0, 0);
    OBMC_FILTER (x+1, 2, 1, 5, 0, 0);
    OBMC_FILTER4(x+2, 2, 1, 5, 0, 0);
    OBMC_FILTER4(x+4, 2, 0, 5, 1, 0);
    OBMC_FILTER (x+6, 2, 0, 5, 1, 0);
    OBMC_FILTER (x+7, 2, 0, 4, 2, 0);
    x+= stride;
    OBMC_FILTER (x  , 1, 2, 5, 0, 0);
    OBMC_FILTER (x+1, 1, 2, 5, 0, 0);
    OBMC_FILTER (x+6, 1, 0, 5, 2, 0);
    OBMC_FILTER (x+7, 1, 0, 5, 2, 0);
    x+= stride;
    OBMC_FILTER4(x  , 1, 2, 5, 0, 0);
    OBMC_FILTER4(x+2, 1, 1, 6, 0, 0);
    OBMC_FILTER4(x+4, 1, 0, 6, 1, 0);
    OBMC_FILTER4(x+6, 1, 0, 5, 2, 0);
    x+= 2*stride;
    OBMC_FILTER4(x  , 0, 2, 5, 0, 1);
    OBMC_FILTER4(x+2, 0, 1, 6, 0, 1);
    OBMC_FILTER4(x+4, 0, 0, 6, 1, 1);
    OBMC_FILTER4(x+6, 0, 0, 5, 2, 1);
    x+= 2*stride;
    OBMC_FILTER (x  , 0, 2, 5, 0, 1);
    OBMC_FILTER (x+1, 0, 2, 5, 0, 1);
    OBMC_FILTER4(x+2, 0, 1, 5, 0, 2);
    OBMC_FILTER4(x+4, 0, 0, 5, 1, 2);
    OBMC_FILTER (x+6, 0, 0, 5, 2, 1);
    OBMC_FILTER (x+7, 0, 0, 5, 2, 1);
    x+= stride;
    OBMC_FILTER (x  , 0, 2, 4, 0, 2);
    OBMC_FILTER (x+1, 0, 1, 5, 0, 2);
    OBMC_FILTER (x+6, 0, 0, 5, 1, 2);
    OBMC_FILTER (x+7, 0, 0, 4, 2, 2);
}
3210
3211 /* obmc for 1 8x8 luma block */
3212 static inline void obmc_motion(MpegEncContext *s,
3213                                uint8_t *dest, uint8_t *src,
3214                                int src_x, int src_y,
3215                                op_pixels_func *pix_op,
3216                                int16_t mv[5][2]/* mid top left right bottom*/)
3217 #define MID    0
3218 {
3219     int i;
3220     uint8_t *ptr[5];
3221
3222     assert(s->quarter_sample==0);
3223
3224     for(i=0; i<5; i++){
3225         if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){
3226             ptr[i]= ptr[MID];
3227         }else{
3228             ptr[i]= s->obmc_scratchpad + 8*(i&1) + s->linesize*8*(i>>1);
3229             hpel_motion(s, ptr[i], src, 0, 0,
3230                         src_x, src_y,
3231                         s->width, s->height, s->linesize,
3232                         s->h_edge_pos, s->v_edge_pos,
3233                         8, 8, pix_op,
3234                         mv[i][0], mv[i][1]);
3235         }
3236     }
3237
3238     put_obmc(dest, ptr, s->linesize);
3239 }
3240
/**
 * Quarter-pel motion compensation of one macroblock: luma uses the qpel
 * functions, chroma is compensated with halfpel precision. Handles both
 * frame and field based prediction.
 */
static inline void qpel_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
                               qpel_mc_func (*qpix_op)[16],
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, linesize, uvlinesize;

    // low 2 bits of each MV component select one of the 16 qpel filters
    dxy = ((motion_y & 3) << 2) | (motion_x & 3);
    src_x = s->mb_x *  16                 + (motion_x >> 2);
    src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);

    v_edge_pos = s->v_edge_pos >> field_based;
    linesize = s->linesize << field_based;
    uvlinesize = s->uvlinesize << field_based;

    // derive the chroma MV; exact rounding depends on encoder bugs we emulate
    if(field_based){
        mx= motion_x/2;
        my= motion_y>>1;
    }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
        static const int rtab[8]= {0,0,1,1,0,0,0,1};
        mx= (motion_x>>1) + rtab[motion_x&7];
        my= (motion_y>>1) + rtab[motion_y&7];
    }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
        mx= (motion_x>>1)|(motion_x&1);
        my= (motion_y>>1)|(motion_y&1);
    }else{
        mx= motion_x/2;
        my= motion_y/2;
    }
    // reduce chroma to halfpel precision, keeping a sub-pel remainder bit
    mx= (mx>>1)|(mx&1);
    my= (my>>1)|(my&1);

    uvdxy= (mx&1) | ((my&1)<<1);
    mx>>=1;
    my>>=1;

    uvsrc_x = s->mb_x *  8                 + mx;
    uvsrc_y = s->mb_y * (8 >> field_based) + my;

    ptr_y  = ref_picture[0] +   src_y *   linesize +   src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    // source block overlaps the picture border -> use edge emulation
    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16
       || (unsigned)src_y >    v_edge_pos - (motion_y&3) - h  ){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                         src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
        ptr_y= s->edge_emu_buffer;
        if(!(s->flags&CODEC_FLAG_GRAY)){
            uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize;
            ff_emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize, 9, 9 + field_based,
                             uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ff_emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize, 9, 9 + field_based,
                             uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr_cb= uvbuf;
            ptr_cr= uvbuf + 16;
        }
    }

    if(!field_based)
        qpix_op[0][dxy](dest_y, ptr_y, linesize);
    else{
        if(bottom_field){
            dest_y += s->linesize;
            dest_cb+= s->uvlinesize;
            dest_cr+= s->uvlinesize;
        }

        if(field_select){
            ptr_y  += s->linesize;
            ptr_cb += s->uvlinesize;
            ptr_cr += s->uvlinesize;
        }
        //damn interlaced mode
        //FIXME boundary mirroring is not exactly correct here
        qpix_op[1][dxy](dest_y  , ptr_y  , linesize);
        qpix_op[1][dxy](dest_y+8, ptr_y+8, linesize);
    }
    if(!(s->flags&CODEC_FLAG_GRAY)){
        pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1);
        pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1);
    }
}
3327
3328 inline int ff_h263_round_chroma(int x){
3329     if (x >= 0)
3330         return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3331     else {
3332         x = -x;
3333         return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3334     }
3335 }
3336
3337 /**
 * h263 chroma 4mv motion compensation.
3339  */
/**
 * Chroma motion compensation for 4MV macroblocks: the caller passes the
 * sum of the four luma MVs, which is rounded here into one chroma MV.
 */
static inline void chroma_4mv_motion(MpegEncContext *s,
                                     uint8_t *dest_cb, uint8_t *dest_cr,
                                     uint8_t **ref_picture,
                                     op_pixels_func *pix_op,
                                     int mx, int my){
    int dxy, emu=0, src_x, src_y, offset;
    uint8_t *ptr;

    /* In case of 8X8, we construct a single chroma motion vector
       with a special rounding */
    mx= ff_h263_round_chroma(mx);
    my= ff_h263_round_chroma(my);

    // halfpel phase goes into dxy, integer part stays in mx/my
    dxy = ((my & 1) << 1) | (mx & 1);
    mx >>= 1;
    my >>= 1;

    src_x = s->mb_x * 8 + mx;
    src_y = s->mb_y * 8 + my;
    src_x = av_clip(src_x, -8, s->width/2);
    if (src_x == s->width/2)
        dxy &= ~1;      // no horizontal halfpel at the right edge
    src_y = av_clip(src_y, -8, s->height/2);
    if (src_y == s->height/2)
        dxy &= ~2;      // no vertical halfpel at the bottom edge

    offset = (src_y * (s->uvlinesize)) + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
           || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr= s->edge_emu_buffer;
            emu=1;
        }
    }
    pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);

    ptr = ref_picture[2] + offset;
    if(emu){
        // cr needs edge emulation exactly when cb did (same source offset)
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
        ptr= s->edge_emu_buffer;
    }
    pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
}
3385
/**
 * Lowres variant of chroma_4mv_motion(): operates on a 2^lowres downscaled
 * picture and uses the h264 chroma MC functions for sub-pel interpolation.
 */
static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
                                     uint8_t *dest_cb, uint8_t *dest_cr,
                                     uint8_t **ref_picture,
                                     h264_chroma_mc_func *pix_op,
                                     int mx, int my){
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;       // chroma block size at this downscale
    const int s_mask= (2<<lowres)-1;    // MV bits that become the sub-pel phase
    const int h_edge_pos = s->h_edge_pos >> (lowres+1);
    const int v_edge_pos = s->v_edge_pos >> (lowres+1);
    int emu=0, src_x, src_y, offset, sx, sy;
    uint8_t *ptr;

    if(s->quarter_sample){
        mx/=2;
        my/=2;
    }

    /* In case of 8X8, we construct a single chroma motion vector
       with a special rounding */
    mx= ff_h263_round_chroma(mx);
    my= ff_h263_round_chroma(my);

    // split the MV into integer source position and sub-pel phase
    sx= mx & s_mask;
    sy= my & s_mask;
    src_x = s->mb_x*block_s + (mx >> (lowres+1));
    src_y = s->mb_y*block_s + (my >> (lowres+1));

    offset = src_y * s->uvlinesize + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
           || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
            ptr= s->edge_emu_buffer;
            emu=1;
        }
    }
    // scale the sub-pel phase up to the units the chroma MC functions expect
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    pix_op[lowres](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);

    ptr = ref_picture[2] + offset;
    if(emu){
        // cr needs edge emulation exactly when cb did (same source offset)
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
        ptr= s->edge_emu_buffer;
    }
    pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
}
3435
3436 static inline void prefetch_motion(MpegEncContext *s, uint8_t **pix, int dir){
3437     /* fetch pixels for estimated mv 4 macroblocks ahead
3438      * optimized for 64byte cache lines */
3439     const int shift = s->quarter_sample ? 2 : 1;
3440     const int mx= (s->mv[dir][0][0]>>shift) + 16*s->mb_x + 8;
3441     const int my= (s->mv[dir][0][1]>>shift) + 16*s->mb_y;
3442     int off= mx + (my + (s->mb_x&3)*4)*s->linesize + 64;
3443     s->dsp.prefetch(pix[0]+off, s->linesize, 4);
3444     off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
3445     s->dsp.prefetch(pix[1]+off, pix[2]-pix[1], 2);
3446 }
3447
3448 /**
3449  * motion compensation of a single macroblock
3450  * @param s context
3451  * @param dest_y luma destination pointer
3452  * @param dest_cb chroma cb/u destination pointer
3453  * @param dest_cr chroma cr/v destination pointer
3454  * @param dir direction (0->forward, 1->backward)
3455  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
 * @param pix_op halfpel motion compensation function (average or put normally)
 * @param qpix_op qpel motion compensation function (average or put normally)
3458  * the motion vectors are taken from s->mv and the MV type from s->mv_type
3459  */
static inline void MPV_motion(MpegEncContext *s,
                              uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                              int dir, uint8_t **ref_picture,
                              op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
{
    int dxy, mx, my, src_x, src_y, motion_x, motion_y;
    int mb_x, mb_y, i;
    uint8_t *ptr, *dest;

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    prefetch_motion(s, ref_picture, dir);

    /* OBMC path: build a 4x4 cache of the 8x8-block MVs of this MB and of
     * its neighbours, then compensate each 8x8 luma block with obmc_motion() */
    if(s->obmc && s->pict_type != B_TYPE){
        int16_t mv_cache[4][4][2];
        const int xy= s->mb_x + s->mb_y*s->mb_stride;
        const int mot_stride= s->b8_stride;
        const int mot_xy= mb_x*2 + mb_y*2*mot_stride;

        assert(!s->mb_skipped);

        // rows 1..3: this MB's two MV rows; the bottom row repeats the second
        // one, presumably because the MB below is not yet decoded (verify)
        memcpy(mv_cache[1][1], s->current_picture.motion_val[0][mot_xy           ], sizeof(int16_t)*4);
        memcpy(mv_cache[2][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
        memcpy(mv_cache[3][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);

        // top neighbour row; fall back to our own top row if unavailable/intra
        if(mb_y==0 || IS_INTRA(s->current_picture.mb_type[xy-s->mb_stride])){
            memcpy(mv_cache[0][1], mv_cache[1][1], sizeof(int16_t)*4);
        }else{
            memcpy(mv_cache[0][1], s->current_picture.motion_val[0][mot_xy-mot_stride], sizeof(int16_t)*4);
        }

        // left neighbour column; fall back to our own left column
        if(mb_x==0 || IS_INTRA(s->current_picture.mb_type[xy-1])){
            *(int32_t*)mv_cache[1][0]= *(int32_t*)mv_cache[1][1];
            *(int32_t*)mv_cache[2][0]= *(int32_t*)mv_cache[2][1];
        }else{
            *(int32_t*)mv_cache[1][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1];
            *(int32_t*)mv_cache[2][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1+mot_stride];
        }

        // right neighbour column; fall back to our own right column
        if(mb_x+1>=s->mb_width || IS_INTRA(s->current_picture.mb_type[xy+1])){
            *(int32_t*)mv_cache[1][3]= *(int32_t*)mv_cache[1][2];
            *(int32_t*)mv_cache[2][3]= *(int32_t*)mv_cache[2][2];
        }else{
            *(int32_t*)mv_cache[1][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2];
            *(int32_t*)mv_cache[2][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2+mot_stride];
        }

        mx = 0;
        my = 0;
        for(i=0;i<4;i++) {
            const int x= (i&1)+1;
            const int y= (i>>1)+1;
            // mid/top/left/right/bottom MVs for this 8x8 block
            int16_t mv[5][2]= {
                {mv_cache[y][x  ][0], mv_cache[y][x  ][1]},
                {mv_cache[y-1][x][0], mv_cache[y-1][x][1]},
                {mv_cache[y][x-1][0], mv_cache[y][x-1][1]},
                {mv_cache[y][x+1][0], mv_cache[y][x+1][1]},
                {mv_cache[y+1][x][0], mv_cache[y+1][x][1]}};
            //FIXME cleanup
            obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
                        ref_picture[0],
                        mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
                        pix_op[1],
                        mv);

            // accumulate the four mid MVs for the chroma MV rounding
            mx += mv[0][0];
            my += mv[0][1];
        }
        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);

        return;
    }

    switch(s->mv_type) {
    case MV_TYPE_16X16:
        if(s->mcsel){
            // global motion compensation (sprite warping)
            if(s->real_sprite_warping_points==1){
                gmc1_motion(s, dest_y, dest_cb, dest_cr,
                            ref_picture);
            }else{
                gmc_motion(s, dest_y, dest_cb, dest_cr,
                            ref_picture);
            }
        }else if(s->quarter_sample){
            qpel_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, 0,
                        ref_picture, pix_op, qpix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }else if(ENABLE_WMV2 && s->mspel){
            ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }else
        {
            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, 0,
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }
        break;
    case MV_TYPE_8X8:
        // 4MV: one vector per 8x8 luma block, one combined chroma vector
        mx = 0;
        my = 0;
        if(s->quarter_sample){
            for(i=0;i<4;i++) {
                motion_x = s->mv[dir][i][0];
                motion_y = s->mv[dir][i][1];

                dxy = ((motion_y & 3) << 2) | (motion_x & 3);
                src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
                src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;

                /* WARNING: do no forget half pels */
                src_x = av_clip(src_x, -16, s->width);
                if (src_x == s->width)
                    dxy &= ~3;
                src_y = av_clip(src_y, -16, s->height);
                if (src_y == s->height)
                    dxy &= ~12;

                ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
                if(s->flags&CODEC_FLAG_EMU_EDGE){
                    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8
                       || (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 8 ){
                        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
                        ptr= s->edge_emu_buffer;
                    }
                }
                dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
                qpix_op[1][dxy](dest, ptr, s->linesize);

                // chroma MV is derived from halved luma MVs
                mx += s->mv[dir][i][0]/2;
                my += s->mv[dir][i][1]/2;
            }
        }else{
            for(i=0;i<4;i++) {
                hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
                            ref_picture[0], 0, 0,
                            mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
                            s->width, s->height, s->linesize,
                            s->h_edge_pos, s->v_edge_pos,
                            8, 8, pix_op[1],
                            s->mv[dir][i][0], s->mv[dir][i][1]);

                mx += s->mv[dir][i][0];
                my += s->mv[dir][i][1];
            }
        }

        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
        break;
    case MV_TYPE_FIELD:
        if (s->picture_structure == PICT_FRAME) {
            if(s->quarter_sample){
                for(i=0; i<2; i++){
                    qpel_motion(s, dest_y, dest_cb, dest_cr,
                                1, i, s->field_select[dir][i],
                                ref_picture, pix_op, qpix_op,
                                s->mv[dir][i][0], s->mv[dir][i][1], 8);
                }
            }else{
                /* top field */
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            1, 0, s->field_select[dir][0],
                            ref_picture, pix_op,
                            s->mv[dir][0][0], s->mv[dir][0][1], 8);
                /* bottom field */
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            1, 1, s->field_select[dir][1],
                            ref_picture, pix_op,
                            s->mv[dir][1][0], s->mv[dir][1][1], 8);
            }
        } else {
            // field picture predicting from the opposite-parity field of the
            // current frame rather than the reference frame
            if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
                ref_picture= s->current_picture_ptr->data;
            }

            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }
        break;
    case MV_TYPE_16X8:
        // two vectors, each covering a 16x8 half of the macroblock
        for(i=0; i<2; i++){
            uint8_t ** ref2picture;

            if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
                ref2picture= ref_picture;
            }else{
                ref2picture= s->current_picture_ptr->data;
            }

            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][i],
                        ref2picture, pix_op,
                        s->mv[dir][i][0], s->mv[dir][i][1] + 16*i, 8);

            dest_y += 16*s->linesize;
            dest_cb+= (16>>s->chroma_y_shift)*s->uvlinesize;
            dest_cr+= (16>>s->chroma_y_shift)*s->uvlinesize;
        }
        break;
    case MV_TYPE_DMV:
        // dual prime: predictions from both parities are averaged
        if(s->picture_structure == PICT_FRAME){
            for(i=0; i<2; i++){
                int j;
                for(j=0; j<2; j++){
                    mpeg_motion(s, dest_y, dest_cb, dest_cr,
                                1, j, j^i,
                                ref_picture, pix_op,
                                s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], 8);
                }
                pix_op = s->dsp.avg_pixels_tab;
            }
        }else{
            for(i=0; i<2; i++){
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            0, 0, s->picture_structure != i+1,
                            ref_picture, pix_op,
                            s->mv[dir][2*i][0],s->mv[dir][2*i][1],16);

                // after put we make avg of the same block
                pix_op=s->dsp.avg_pixels_tab;

                //opposite parity is always in the same frame if this is second field
                if(!s->first_field){
                    ref_picture = s->current_picture_ptr->data;
                }
            }
        }
    break;
    default: assert(0);
    }
}
3698
3699 /**
3700  * motion compensation of a single macroblock
3701  * @param s context
3702  * @param dest_y luma destination pointer
3703  * @param dest_cb chroma cb/u destination pointer
3704  * @param dest_cr chroma cr/v destination pointer
3705  * @param dir direction (0->forward, 1->backward)
3706  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
 * @param pix_op halfpel motion compensation function (average or put normally)
3708  * the motion vectors are taken from s->mv and the MV type from s->mv_type
3709  */
static inline void MPV_motion_lowres(MpegEncContext *s,
                              uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                              int dir, uint8_t **ref_picture,
                              h264_chroma_mc_func *pix_op)
{
    int mx, my;
    int mb_x, mb_y, i;
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;   // block size at this downscale

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    switch(s->mv_type) {
    case MV_TYPE_16X16:
        mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                    0, 0, 0,
                    ref_picture, pix_op,
                    s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
        break;
    case MV_TYPE_8X8:
        // 4MV: one vector per 8x8 luma block, one combined chroma vector
        mx = 0;
        my = 0;
            for(i=0;i<4;i++) {
                hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
                            ref_picture[0], 0, 0,
                            (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
                            s->width, s->height, s->linesize,
                            s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
                            block_s, block_s, pix_op,
                            s->mv[dir][i][0], s->mv[dir][i][1]);

                // accumulate the four luma MVs for the chroma MV rounding
                mx += s->mv[dir][i][0];
                my += s->mv[dir][i][1];
            }

        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
        break;
    case MV_TYPE_FIELD:
        if (s->picture_structure == PICT_FRAME) {
            /* top field */
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        1, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], block_s);
            /* bottom field */
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        1, 1, s->field_select[dir][1],
                        ref_picture, pix_op,
                        s->mv[dir][1][0], s->mv[dir][1][1], block_s);
        } else {
            // field picture predicting from the opposite-parity field of the
            // current frame rather than the reference frame
            if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
                ref_picture= s->current_picture_ptr->data;
            }

            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
        }
        break;
    case MV_TYPE_16X8:
        // two vectors, each covering the upper/lower half of the macroblock
        for(i=0; i<2; i++){
            uint8_t ** ref2picture;

            if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
                ref2picture= ref_picture;
            }else{
                ref2picture= s->current_picture_ptr->data;
            }

            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][i],
                        ref2picture, pix_op,
                        s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s);

            dest_y += 2*block_s*s->linesize;
            dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
            dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
        }
        break;
    case MV_TYPE_DMV:
        // dual prime: predictions from both parities are averaged
        if(s->picture_structure == PICT_FRAME){
            for(i=0; i<2; i++){
                int j;
                for(j=0; j<2; j++){
                    mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                                1, j, j^i,
                                ref_picture, pix_op,
                                s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s);
                }
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;
            }
        }else{
            for(i=0; i<2; i++){
                mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                            0, 0, s->picture_structure != i+1,
                            ref_picture, pix_op,
                            s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s);

                // after put we make avg of the same block
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;

                //opposite parity is always in the same frame if this is second field
                if(!s->first_field){
                    ref_picture = s->current_picture_ptr->data;
                }
            }
        }
    break;
    default: assert(0);
    }
}
3824
/* put block[] to dest[] */
static inline void put_dct(MpegEncContext *s,
                           DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
{
    /* intra path: dequantize the block, then let the IDCT overwrite dest */
    s->dct_unquantize_intra(s, block, i, qscale);
    s->dsp.idct_put (dest, line_size, block);
}
3832
3833 /* add block[] to dest[] */
3834 static inline void add_dct(MpegEncContext *s,
3835                            DCTELEM *block, int i, uint8_t *dest, int line_size)
3836 {
3837     if (s->block_last_index[i] >= 0) {
3838         s->dsp.idct_add (dest, line_size, block);
3839     }
3840 }
3841
3842 static inline void add_dequant_dct(MpegEncContext *s,
3843                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3844 {
3845     if (s->block_last_index[i] >= 0) {
3846         s->dct_unquantize_inter(s, block, i, qscale);
3847
3848         s->dsp.idct_add (dest, line_size, block);
3849     }
3850 }
3851
3852 /**
3853  * cleans dc, ac, coded_block for the current non intra MB
3854  */
3855 void ff_clean_intra_table_entries(MpegEncContext *s)
3856 {
3857     int wrap = s->b8_stride;
3858     int xy = s->block_index[0];
3859
3860     s->dc_val[0][xy           ] =
3861     s->dc_val[0][xy + 1       ] =
3862     s->dc_val[0][xy     + wrap] =
3863     s->dc_val[0][xy + 1 + wrap] = 1024;
3864     /* ac pred */
3865     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
3866     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
3867     if (s->msmpeg4_version>=3) {
3868         s->coded_block[xy           ] =
3869         s->coded_block[xy + 1       ] =
3870         s->coded_block[xy     + wrap] =
3871         s->coded_block[xy + 1 + wrap] = 0;
3872     }
3873     /* chroma */
3874     wrap = s->mb_stride;
3875     xy = s->mb_x + s->mb_y * wrap;
3876     s->dc_val[1][xy] =
3877     s->dc_val[2][xy] = 1024;
3878     /* ac pred */
3879     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
3880     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
3881
3882     s->mbintra_table[xy]= 0;
3883 }
3884
3885 /* generic function called after a macroblock has been parsed by the
3886    decoder or after it has been encoded by the encoder.
3887
3888    Important variables used:
3889    s->mb_intra : true if intra macroblock
3890    s->mv_dir   : motion vector direction
3891    s->mv_type  : motion vector type
3892    s->mv       : motion vector
3893    s->interlaced_dct : true if interlaced dct used (mpeg2)
3894  */
static av_always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
{
    int mb_x, mb_y;
    const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
#ifdef HAVE_XVMC
    /* hardware path: XvMC consumes the pblocks itself, nothing more to do */
    if(s->avctx->xvmc_acceleration){
        XVMC_decode_mb(s);//xvmc uses pblocks
        return;
    }
#endif

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
       /* save DCT coefficients */
       int i,j;
       DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
       for(i=0; i<6; i++)
           for(j=0; j<64; j++)
               *dct++ = block[i][s->dsp.idct_permutation[j]];
    }

    s->current_picture.qscale_table[mb_xy]= s->qscale;

    /* update DC predictors for P macroblocks */
    if (!s->mb_intra) {
        if (s->h263_pred || s->h263_aic) {
            if(s->mbintra_table[mb_xy])
                ff_clean_intra_table_entries(s);
        } else {
            /* mpeg-style predictors: reset to the mid value for this precision */
            s->last_dc[0] =
            s->last_dc[1] =
            s->last_dc[2] = 128 << s->intra_dc_precision;
        }
    }
    else if (s->h263_pred || s->h263_aic)
        s->mbintra_table[mb_xy]=1;

    /* reconstruct pixels unless encoding can skip it (no PSNR requested,
     * intra-only/B frame, non-RD mb decision) */
    if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE) && s->avctx->mb_decision != FF_MB_DECISION_RD)) { //FIXME precalc
        uint8_t *dest_y, *dest_cb, *dest_cr;
        int dct_linesize, dct_offset;
        op_pixels_func (*op_pix)[4];
        qpel_mc_func (*op_qpix)[16];
        const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
        const int uvlinesize= s->current_picture.linesize[1];
        const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
        const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;

        /* avoid copy if macroblock skipped in last frame too */
        /* skip only during decoding as we might trash the buffers during encoding a bit */
        if(!s->encoding){
            uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
            const int age= s->current_picture.age;

            assert(age);

            if (s->mb_skipped) {
                s->mb_skipped= 0;
                assert(s->pict_type!=I_TYPE);

                (*mbskip_ptr) ++; /* indicate that this time we skipped it */
                if(*mbskip_ptr >99) *mbskip_ptr= 99;

                /* if previous was skipped too, then nothing to do !  */
                if (*mbskip_ptr >= age && s->current_picture.reference){
                    return;
                }
            } else if(!s->current_picture.reference){
                (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
                if(*mbskip_ptr >99) *mbskip_ptr= 99;
            } else{
                *mbskip_ptr = 0; /* not skipped */
            }
        }

        /* interlaced DCT interleaves the two fields: double the stride,
         * and the second "row" of blocks starts one line down */
        dct_linesize = linesize << s->interlaced_dct;
        dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;

        if(readable){
            dest_y=  s->dest[0];
            dest_cb= s->dest[1];
            dest_cr= s->dest[2];
        }else{
            /* B-frame pixels may still be needed as-is; reconstruct into
             * scratch and copy out at the end (see !readable below) */
            dest_y = s->b_scratchpad;
            dest_cb= s->b_scratchpad+16*linesize;
            dest_cr= s->b_scratchpad+32*linesize;
        }

        if (!s->mb_intra) {
            /* motion handling */
            /* decoding or more than one mb_type (MC was already done otherwise) */
            if(!s->encoding){
                if(lowres_flag){
                    /* lowres: all planes are MC'd through the h264 chroma tables */
                    h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;

                    if (s->mv_dir & MV_DIR_FORWARD) {
                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
                        op_pix = s->dsp.avg_h264_chroma_pixels_tab;
                    }
                    if (s->mv_dir & MV_DIR_BACKWARD) {
                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
                    }
                }else{
                    op_qpix= s->me.qpel_put;
                    if ((!s->no_rounding) || s->pict_type==B_TYPE){
                        op_pix = s->dsp.put_pixels_tab;
                    }else{
                        op_pix = s->dsp.put_no_rnd_pixels_tab;
                    }
                    /* bidirectional: put the forward prediction, then average
                     * the backward one on top of it */
                    if (s->mv_dir & MV_DIR_FORWARD) {
                        MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
                        op_pix = s->dsp.avg_pixels_tab;
                        op_qpix= s->me.qpel_avg;
                    }
                    if (s->mv_dir & MV_DIR_BACKWARD) {
                        MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
                    }
                }
            }

            /* skip dequant / idct if we are really late ;) */
            if(s->hurry_up>1) goto skip_idct;
            if(s->avctx->skip_idct){
                if(  (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == B_TYPE)
                   ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != I_TYPE)
                   || s->avctx->skip_idct >= AVDISCARD_ALL)
                    goto skip_idct;
            }

            /* add dct residue */
            /* codecs that dequantize during bitstream parsing use the plain
             * add path below; the others dequantize here */
            if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
                                || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
                add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
                add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
                add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    if (s->chroma_y_shift){
                        add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
                        add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
                    }else{
                        dct_linesize >>= 1;
                        dct_offset >>=1;
                        add_dequant_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
                        add_dequant_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
                        add_dequant_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
                        add_dequant_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
                    }
                }
            } else if(s->codec_id != CODEC_ID_WMV2){
                add_dct(s, block[0], 0, dest_y                          , dct_linesize);
                add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
                add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
                add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){//Chroma420
                        add_dct(s, block[4], 4, dest_cb, uvlinesize);
                        add_dct(s, block[5], 5, dest_cr, uvlinesize);
                    }else{
                        //chroma422
                        dct_linesize = uvlinesize << s->interlaced_dct;
                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;

                        add_dct(s, block[4], 4, dest_cb, dct_linesize);
                        add_dct(s, block[5], 5, dest_cr, dct_linesize);
                        add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
                        add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
                        if(!s->chroma_x_shift){//Chroma444
                            add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
                            add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
                            add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
                            add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
                        }
                    }
                }//fi gray
            }
            else if (ENABLE_WMV2) {
                ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
            }
        } else {
            /* dct only in intra block */
            if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
                put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
                put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
                put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){
                        put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
                        put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
                    }else{
                        dct_offset >>=1;
                        dct_linesize >>=1;
                        put_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
                        put_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
                        put_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
                        put_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
                    }
                }
            }else{
                /* mpeg1/2 intra: blocks arrive already dequantized, IDCT only */
                s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
                s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
                s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
                s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){
                        s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
                        s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
                    }else{

                        dct_linesize = uvlinesize << s->interlaced_dct;
                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;

                        s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
                        s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
                        s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
                        s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
                        if(!s->chroma_x_shift){//Chroma444
                            s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
                            s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
                            s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
                            s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
                        }
                    }
                }//gray
            }
        }
skip_idct:
        /* copy the scratchpad reconstruction into the real picture buffers */
        if(!readable){
            s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
            s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
            s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
        }
    }
}
4135
4136 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
4137     if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1);
4138     else                  MPV_decode_mb_internal(s, block, 0);
4139 }
4140
4141 #ifdef CONFIG_ENCODERS
4142
4143 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
4144 {
4145     static const char tab[64]=
4146         {3,2,2,1,1,1,1,1,
4147          1,1,1,1,1,1,1,1,
4148          1,1,1,1,1,1,1,1,
4149          0,0,0,0,0,0,0,0,
4150          0,0,0,0,0,0,0,0,
4151          0,0,0,0,0,0,0,0,
4152          0,0,0,0,0,0,0,0,
4153          0,0,0,0,0,0,0,0};
4154     int score=0;
4155     int run=0;
4156     int i;
4157     DCTELEM *block= s->block[n];
4158     const int last_index= s->block_last_index[n];
4159     int skip_dc;
4160
4161     if(threshold<0){
4162         skip_dc=0;
4163         threshold= -threshold;
4164     }else
4165         skip_dc=1;
4166
4167     /* are all which we could set to zero are allready zero? */
4168     if(last_index<=skip_dc - 1) return;
4169
4170     for(i=0; i<=last_index; i++){
4171         const int j = s->intra_scantable.permutated[i];
4172         const int level = FFABS(block[j]);
4173         if(level==1){
4174             if(skip_dc && i==0) continue;
4175             score+= tab[run];
4176             run=0;
4177         }else if(level>1){
4178             return;
4179         }else{
4180             run++;
4181         }
4182     }
4183     if(score >= threshold) return;
4184     for(i=skip_dc; i<=last_index; i++){
4185         const int j = s->intra_scantable.permutated[i];
4186         block[j]=0;
4187     }
4188     if(block[0]) s->block_last_index[n]= 0;
4189     else         s->block_last_index[n]= -1;
4190 }
4191
4192 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
4193 {
4194     int i;
4195     const int maxlevel= s->max_qcoeff;
4196     const int minlevel= s->min_qcoeff;
4197     int overflow=0;
4198
4199     if(s->mb_intra){
4200         i=1; //skip clipping of intra dc
4201     }else
4202         i=0;
4203
4204     for(;i<=last_index; i++){
4205         const int j= s->intra_scantable.permutated[i];
4206         int level = block[j];
4207
4208         if     (level>maxlevel){
4209             level=maxlevel;
4210             overflow++;
4211         }else if(level<minlevel){
4212             level=minlevel;
4213             overflow++;
4214         }
4215
4216         block[j]= level;
4217     }
4218
4219     if(overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
4220         av_log(s->avctx, AV_LOG_INFO, "warning, clipping %d dct coefficients to %d..%d\n", overflow, minlevel, maxlevel);
4221 }
4222
4223 #endif //CONFIG_ENCODERS
4224
4225 /**
4226  *
4227  * @param h is the normal height, this will be reduced automatically if needed for the last row
4228  */
4229 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
4230     if (s->avctx->draw_horiz_band) {
4231         AVFrame *src;
4232         int offset[4];
4233
4234         if(s->picture_structure != PICT_FRAME){
4235             h <<= 1;
4236             y <<= 1;
4237             if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
4238         }
4239
4240         h= FFMIN(h, s->avctx->height - y);
4241
4242         if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
4243             src= (AVFrame*)s->current_picture_ptr;
4244         else if(s->last_picture_ptr)
4245             src= (AVFrame*)s->last_picture_ptr;
4246         else
4247             return;
4248
4249         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
4250             offset[0]=
4251             offset[1]=
4252             offset[2]=
4253             offset[3]= 0;
4254         }else{
4255             offset[0]= y * s->linesize;;
4256             offset[1]=
4257             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
4258             offset[3]= 0;
4259         }
4260
4261         emms_c();
4262
4263         s->avctx->draw_horiz_band(s->avctx, src, offset,
4264                                   y, s->picture_structure, h);
4265     }
4266 }
4267
4268 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
4269     const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
4270     const int uvlinesize= s->current_picture.linesize[1];
4271     const int mb_size= 4 - s->avctx->lowres;
4272
4273     s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
4274     s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
4275     s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
4276     s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
4277     s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
4278     s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
4279     //block_index is not used by mpeg2, so it is not affected by chroma_format
4280
4281     s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
4282     s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
4283     s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
4284
4285     if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
4286     {
4287         s->dest[0] += s->mb_y *   linesize << mb_size;
4288         s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
4289         s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
4290     }
4291 }
4292
4293 #ifdef CONFIG_ENCODERS
4294
static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){
    int x, y;
//FIXME optimize
    /* per-pixel weight from the local variance of a clamped 3x3 window */
    for (y = 0; y < 8; y++) {
        for (x = 0; x < 8; x++) {
            const int y0 = FFMAX(y-1, 0), y1 = FFMIN(8, y+2);
            const int x0 = FFMAX(x-1, 0), x1 = FFMIN(8, x+2);
            int sum = 0, sqr = 0, count = 0;
            int x2, y2;

            for (y2 = y0; y2 < y1; y2++) {
                for (x2 = x0; x2 < x1; x2++) {
                    const int v = ptr[x2 + y2*stride];
                    sum   += v;
                    sqr   += v*v;
                    count++;
                }
            }
            weight[x + 8*y]= (36*ff_sqrt(count*sqr - sum*sum)) / count;
        }
    }
}
4317
4318 static av_always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, int motion_y, int mb_block_height, int mb_block_count)
4319 {
4320     int16_t weight[8][64];
4321     DCTELEM orig[8][64];
4322     const int mb_x= s->mb_x;
4323     const int mb_y= s->mb_y;
4324     int i;
4325     int skip_dct[8];
4326     int dct_offset   = s->linesize*8; //default for progressive frames
4327     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
4328     int wrap_y, wrap_c;
4329
4330     for(i=0; i<mb_block_count; i++) skip_dct[i]=s->skipdct;
4331
4332     if(s->adaptive_quant){
4333         const int last_qp= s->qscale;
4334         const int mb_xy= mb_x + mb_y*s->mb_stride;
4335
4336         s->lambda= s->lambda_table[mb_xy];
4337         update_qscale(s);
4338
4339         if(!(s->flags&CODEC_FLAG_QP_RD)){
4340             s->qscale= s->current_picture_ptr->qscale_table[mb_xy];
4341             s->dquant= s->qscale - last_qp;
4342
4343             if(s->out_format==FMT_H263){
4344                 s->dquant= av_clip(s->dquant, -2, 2);
4345
4346                 if(s->codec_id==CODEC_ID_MPEG4){
4347                     if(!s->mb_intra){
4348                         if(s->pict_type == B_TYPE){
4349                             if(s->dquant&1 || s->mv_dir&MV_DIRECT)
4350                                 s->dquant= 0;
4351                         }
4352                         if(s->mv_type==MV_TYPE_8X8)
4353                             s->dquant=0;
4354                     }
4355                 }
4356             }
4357         }
4358         ff_set_qscale(s, last_qp + s->dquant);
4359     }else if(s->flags&CODEC_FLAG_QP_RD)
4360         ff_set_qscale(s, s->qscale + s->dquant);
4361
4362     wrap_y = s->linesize;
4363     wrap_c = s->uvlinesize;
4364     ptr_y = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
4365     ptr_cb = s->new_picture.data[1] + (mb_y * mb_block_height * wrap_c) + mb_x * 8;
4366     ptr_cr = s->new_picture.data[2] + (mb_y * mb_block_height * wrap_c) + mb_x * 8;
4367
4368     if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
4369         uint8_t *ebuf= s->edge_emu_buffer + 32;
4370         ff_emulated_edge_mc(ebuf            , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width   , s->height);
4371         ptr_y= ebuf;
4372         ff_emulated_edge_mc(ebuf+18*wrap_y  , ptr_cb, wrap_c, 8, mb_block_height, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
4373         ptr_cb= ebuf+18*wrap_y;
4374         ff_emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, mb_block_height, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
4375         ptr_cr= ebuf+18*wrap_y+8;
4376     }
4377
4378     if (s->mb_intra) {
4379         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
4380             int progressive_score, interlaced_score;
4381
4382             s->interlaced_dct=0;
4383             progressive_score= s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y, 8)
4384                               +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400;
4385
4386             if(progressive_score > 0){
4387                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y*2, 8)
4388                                   +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y  , NULL, wrap_y*2, 8);
4389                 if(progressive_score > interlaced_score){
4390                     s->interlaced_dct=1;
4391
4392                     dct_offset= wrap_y;
4393                     wrap_y<<=1;
4394                     if (s->chroma_format == CHROMA_422)
4395                         wrap_c<<=1;
4396                 }
4397             }
4398         }
4399
4400         s->dsp.get_pixels(s->block[0], ptr_y                 , wrap_y);
4401         s->dsp.get_pixels(s->block[1], ptr_y              + 8, wrap_y);
4402         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset    , wrap_y);
4403         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
4404
4405         if(s->flags&CODEC_FLAG_GRAY){
4406             skip_dct[4]= 1;
4407             skip_dct[5]= 1;
4408         }else{
4409             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
4410             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
4411             if(!s->chroma_y_shift){ /* 422 */
4412                 s->dsp.get_pixels(s->block[6], ptr_cb + (dct_offset>>1), wrap_c);
4413                 s->dsp.get_pixels(s->block[7], ptr_cr + (dct_offset>>1), wrap_c);
4414             }
4415         }
4416     }else{
4417         op_pixels_func (*op_pix)[4];
4418         qpel_mc_func (*op_qpix)[16];
4419         uint8_t *dest_y, *dest_cb, *dest_cr;
4420
4421         dest_y  = s->dest[0];
4422         dest_cb = s->dest[1];
4423         dest_cr = s->dest[2];
4424
4425         if ((!s->no_rounding) || s->pict_type==B_TYPE){
4426             op_pix = s->dsp.put_pixels_tab;
4427             op_qpix= s->dsp.put_qpel_pixels_tab;
4428         }else{
4429             op_pix = s->dsp.put_no_rnd_pixels_tab;
4430             op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
4431         }
4432
4433         if (s->mv_dir & MV_DIR_FORWARD) {
4434             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
4435             op_pix = s->dsp.avg_pixels_tab;
4436             op_qpix= s->dsp.avg_qpel_pixels_tab;
4437         }
4438         if (s->mv_dir & MV_DIR_BACKWARD) {
4439             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
4440         }
4441
4442         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
4443             int progressive_score, interlaced_score;
4444
4445             s->interlaced_dct=0;
4446             progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8)
4447                               +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;
4448
4449             if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;
4450
4451             if(progressive_score>0){
4452                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8)
4453                                   +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);
4454
4455                 if(progressive_score > interlaced_score){
4456                     s->interlaced_dct=1;
4457
4458                     dct_offset= wrap_y;
4459                     wrap_y<<=1;
4460                     if (s->chroma_format == CHROMA_422)
4461                         wrap_c<<=1;
4462                 }
4463             }
4464         }
4465
4466         s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
4467         s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
4468         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
4469         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
4470
4471         if(s->flags&CODEC_FLAG_GRAY){
4472             skip_dct[4]= 1;
4473             skip_dct[5]= 1;
4474         }else{
4475             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
4476             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
4477             if(!s->chroma_y_shift){ /* 422 */
4478                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset>>1), dest_cb + (dct_offset>>1), wrap_c);
4479                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset>>1), dest_cr + (dct_offset>>1), wrap_c);
4480             }
4481         }
4482         /* pre quantization */
4483         if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
4484             //FIXME optimize
4485             if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
4486             if(s->dsp.sad[1](NULL, ptr_y            + 8, dest_y            + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
4487             if(s->dsp.sad[1](NULL, ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
4488             if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
4489             if(s->dsp.sad[1](NULL, ptr_cb              , dest_cb              , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
4490             if(s->dsp.sad[1](NULL, ptr_cr              , dest_cr              , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
4491             if(!s->chroma_y_shift){ /* 422 */
4492                 if(s->dsp.sad[1](NULL, ptr_cb +(dct_offset>>1), dest_cb +(dct_offset>>1), wrap_c, 8) < 20*s->qscale) skip_dct[6]= 1;
4493                 if(s->dsp.sad[1](NULL, ptr_cr +(dct_offset>>1), dest_cr +(dct_offset>>1), wrap_c, 8) < 20*s->qscale) skip_dct[7]= 1;
4494             }
4495         }
4496     }
4497
4498     if(s->avctx->quantizer_noise_shaping){
4499         if(!skip_dct[0]) get_vissual_weight(weight[0], ptr_y                 , wrap_y);
4500         if(!skip_dct[1]) get_vissual_weight(weight[1], ptr_y              + 8, wrap_y);
4501         if(!skip_dct[2]) get_vissual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
4502         if(!skip_dct[3]) get_vissual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
4503         if(!skip_dct[4]) get_vissual_weight(weight[4], ptr_cb                , wrap_c);
4504         if(!skip_dct[5]) get_vissual_weight(weight[5], ptr_cr                , wrap_c);
4505         if(!s->chroma_y_shift){ /* 422 */
4506             if(!skip_dct[6]) get_vissual_weight(weight[6], ptr_cb + (dct_offset>>1), wrap_c);
4507             if(!skip_dct[7]) get_vissual_weight(weight[7], ptr_cr + (dct_offset>>1), wrap_c);
4508         }
4509         memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*mb_block_count);
4510     }
4511
4512     /* DCT & quantize */
4513     assert(s->out_format!=FMT_MJPEG || s->qscale==8);
4514     {
4515         for(i=0;i<mb_block_count;i++) {
4516             if(!skip_dct[i]){
4517                 int overflow;
4518                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
4519             // FIXME we could decide to change to quantizer instead of clipping
4520             // JS: I don't think that would be a good idea it could lower quality instead
4521             //     of improve it. Just INTRADC clipping deserves changes in quantizer
4522                 if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
4523             }else
4524                 s->block_last_index[i]= -1;
4525         }
4526         if(s->avctx->quantizer_noise_shaping){
4527             for(i=0;i<mb_block_count;i++) {
4528                 if(!skip_dct[i]){
4529                     s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale);
4530                 }
4531             }
4532         }
4533
4534         if(s->luma_elim_threshold && !s->mb_intra)
4535             for(i=0; i<4; i++)
4536                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
4537         if(s->chroma_elim_threshold && !s->mb_intra)
4538             for(i=4; i<mb_block_count; i++)
4539                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
4540
4541         if(s->flags & CODEC_FLAG_CBP_RD){
4542             for(i=0;i<mb_block_count;i++) {
4543                 if(s->block_last_index[i] == -1)
4544                     s->coded_score[i]= INT_MAX/256;
4545             }
4546         }
4547     }
4548
4549     if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
4550         s->block_last_index[4]=
4551         s->block_last_index[5]= 0;
4552         s->block[4][0]=
4553         s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
4554     }
4555
4556     //non c quantize code returns incorrect block_last_index FIXME
4557     if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
4558         for(i=0; i<mb_block_count; i++){
4559             int j;
4560             if(s->block_last_index[i]>0){
4561                 for(j=63; j>0; j--){
4562                     if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
4563                 }
4564                 s->block_last_index[i]= j;
4565             }
4566         }
4567     }
4568
4569     /* huffman encode */
4570     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
4571     case CODEC_ID_MPEG1VIDEO:
4572     case CODEC_ID_MPEG2VIDEO:
4573         mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
4574     case CODEC_ID_MPEG4:
4575         mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
4576     case CODEC_ID_MSMPEG4V2:
4577     case CODEC_ID_MSMPEG4V3:
4578     case CODEC_ID_WMV1:
4579         if (ENABLE_MSMPEG4_ENCODER)
4580             msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
4581         break;
4582     case CODEC_ID_WMV2:
4583         if (ENABLE_WMV2_ENCODER)
4584             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
4585         break;
4586     case CODEC_ID_H261:
4587         if (ENABLE_H261_ENCODER)
4588             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
4589         break;
4590     case CODEC_ID_H263:
4591     case CODEC_ID_H263P:
4592     case CODEC_ID_FLV1:
4593     case CODEC_ID_RV10:
4594     case CODEC_ID_RV20:
4595         h263_encode_mb(s, s->block, motion_x, motion_y); break;
4596     case CODEC_ID_MJPEG:
4597         if (ENABLE_MJPEG_ENCODER)
4598             ff_mjpeg_encode_mb(s, s->block);
4599         break;
4600     default:
4601         assert(0);
4602     }
4603 }
4604
4605 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
4606 {
4607     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
4608     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
4609 }
4610
4611 #endif //CONFIG_ENCODERS
4612
4613 void ff_mpeg_flush(AVCodecContext *avctx){
4614     int i;
4615     MpegEncContext *s = avctx->priv_data;
4616
4617     if(s==NULL || s->picture==NULL)
4618         return;
4619
4620     for(i=0; i<MAX_PICTURE_COUNT; i++){
4621        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
4622                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
4623         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
4624     }
4625     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
4626
4627     s->mb_x= s->mb_y= 0;
4628
4629     s->parse_context.state= -1;
4630     s->parse_context.frame_start_found= 0;
4631     s->parse_context.overread= 0;
4632     s->parse_context.overread_index= 0;
4633     s->parse_context.index= 0;
4634     s->parse_context.last_index= 0;
4635     s->bitstream_buffer_size=0;
4636     s->pp_time=0;
4637 }
4638
4639 #ifdef CONFIG_ENCODERS
4640 void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
4641 {
4642     const uint16_t *srcw= (uint16_t*)src;
4643     int words= length>>4;
4644     int bits= length&15;
4645     int i;
4646
4647     if(length==0) return;
4648
4649     if(words < 16){
4650         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4651     }else if(put_bits_count(pb)&7){
4652         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4653     }else{
4654         for(i=0; put_bits_count(pb)&31; i++)
4655             put_bits(pb, 8, src[i]);
4656         flush_put_bits(pb);
4657         memcpy(pbBufPtr(pb), src+i, 2*words-i);
4658         skip_put_bytes(pb, 2*words-i);
4659     }
4660
4661     put_bits(pb, bits, be2me_16(srcw[words])>>(16-bits));
4662 }
4663
4664 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
4665     int i;
4666
4667     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
4668
4669     /* mpeg1 */
4670     d->mb_skip_run= s->mb_skip_run;
4671     for(i=0; i<3; i++)
4672         d->last_dc[i]= s->last_dc[i];
4673
4674     /* statistics */
4675     d->mv_bits= s->mv_bits;
4676     d->i_tex_bits= s->i_tex_bits;
4677     d->p_tex_bits= s->p_tex_bits;
4678     d->i_count= s->i_count;
4679     d->f_count= s->f_count;
4680     d->b_count= s->b_count;
4681     d->skip_count= s->skip_count;
4682     d->misc_bits= s->misc_bits;
4683     d->last_bits= 0;
4684
4685     d->mb_skipped= 0;
4686     d->qscale= s->qscale;
4687     d->dquant= s->dquant;
4688 }
4689
4690 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
4691     int i;
4692
4693     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
4694     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
4695
4696     /* mpeg1 */
4697     d->mb_skip_run= s->mb_skip_run;
4698     for(i=0; i<3; i++)
4699         d->last_dc[i]= s->last_dc[i];
4700
4701     /* statistics */
4702     d->mv_bits= s->mv_bits;
4703     d->i_tex_bits= s->i_tex_bits;
4704     d->p_tex_bits= s->p_tex_bits;
4705     d->i_count= s->i_count;
4706     d->f_count= s->f_count;
4707     d->b_count= s->b_count;
4708     d->skip_count= s->skip_count;
4709     d->misc_bits= s->misc_bits;
4710
4711     d->mb_intra= s->mb_intra;
4712     d->mb_skipped= s->mb_skipped;
4713     d->mv_type= s->mv_type;
4714     d->mv_dir= s->mv_dir;
4715     d->pb= s->pb;
4716     if(s->data_partitioning){
4717         d->pb2= s->pb2;
4718         d->tex_pb= s->tex_pb;
4719     }
4720     d->block= s->block;
4721     for(i=0; i<8; i++)
4722         d->block_last_index[i]= s->block_last_index[i];
4723     d->interlaced_dct= s->interlaced_dct;
4724     d->qscale= s->qscale;
4725 }
4726
/**
 * Try encoding the current macroblock with one candidate coding mode and
 * keep the result only if it beats the best score found so far.
 * The score is the bit count, or (when mb_decision==FF_MB_DECISION_RD)
 * a rate-distortion cost lambda2*bits + SSE.
 *
 * @param s       working context; mv_dir/mv_type/mb_intra/mv[] must already
 *                be set up for the candidate mode by the caller
 * @param backup  context state saved before the first trial of this MB
 * @param best    receives the encoder state of the winning trial
 * @param pb,pb2,tex_pb  double-buffered bit writers; index *next_block holds
 *                the trial, the other index holds the current best output
 * @param dmin    in/out: best (lowest) score so far
 * @param next_block  in/out: which buffer set the next trial should use;
 *                toggled when this trial wins
 */
static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
                           PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
                           int *dmin, int *next_block, int motion_x, int motion_y)
{
    int score;
    uint8_t *dest_backup[3];

    /* restore the pre-trial state so each candidate starts from the same point */
    copy_context_before_encode(s, backup, type);

    s->block= s->blocks[*next_block];
    s->pb= pb[*next_block];
    if(s->data_partitioning){
        s->pb2   = pb2   [*next_block];
        s->tex_pb= tex_pb[*next_block];
    }

    if(*next_block){
        /* buffer 1 is the trial buffer: reconstruct into the scratchpad so the
         * current best reconstruction in s->dest is not overwritten */
        memcpy(dest_backup, s->dest, sizeof(s->dest));
        s->dest[0] = s->rd_scratchpad;
        s->dest[1] = s->rd_scratchpad + 16*s->linesize;
        s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
        assert(s->linesize >= 32); //FIXME
    }

    encode_mb(s, motion_x, motion_y);

    /* rate: bits written by this trial (all partitions) */
    score= put_bits_count(&s->pb);
    if(s->data_partitioning){
        score+= put_bits_count(&s->pb2);
        score+= put_bits_count(&s->tex_pb);
    }

    if(s->avctx->mb_decision == FF_MB_DECISION_RD){
        /* full RD: decode the trial MB and add the distortion term */
        MPV_decode_mb(s, s->block);

        score *= s->lambda2;
        score += sse_mb(s) << FF_LAMBDA_SHIFT;
    }

    if(*next_block){
        /* put the real destination pointers back */
        memcpy(s->dest, dest_backup, sizeof(s->dest));
    }

    if(score<*dmin){
        *dmin= score;
        *next_block^=1;

        copy_context_after_encode(best, s, type);
    }
}
4777
4778 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
4779     uint32_t *sq = ff_squareTbl + 256;
4780     int acc=0;
4781     int x,y;
4782
4783     if(w==16 && h==16)
4784         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
4785     else if(w==8 && h==8)
4786         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
4787
4788     for(y=0; y<h; y++){
4789         for(x=0; x<w; x++){
4790             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
4791         }
4792     }
4793
4794     assert(acc>=0);
4795
4796     return acc;
4797 }
4798
4799 static int sse_mb(MpegEncContext *s){
4800     int w= 16;
4801     int h= 16;
4802
4803     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
4804     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
4805
4806     if(w==16 && h==16)
4807       if(s->avctx->mb_cmp == FF_CMP_NSSE){
4808         return  s->dsp.nsse[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4809                +s->dsp.nsse[1](s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4810                +s->dsp.nsse[1](s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4811       }else{
4812         return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4813                +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4814                +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4815       }
4816     else
4817         return  sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
4818                +sse(s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
4819                +sse(s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
4820 }
4821
4822 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
4823     MpegEncContext *s= arg;
4824
4825
4826     s->me.pre_pass=1;
4827     s->me.dia_size= s->avctx->pre_dia_size;
4828     s->first_slice_line=1;
4829     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
4830         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
4831             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4832         }
4833         s->first_slice_line=0;
4834     }
4835
4836     s->me.pre_pass=0;
4837
4838     return 0;
4839 }
4840
4841 static int estimate_motion_thread(AVCodecContext *c, void *arg){
4842     MpegEncContext *s= arg;
4843
4844     ff_check_alignment();
4845
4846     s->me.dia_size= s->avctx->dia_size;
4847     s->first_slice_line=1;
4848     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
4849         s->mb_x=0; //for block init below
4850         ff_init_block_index(s);
4851         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
4852             s->block_index[0]+=2;
4853             s->block_index[1]+=2;
4854             s->block_index[2]+=2;
4855             s->block_index[3]+=2;
4856
4857             /* compute motion vector & mb_type and store in context */
4858             if(s->pict_type==B_TYPE)
4859                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
4860             else
4861                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4862         }
4863         s->first_slice_line=0;
4864     }
4865     return 0;
4866 }
4867
4868 static int mb_var_thread(AVCodecContext *c, void *arg){
4869     MpegEncContext *s= arg;
4870     int mb_x, mb_y;
4871
4872     ff_check_alignment();
4873
4874     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4875         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4876             int xx = mb_x * 16;
4877             int yy = mb_y * 16;
4878             uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
4879             int varc;
4880             int sum = s->dsp.pix_sum(pix, s->linesize);
4881
4882             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
4883
4884             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
4885             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
4886             s->me.mb_var_sum_temp    += varc;
4887         }
4888     }
4889     return 0;
4890 }
4891
4892 static void write_slice_end(MpegEncContext *s){
4893     if(s->codec_id==CODEC_ID_MPEG4){
4894         if(s->partitioned_frame){
4895             ff_mpeg4_merge_partitions(s);
4896         }
4897
4898         ff_mpeg4_stuffing(&s->pb);
4899     }else if(ENABLE_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
4900         ff_mjpeg_encode_stuffing(&s->pb);
4901     }
4902
4903     align_put_bits(&s->pb);
4904     flush_put_bits(&s->pb);
4905
4906     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
4907         s->misc_bits+= get_bits_diff(s);
4908 }
4909
4910 static int encode_thread(AVCodecContext *c, void *arg){
4911     MpegEncContext *s= arg;
4912     int mb_x, mb_y, pdif = 0;
4913     int i, j;
4914     MpegEncContext best_s, backup_s;
4915     uint8_t bit_buf[2][MAX_MB_BYTES];
4916     uint8_t bit_buf2[2][MAX_MB_BYTES];
4917     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
4918     PutBitContext pb[2], pb2[2], tex_pb[2];
4919 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
4920
4921     ff_check_alignment();
4922
4923     for(i=0; i<2; i++){
4924         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
4925         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
4926         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
4927     }
4928
4929     s->last_bits= put_bits_count(&s->pb);
4930     s->mv_bits=0;
4931     s->misc_bits=0;
4932     s->i_tex_bits=0;
4933     s->p_tex_bits=0;
4934     s->i_count=0;
4935     s->f_count=0;
4936     s->b_count=0;
4937     s->skip_count=0;
4938
4939     for(i=0; i<3; i++){
4940         /* init last dc values */
4941         /* note: quant matrix value (8) is implied here */
4942         s->last_dc[i] = 128 << s->intra_dc_precision;
4943
4944         s->current_picture.error[i] = 0;
4945     }
4946     s->mb_skip_run = 0;
4947     memset(s->last_mv, 0, sizeof(s->last_mv));
4948
4949     s->last_mv_dir = 0;
4950
4951     switch(s->codec_id){
4952     case CODEC_ID_H263:
4953     case CODEC_ID_H263P:
4954     case CODEC_ID_FLV1:
4955         s->gob_index = ff_h263_get_gob_height(s);
4956         break;
4957     case CODEC_ID_MPEG4:
4958         if(s->partitioned_frame)
4959             ff_mpeg4_init_partitions(s);
4960         break;
4961     }
4962
4963     s->resync_mb_x=0;
4964     s->resync_mb_y=0;
4965     s->first_slice_line = 1;
4966     s->ptr_lastgob = s->pb.buf;
4967     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4968 //    printf("row %d at %X\n", s->mb_y, (int)s);
4969         s->mb_x=0;
4970         s->mb_y= mb_y;
4971
4972         ff_set_qscale(s, s->qscale);
4973         ff_init_block_index(s);
4974
4975         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4976             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
4977             int mb_type= s->mb_type[xy];
4978 //            int d;
4979             int dmin= INT_MAX;
4980             int dir;
4981
4982             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
4983                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4984                 return -1;
4985             }
4986             if(s->data_partitioning){
4987                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
4988                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
4989                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4990                     return -1;
4991                 }
4992             }
4993
4994             s->mb_x = mb_x;
4995             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
4996             ff_update_block_index(s);
4997
4998             if(ENABLE_H261_ENCODER && s->codec_id == CODEC_ID_H261){
4999                 ff_h261_reorder_mb_index(s);
5000                 xy= s->mb_y*s->mb_stride + s->mb_x;
5001                 mb_type= s->mb_type[xy];
5002             }
5003
5004             /* write gob / video packet header  */
5005             if(s->rtp_mode){
5006                 int current_packet_size, is_gob_start;
5007
5008                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
5009
5010                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
5011
5012                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
5013
5014                 switch(s->codec_id){
5015                 case CODEC_ID_H263:
5016                 case CODEC_ID_H263P:
5017                     if(!s->h263_slice_structured)
5018                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
5019                     break;
5020                 case CODEC_ID_MPEG2VIDEO:
5021                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
5022                 case CODEC_ID_MPEG1VIDEO:
5023                     if(s->mb_skip_run) is_gob_start=0;
5024                     break;
5025                 }
5026
5027                 if(is_gob_start){
5028                     if(s->start_mb_y != mb_y || mb_x!=0){
5029                         write_slice_end(s);
5030
5031                         if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
5032                             ff_mpeg4_init_partitions(s);
5033                         }
5034                     }
5035
5036                     assert((put_bits_count(&s->pb)&7) == 0);
5037                     current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
5038
5039                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
5040                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
5041                         int d= 100 / s->avctx->error_rate;
5042                         if(r % d == 0){
5043                             current_packet_size=0;
5044 #ifndef ALT_BITSTREAM_WRITER
5045                             s->pb.buf_ptr= s->ptr_lastgob;
5046 #endif
5047                             assert(pbBufPtr(&s->pb) == s->ptr_lastgob);
5048                         }
5049                     }
5050
5051                     if (s->avctx->rtp_callback){
5052                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
5053                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
5054                     }
5055
5056                     switch(s->codec_id){
5057                     case CODEC_ID_MPEG4:
5058                         ff_mpeg4_encode_video_packet_header(s);
5059                         ff_mpeg4_clean_buffers(s);
5060                     break;
5061                     case CODEC_ID_MPEG1VIDEO:
5062                     case CODEC_ID_MPEG2VIDEO:
5063                         ff_mpeg1_encode_slice_header(s);
5064                         ff_mpeg1_clean_buffers(s);
5065                     break;
5066                     case CODEC_ID_H263:
5067                     case CODEC_ID_H263P:
5068                         h263_encode_gob_header(s, mb_y);
5069                     break;
5070                     }
5071
5072                     if(s->flags&CODEC_FLAG_PASS1){
5073                         int bits= put_bits_count(&s->pb);
5074                         s->misc_bits+= bits - s->last_bits;
5075                         s->last_bits= bits;
5076                     }
5077
5078                     s->ptr_lastgob += current_packet_size;
5079                     s->first_slice_line=1;
5080                     s->resync_mb_x=mb_x;
5081                     s->resync_mb_y=mb_y;
5082                 }
5083             }
5084
5085             if(  (s->resync_mb_x   == s->mb_x)
5086                && s->resync_mb_y+1 == s->mb_y){
5087                 s->first_slice_line=0;
5088             }
5089
5090             s->mb_skipped=0;
5091             s->dquant=0; //only for QP_RD
5092
5093             if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
5094                 int next_block=0;
5095                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
5096
5097                 copy_context_before_encode(&backup_s, s, -1);
5098                 backup_s.pb= s->pb;
5099                 best_s.data_partitioning= s->data_partitioning;
5100                 best_s.partitioned_frame= s->partitioned_frame;
5101                 if(s->data_partitioning){
5102                     backup_s.pb2= s->pb2;
5103                     backup_s.tex_pb= s->tex_pb;
5104                 }
5105
5106                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
5107                     s->mv_dir = MV_DIR_FORWARD;
5108                     s->mv_type = MV_TYPE_16X16;
5109                     s->mb_intra= 0;
5110                     s->mv[0][0][0] = s->p_mv_table[xy][0];
5111                     s->mv[0][0][1] = s->p_mv_table[xy][1];
5112                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
5113                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5114                 }
5115                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
5116                     s->mv_dir = MV_DIR_FORWARD;
5117                     s->mv_type = MV_TYPE_FIELD;
5118                     s->mb_intra= 0;
5119                     for(i=0; i<2; i++){
5120                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
5121                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
5122                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
5123                     }
5124                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
5125                                  &dmin, &next_block, 0, 0);
5126                 }
5127                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
5128                     s->mv_dir = MV_DIR_FORWARD;
5129                     s->mv_type = MV_TYPE_16X16;
5130                     s->mb_intra= 0;
5131                     s->mv[0][0][0] = 0;
5132                     s->mv[0][0][1] = 0;
5133                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
5134                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5135                 }
5136                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
5137                     s->mv_dir = MV_DIR_FORWARD;
5138                     s->mv_type = MV_TYPE_8X8;
5139                     s->mb_intra= 0;
5140                     for(i=0; i<4; i++){
5141                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5142                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5143                     }
5144                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
5145                                  &dmin, &next_block, 0, 0);
5146                 }
5147                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
5148                     s->mv_dir = MV_DIR_FORWARD;
5149                     s->mv_type = MV_TYPE_16X16;
5150                     s->mb_intra= 0;
5151                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5152                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5153                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
5154                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5155                 }
5156                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
5157                     s->mv_dir = MV_DIR_BACKWARD;
5158                     s->mv_type = MV_TYPE_16X16;
5159                     s->mb_intra= 0;
5160                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5161                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5162                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
5163                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
5164                 }
5165                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
5166                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5167                     s->mv_type = MV_TYPE_16X16;
5168                     s->mb_intra= 0;
5169                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5170                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5171                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5172                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5173                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
5174                                  &dmin, &next_block, 0, 0);
5175                 }
5176                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
5177                     s->mv_dir = MV_DIR_FORWARD;
5178                     s->mv_type = MV_TYPE_FIELD;
5179                     s->mb_intra= 0;
5180                     for(i=0; i<2; i++){
5181                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5182                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5183                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5184                     }
5185                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
5186                                  &dmin, &next_block, 0, 0);
5187                 }
5188                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
5189                     s->mv_dir = MV_DIR_BACKWARD;
5190                     s->mv_type = MV_TYPE_FIELD;
5191                     s->mb_intra= 0;
5192                     for(i=0; i<2; i++){
5193                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5194                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5195                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5196                     }
5197                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
5198                                  &dmin, &next_block, 0, 0);
5199                 }
5200                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
5201                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5202                     s->mv_type = MV_TYPE_FIELD;
5203                     s->mb_intra= 0;
5204                     for(dir=0; dir<2; dir++){
5205                         for(i=0; i<2; i++){
5206                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5207                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5208                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5209                         }
5210                     }
5211                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
5212                                  &dmin, &next_block, 0, 0);
5213                 }
5214                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
5215                     s->mv_dir = 0;
5216                     s->mv_type = MV_TYPE_16X16;
5217                     s->mb_intra= 1;
5218                     s->mv[0][0][0] = 0;
5219                     s->mv[0][0][1] = 0;
5220                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
5221                                  &dmin, &next_block, 0, 0);
5222                     if(s->h263_pred || s->h263_aic){
5223                         if(best_s.mb_intra)
5224                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
5225                         else
5226                             ff_clean_intra_table_entries(s); //old mode?
5227                     }
5228                 }
5229
5230                 if((s->flags & CODEC_FLAG_QP_RD) && dmin < INT_MAX){
5231                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
5232                         const int last_qp= backup_s.qscale;
5233                         int qpi, qp, dc[6];
5234                         DCTELEM ac[6][16];
5235                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
5236                         static const int dquant_tab[4]={-1,1,-2,2};
5237
5238                         assert(backup_s.dquant == 0);
5239
5240                         //FIXME intra
5241                         s->mv_dir= best_s.mv_dir;
5242                         s->mv_type = MV_TYPE_16X16;
5243                         s->mb_intra= best_s.mb_intra;
5244                         s->mv[0][0][0] = best_s.mv[0][0][0];
5245                         s->mv[0][0][1] = best_s.mv[0][0][1];
5246                         s->mv[1][0][0] = best_s.mv[1][0][0];
5247                         s->mv[1][0][1] = best_s.mv[1][0][1];
5248
5249                         qpi = s->pict_type == B_TYPE ? 2 : 0;
5250                         for(; qpi<4; qpi++){
5251                             int dquant= dquant_tab[qpi];
5252                             qp= last_qp + dquant;
5253                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
5254                                 continue;
5255                             backup_s.dquant= dquant;
5256                             if(s->mb_intra && s->dc_val[0]){
5257                                 for(i=0; i<6; i++){
5258                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
5259                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
5260                                 }
5261                             }
5262
5263                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
5264                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
5265                             if(best_s.qscale != qp){
5266                                 if(s->mb_intra && s->dc_val[0]){
5267                                     for(i=0; i<6; i++){
5268                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
5269                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
5270                                     }
5271                                 }
5272                             }
5273                         }
5274                     }
5275                 }
5276                 if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
5277                     int mx= s->b_direct_mv_table[xy][0];
5278                     int my= s->b_direct_mv_table[xy][1];
5279
5280                     backup_s.dquant = 0;
5281                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5282                     s->mb_intra= 0;
5283                     ff_mpeg4_set_direct_mv(s, mx, my);
5284                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
5285                                  &dmin, &next_block, mx, my);
5286                 }
5287                 if(mb_type&CANDIDATE_MB_TYPE_DIRECT0){
5288                     backup_s.dquant = 0;
5289                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5290                     s->mb_intra= 0;
5291                     ff_mpeg4_set_direct_mv(s, 0, 0);
5292                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
5293                                  &dmin, &next_block, 0, 0);
5294                 }
5295                 if(!best_s.mb_intra && s->flags2&CODEC_FLAG2_SKIP_RD){
5296                     int coded=0;
5297                     for(i=0; i<6; i++)
5298                         coded |= s->block_last_index[i];
5299                     if(coded){
5300                         int mx,my;
5301                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
5302                         if(best_s.mv_dir & MV_DIRECT){
5303                             mx=my=0; //FIXME find the one we actually used
5304                             ff_mpeg4_set_direct_mv(s, mx, my);
5305                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
5306                             mx= s->mv[1][0][0];
5307                             my= s->mv[1][0][1];
5308                         }else{
5309                             mx= s->mv[0][0][0];
5310                             my= s->mv[0][0][1];
5311                         }
5312
5313                         s->mv_dir= best_s.mv_dir;
5314                         s->mv_type = best_s.mv_type;
5315                         s->mb_intra= 0;
5316 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
5317                         s->mv[0][0][1] = best_s.mv[0][0][1];
5318                         s->mv[1][0][0] = best_s.mv[1][0][0];
5319                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
5320                         backup_s.dquant= 0;
5321                         s->skipdct=1;
5322                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
5323                                         &dmin, &next_block, mx, my);
5324                         s->skipdct=0;
5325                     }
5326                 }
5327
5328                 s->current_picture.qscale_table[xy]= best_s.qscale;
5329
5330                 copy_context_after_encode(s, &best_s, -1);
5331
5332                 pb_bits_count= put_bits_count(&s->pb);
5333                 flush_put_bits(&s->pb);
5334                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
5335                 s->pb= backup_s.pb;
5336
5337                 if(s->data_partitioning){
5338                     pb2_bits_count= put_bits_count(&s->pb2);
5339                     flush_put_bits(&s->pb2);
5340                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
5341                     s->pb2= backup_s.pb2;
5342
5343                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
5344                     flush_put_bits(&s->tex_pb);
5345                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
5346                     s->tex_pb= backup_s.tex_pb;
5347                 }
5348                 s->last_bits= put_bits_count(&s->pb);
5349
5350                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5351                     ff_h263_update_motion_val(s);
5352
5353                 if(next_block==0){ //FIXME 16 vs linesize16
5354                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
5355                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
5356                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
5357                 }
5358
5359                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
5360                     MPV_decode_mb(s, s->block);
5361             } else {
5362                 int motion_x = 0, motion_y = 0;
5363                 s->mv_type=MV_TYPE_16X16;
5364                 // only one MB-Type possible
5365
5366                 switch(mb_type){
5367                 case CANDIDATE_MB_TYPE_INTRA:
5368                     s->mv_dir = 0;
5369                     s->mb_intra= 1;
5370                     motion_x= s->mv[0][0][0] = 0;
5371                     motion_y= s->mv[0][0][1] = 0;
5372                     break;
5373                 case CANDIDATE_MB_TYPE_INTER:
5374                     s->mv_dir = MV_DIR_FORWARD;
5375                     s->mb_intra= 0;
5376                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
5377                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
5378                     break;
5379                 case CANDIDATE_MB_TYPE_INTER_I:
5380                     s->mv_dir = MV_DIR_FORWARD;
5381                     s->mv_type = MV_TYPE_FIELD;
5382                     s->mb_intra= 0;
5383                     for(i=0; i<2; i++){
5384                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
5385                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
5386                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
5387                     }
5388                     break;
5389                 case CANDIDATE_MB_TYPE_INTER4V:
5390                     s->mv_dir = MV_DIR_FORWARD;
5391                     s->mv_type = MV_TYPE_8X8;
5392                     s->mb_intra= 0;
5393                     for(i=0; i<4; i++){
5394                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5395                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5396                     }
5397                     break;
5398                 case CANDIDATE_MB_TYPE_DIRECT:
5399                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5400                     s->mb_intra= 0;
5401                     motion_x=s->b_direct_mv_table[xy][0];
5402                     motion_y=s->b_direct_mv_table[xy][1];
5403                     ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
5404                     break;
5405                 case CANDIDATE_MB_TYPE_DIRECT0:
5406                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5407                     s->mb_intra= 0;
5408                     ff_mpeg4_set_direct_mv(s, 0, 0);
5409                     break;
5410                 case CANDIDATE_MB_TYPE_BIDIR:
5411                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5412                     s->mb_intra= 0;
5413                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5414                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5415                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5416                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5417                     break;
5418                 case CANDIDATE_MB_TYPE_BACKWARD:
5419                     s->mv_dir = MV_DIR_BACKWARD;
5420                     s->mb_intra= 0;
5421                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5422                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5423                     break;
5424                 case CANDIDATE_MB_TYPE_FORWARD:
5425                     s->mv_dir = MV_DIR_FORWARD;
5426                     s->mb_intra= 0;
5427                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5428                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5429 //                    printf(" %d %d ", motion_x, motion_y);
5430                     break;
5431                 case CANDIDATE_MB_TYPE_FORWARD_I:
5432                     s->mv_dir = MV_DIR_FORWARD;
5433                     s->mv_type = MV_TYPE_FIELD;
5434                     s->mb_intra= 0;
5435                     for(i=0; i<2; i++){
5436                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5437                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5438                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5439                     }
5440                     break;
5441                 case CANDIDATE_MB_TYPE_BACKWARD_I:
5442                     s->mv_dir = MV_DIR_BACKWARD;
5443                     s->mv_type = MV_TYPE_FIELD;
5444                     s->mb_intra= 0;
5445                     for(i=0; i<2; i++){
5446                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5447                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5448                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5449                     }
5450                     break;
5451                 case CANDIDATE_MB_TYPE_BIDIR_I:
5452                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5453                     s->mv_type = MV_TYPE_FIELD;
5454                     s->mb_intra= 0;
5455                     for(dir=0; dir<2; dir++){
5456                         for(i=0; i<2; i++){
5457                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5458                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5459                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5460                         }
5461                     }
5462                     break;
5463                 default:
5464                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
5465                 }
5466
5467                 encode_mb(s, motion_x, motion_y);
5468
5469                 // RAL: Update last macroblock type
5470                 s->last_mv_dir = s->mv_dir;
5471
5472                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5473                     ff_h263_update_motion_val(s);
5474
5475                 MPV_decode_mb(s, s->block);
5476             }
5477
5478             /* clean the MV table in IPS frames for direct mode in B frames */
5479             if(s->mb_intra /* && I,P,S_TYPE */){
5480                 s->p_mv_table[xy][0]=0;
5481                 s->p_mv_table[xy][1]=0;
5482             }
5483
5484             if(s->flags&CODEC_FLAG_PSNR){
5485                 int w= 16;
5486                 int h= 16;
5487
5488                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
5489                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
5490
5491                 s->current_picture.error[0] += sse(
5492                     s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
5493                     s->dest[0], w, h, s->linesize);
5494                 s->current_picture.error[1] += sse(
5495                     s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5496                     s->dest[1], w>>1, h>>1, s->uvlinesize);
5497                 s->current_picture.error[2] += sse(
5498                     s, s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5499                     s->dest[2], w>>1, h>>1, s->uvlinesize);
5500             }
5501             if(s->loop_filter){
5502                 if(s->out_format == FMT_H263)
5503                     ff_h263_loop_filter(s);
5504             }
5505 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
5506         }
5507     }
5508
5509     //not beautiful here but we must write it before flushing so it has to be here
5510     if (ENABLE_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
5511         msmpeg4_encode_ext_header(s);
5512
5513     write_slice_end(s);
5514
5515     /* Send the last GOB if RTP */
5516     if (s->avctx->rtp_callback) {
5517         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
5518         pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
5519         /* Call the RTP callback to send the last GOB */
5520         emms_c();
5521         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
5522     }
5523
5524     return 0;
5525 }
5526
/* Accumulate a statistics field from src into dst and zero it in src, so that
   per-slice-thread counters can be folded back into the main context exactly
   once (re-merging would double-count). */
#define MERGE(field) dst->field += src->field; src->field=0
/* Fold the motion-estimation statistics gathered by one slice-thread context
   (src) back into the main encoding context (dst); src's counters are reset. */
static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
    MERGE(me.scene_change_score);
    MERGE(me.mc_mb_var_sum_temp);
    MERGE(me.mb_var_sum_temp);
}
5533
5534 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
5535     int i;
5536
5537     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
5538     MERGE(dct_count[1]);
5539     MERGE(mv_bits);
5540     MERGE(i_tex_bits);
5541     MERGE(p_tex_bits);
5542     MERGE(i_count);
5543     MERGE(f_count);
5544     MERGE(b_count);
5545     MERGE(skip_count);
5546     MERGE(misc_bits);
5547     MERGE(error_count);
5548     MERGE(padding_bug_score);
5549     MERGE(current_picture.error[0]);
5550     MERGE(current_picture.error[1]);
5551     MERGE(current_picture.error[2]);
5552
5553     if(dst->avctx->noise_reduction){
5554         for(i=0; i<64; i++){
5555             MERGE(dct_error_sum[0][i]);
5556             MERGE(dct_error_sum[1][i]);
5557         }
5558     }
5559
5560     assert(put_bits_count(&src->pb) % 8 ==0);
5561     assert(put_bits_count(&dst->pb) % 8 ==0);
5562     ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
5563     flush_put_bits(&dst->pb);
5564 }
5565
5566 static int estimate_qp(MpegEncContext *s, int dry_run){
5567     if (s->next_lambda){
5568         s->current_picture_ptr->quality=
5569         s->current_picture.quality = s->next_lambda;
5570         if(!dry_run) s->next_lambda= 0;
5571     } else if (!s->fixed_qscale) {
5572         s->current_picture_ptr->quality=
5573         s->current_picture.quality = ff_rate_estimate_qscale(s, dry_run);
5574         if (s->current_picture.quality < 0)
5575             return -1;
5576     }
5577
5578     if(s->adaptive_quant){
5579         switch(s->codec_id){
5580         case CODEC_ID_MPEG4:
5581             ff_clean_mpeg4_qscales(s);
5582             break;
5583         case CODEC_ID_H263:
5584         case CODEC_ID_H263P:
5585         case CODEC_ID_FLV1:
5586             ff_clean_h263_qscales(s);
5587             break;
5588         }
5589
5590         s->lambda= s->lambda_table[0];
5591         //FIXME broken
5592     }else
5593         s->lambda= s->current_picture.quality;
5594 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
5595     update_qscale(s);
5596     return 0;
5597 }
5598
/**
 * Encode one picture: run motion estimation over all slice threads, pick the
 * picture type/quantizer, write the codec-specific picture header, then encode
 * all macroblocks (encode_thread) and merge the per-thread results.
 *
 * @param s              main encoder context (thread 0)
 * @param picture_number display/coded picture number passed to header writers
 * @return 0 on success, -1 if rate control fails in estimate_qp()
 */
static int encode_picture(MpegEncContext *s, int picture_number)
{
    int i;
    int bits;

    s->picture_number = picture_number;

    /* Reset the average MB variance */
    s->me.mb_var_sum_temp    =
    s->me.mc_mb_var_sum_temp = 0;

    /* we need to initialize some time vars before we can encode b-frames */
    // RAL: Condition added for MPEG1VIDEO
    if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
        ff_set_mpeg4_time(s, s->picture_number);  //FIXME rename and use has_b_frames or similar

    s->me.scene_change_score=0;

//    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME ratedistoration

    /* rounding control: I-frames reset it, non-B frames may toggle it
       (flipflop rounding) so consecutive P-frames don't drift */
    if(s->pict_type==I_TYPE){
        if(s->msmpeg4_version >= 3) s->no_rounding=1;
        else                        s->no_rounding=0;
    }else if(s->pict_type!=B_TYPE){
        if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
            s->no_rounding ^= 1;
    }

    /* lambda for ME: from the 2nd pass log, or reuse the previous picture's
       lambda of the same type when no fixed qscale was given */
    if(s->flags & CODEC_FLAG_PASS2){
        if (estimate_qp(s,1) < 0)
            return -1;
        ff_get_2pass_fcode(s);
    }else if(!(s->flags & CODEC_FLAG_QSCALE)){
        if(s->pict_type==B_TYPE)
            s->lambda= s->last_lambda_for[s->pict_type];
        else
            s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
        update_qscale(s);
    }

    s->mb_intra=0; //for the rate distortion & bit compare functions
    /* clone the main context into each slice-thread context before ME */
    for(i=1; i<s->avctx->thread_count; i++){
        ff_update_duplicate_context(s->thread_context[i], s);
    }

    ff_init_me(s);

    /* Estimate motion for every MB */
    if(s->pict_type != I_TYPE){
        s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
        s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
        if(s->pict_type != B_TYPE && s->avctx->me_threshold==0){
            if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
                s->avctx->execute(s->avctx, pre_estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
            }
        }

        s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
    }else /* if(s->pict_type == I_TYPE) */{
        /* I-Frame */
        for(i=0; i<s->mb_stride*s->mb_height; i++)
            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;

        if(!s->fixed_qscale){
            /* finding spatial complexity for I-frame rate control */
            s->avctx->execute(s->avctx, mb_var_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
        }
    }
    /* sum the ME statistics of all slice threads back into the main context */
    for(i=1; i<s->avctx->thread_count; i++){
        merge_context_after_me(s, s->thread_context[i]);
    }
    s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
    s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
    emms_c();

    /* scene-change detection: re-encode this P-frame as an I-frame */
    if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){
        s->pict_type= I_TYPE;
        for(i=0; i<s->mb_stride*s->mb_height; i++)
            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
//printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
    }

    /* pick f_code/b_code from the estimated MVs and clip vectors that exceed
       the coded range (not with H.263+ UMV, which has unlimited vectors) */
    if(!s->umvplus){
        if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
            s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);

            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int a,b;
                a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
                b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
                s->f_code= FFMAX(s->f_code, FFMAX(a,b));
            }

            ff_fix_long_p_mvs(s);
            ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int j;
                for(i=0; i<2; i++){
                    for(j=0; j<2; j++)
                        ff_fix_long_mvs(s, s->p_field_select_table[i], j,
                                        s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
                }
            }
        }

        if(s->pict_type==B_TYPE){
            int a, b;

            /* forward vectors share f_code, backward vectors share b_code */
            a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
            b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
            s->f_code = FFMAX(a, b);

            a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
            b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
            s->b_code = FFMAX(a, b);

            ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int dir, j;
                for(dir=0; dir<2; dir++){
                    for(i=0; i<2; i++){
                        for(j=0; j<2; j++){
                            int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
                                          : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
                            ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
                                            s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
                        }
                    }
                }
            }
        }
    }

    if (estimate_qp(s, 0) < 0)
        return -1;

    if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE))
        s->qscale= 3; //reduce clipping problems

    if (s->out_format == FMT_MJPEG) {
        /* for mjpeg, we do include qscale in the matrix */
        s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
        for(i=1;i<64;i++){
            int j= s->dsp.idct_permutation[i];

            s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
        }
        convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
                       s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
        s->qscale= 8;
    }

    //FIXME var duplication
    s->current_picture_ptr->key_frame=
    s->current_picture.key_frame= s->pict_type == I_TYPE; //FIXME pic_ptr
    s->current_picture_ptr->pict_type=
    s->current_picture.pict_type= s->pict_type;

    if(s->current_picture.key_frame)
        s->picture_in_gop_number=0;

    /* write the codec-specific picture header; header_bits is measured from
       the put_bits position before/after */
    s->last_bits= put_bits_count(&s->pb);
    switch(s->out_format) {
    case FMT_MJPEG:
        if (ENABLE_MJPEG_ENCODER)
            ff_mjpeg_encode_picture_header(s);
        break;
    case FMT_H261:
        if (ENABLE_H261_ENCODER)
            ff_h261_encode_picture_header(s, picture_number);
        break;
    case FMT_H263:
        if (ENABLE_WMV2_ENCODER && s->codec_id == CODEC_ID_WMV2)
            ff_wmv2_encode_picture_header(s, picture_number);
        else if (ENABLE_MSMPEG4_ENCODER && s->h263_msmpeg4)
            msmpeg4_encode_picture_header(s, picture_number);
        else if (s->h263_pred)
            mpeg4_encode_picture_header(s, picture_number);
        else if (ENABLE_RV10_ENCODER && s->codec_id == CODEC_ID_RV10)
            rv10_encode_picture_header(s, picture_number);
        else if (ENABLE_RV20_ENCODER && s->codec_id == CODEC_ID_RV20)
            rv20_encode_picture_header(s, picture_number);
        else if (s->codec_id == CODEC_ID_FLV1)
            ff_flv_encode_picture_header(s, picture_number);
        else
            h263_encode_picture_header(s, picture_number);
        break;
    case FMT_MPEG1:
        mpeg1_encode_picture_header(s, picture_number);
        break;
    case FMT_H264:
        break;
    default:
        assert(0);
    }
    bits= put_bits_count(&s->pb);
    s->header_bits= bits - s->last_bits;

    /* refresh the slice-thread contexts with post-ME state, encode all MBs,
       then merge the per-thread statistics and bitstreams back */
    for(i=1; i<s->avctx->thread_count; i++){
        update_duplicate_context_after_me(s->thread_context[i], s);
    }
    s->avctx->execute(s->avctx, encode_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
    for(i=1; i<s->avctx->thread_count; i++){
        merge_context_after_encode(s, s->thread_context[i]);
    }
    emms_c();
    return 0;
}
5810
5811 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block){
5812     const int intra= s->mb_intra;
5813     int i;
5814
5815     s->dct_count[intra]++;
5816
5817     for(i=0; i<64; i++){
5818         int level= block[i];
5819
5820         if(level){
5821             if(level>0){
5822                 s->dct_error_sum[intra][i] += level;
5823                 level -= s->dct_offset[intra][i];
5824                 if(level<0) level=0;
5825             }else{
5826                 s->dct_error_sum[intra][i] -= level;
5827                 level += s->dct_offset[intra][i];
5828                 if(level>0) level=0;
5829             }
5830             block[i]= level;
5831         }
5832     }
5833 }
5834
5835 static int dct_quantize_trellis_c(MpegEncContext *s,
5836                         DCTELEM *block, int n,
5837                         int qscale, int *overflow){
5838     const int *qmat;
5839     const uint8_t *scantable= s->intra_scantable.scantable;
5840     const uint8_t *perm_scantable= s->intra_scantable.permutated;
5841     int max=0;
5842     unsigned int threshold1, threshold2;
5843     int bias=0;
5844     int run_tab[65];
5845     int level_tab[65];
5846     int score_tab[65];
5847     int survivor[65];
5848     int survivor_count;
5849     int last_run=0;
5850     int last_level=0;
5851     int last_score= 0;
5852     int last_i;
5853     int coeff[2][64];
5854     int coeff_count[64];
5855     int qmul, qadd, start_i, last_non_zero, i, dc;
5856     const int esc_length= s->ac_esc_length;
5857     uint8_t * length;
5858     uint8_t * last_length;
5859     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
5860
5861     s->dsp.fdct (block);
5862
5863     if(s->dct_error_sum)
5864         s->denoise_dct(s, block);
5865     qmul= qscale*16;
5866     qadd= ((qscale-1)|1)*8;
5867
5868     if (s->mb_intra) {
5869         int q;
5870         if (!s->h263_aic) {
5871             if (n < 4)
5872                 q = s->y_dc_scale;
5873             else
5874                 q = s->c_dc_scale;
5875             q = q << 3;
5876         } else{
5877             /* For AIC we skip quant/dequant of INTRADC */
5878             q = 1 << 3;
5879             qadd=0;
5880         }
5881
5882         /* note: block[0] is assumed to be positive */
5883         block[0] = (block[0] + (q >> 1)) / q;
5884         start_i = 1;
5885         last_non_zero = 0;
5886         qmat = s->q_intra_matrix[qscale];
5887         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
5888             bias= 1<<(QMAT_SHIFT-1);
5889         length     = s->intra_ac_vlc_length;
5890         last_length= s->intra_ac_vlc_last_length;
5891     } else {
5892         start_i = 0;
5893         last_non_zero = -1;
5894         qmat = s->q_inter_matrix[qscale];
5895         length     = s->inter_ac_vlc_length;
5896         last_length= s->inter_ac_vlc_last_length;
5897     }
5898     last_i= start_i;
5899
5900     threshold1= (1<<QMAT_SHIFT) - bias - 1;
5901     threshold2= (threshold1<<1);
5902
5903     for(i=63; i>=start_i; i--) {
5904         const int j = scantable[i];
5905         int level = block[j] * qmat[j];
5906
5907         if(((unsigned)(level+threshold1))>threshold2){
5908             last_non_zero = i;
5909             break;
5910         }
5911     }
5912
5913     for(i=start_i; i<=last_non_zero; i++) {
5914         const int j = scantable[i];
5915         int level = block[j] * qmat[j];
5916
5917 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
5918 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
5919         if(((unsigned)(level+threshold1))>threshold2){
5920             if(level>0){
5921                 level= (bias + level)>>QMAT_SHIFT;
5922                 coeff[0][i]= level;
5923                 coeff[1][i]= level-1;
5924 //                coeff[2][k]= level-2;
5925             }else{
5926                 level= (bias - level)>>QMAT_SHIFT;
5927                 coeff[0][i]= -level;
5928                 coeff[1][i]= -level+1;
5929 //                coeff[2][k]= -level+2;
5930             }
5931             coeff_count[i]= FFMIN(level, 2);
5932             assert(coeff_count[i]);
5933             max |=level;
5934         }else{
5935             coeff[0][i]= (level>>31)|1;
5936             coeff_count[i]= 1;
5937         }
5938     }
5939
5940     *overflow= s->max_qcoeff < max; //overflow might have happened
5941
5942     if(last_non_zero < start_i){
5943         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
5944         return last_non_zero;
5945     }
5946
5947     score_tab[start_i]= 0;
5948     survivor[0]= start_i;
5949     survivor_count= 1;
5950
5951     for(i=start_i; i<=last_non_zero; i++){
5952         int level_index, j;
5953         const int dct_coeff= FFABS(block[ scantable[i] ]);
5954         const int zero_distoration= dct_coeff*dct_coeff;
5955         int best_score=256*256*256*120;
5956         for(level_index=0; level_index < coeff_count[i]; level_index++){
5957             int distoration;
5958             int level= coeff[level_index][i];
5959             const int alevel= FFABS(level);
5960             int unquant_coeff;
5961
5962             assert(level);
5963
5964             if(s->out_format == FMT_H263){
5965                 unquant_coeff= alevel*qmul + qadd;
5966             }else{ //MPEG1
5967                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
5968                 if(s->mb_intra){
5969                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
5970                         unquant_coeff =   (unquant_coeff - 1) | 1;
5971                 }else{
5972                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
5973                         unquant_coeff =   (unquant_coeff - 1) | 1;
5974                 }
5975                 unquant_coeff<<= 3;
5976             }
5977
5978             distoration= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distoration;
5979             level+=64;
5980             if((level&(~127)) == 0){
5981                 for(j=survivor_count-1; j>=0; j--){
5982                     int run= i - survivor[j];
5983                     int score= distoration + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
5984                     score += score_tab[i-run];
5985
5986                     if(score < best_score){
5987                         best_score= score;
5988                         run_tab[i+1]= run;
5989                         level_tab[i+1]= level-64;
5990                     }
5991                 }
5992
5993                 if(s->out_format == FMT_H263){
5994                     for(j=survivor_count-1; j>=0; j--){
5995                         int run= i - survivor[j];
5996                         int score= distoration + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
5997                         score += score_tab[i-run];
5998                         if(score < last_score){
5999                             last_score= score;
6000                             last_run= run;
6001                             last_level= level-64;
6002                             last_i= i+1;
6003                         }
6004                     }
6005                 }
6006             }else{
6007                 distoration += esc_length*lambda;
6008                 for(j=survivor_count-1; j>=0; j--){
6009                     int run= i - survivor[j];
6010                     int score= distoration + score_tab[i-run];
6011
6012                     if(score < best_score){
6013                         best_score= score;
6014                         run_tab[i+1]= run;
6015                         level_tab[i+1]= level-64;
6016                     }
6017                 }
6018
6019                 if(s->out_format == FMT_H263){
6020                   for(j=survivor_count-1; j>=0; j--){
6021                         int run= i - survivor[j];
6022                         int score= distoration + score_tab[i-run];
6023                         if(score < last_score){
6024                             last_score= score;
6025                             last_run= run;
6026                             last_level= level-64;
6027                             last_i= i+1;
6028                         }
6029                     }
6030                 }
6031             }
6032         }
6033
6034         score_tab[i+1]= best_score;
6035
6036         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
6037         if(last_non_zero <= 27){
6038             for(; survivor_count; survivor_count--){
6039                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
6040                     break;
6041             }
6042         }else{
6043             for(; survivor_count; survivor_count--){
6044                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
6045                     break;
6046             }
6047         }
6048
6049         survivor[ survivor_count++ ]= i+1;
6050     }
6051
6052     if(s->out_format != FMT_H263){
6053         last_score= 256*256*256*120;
6054         for(i= survivor[0]; i<=last_non_zero + 1; i++){
6055             int score= score_tab[i];
6056             if(i) score += lambda*2; //FIXME exacter?
6057
6058             if(score < last_score){
6059                 last_score= score;
6060                 last_i= i;
6061                 last_level= level_tab[i];
6062                 last_run= run_tab[i];
6063             }
6064         }
6065     }
6066
6067     s->coded_score[n] = last_score;
6068
6069     dc= FFABS(block[0]);
6070     last_non_zero= last_i - 1;
6071     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
6072
6073     if(last_non_zero < start_i)
6074         return last_non_zero;
6075
6076     if(last_non_zero == 0 && start_i == 0){
6077         int best_level= 0;
6078         int best_score= dc * dc;
6079
6080         for(i=0; i<coeff_count[0]; i++){
6081             int level= coeff[i][0];
6082             int alevel= FFABS(level);
6083             int unquant_coeff, score, distortion;
6084
6085             if(s->out_format == FMT_H263){
6086                     unquant_coeff= (alevel*qmul + qadd)>>3;
6087             }else{ //MPEG1
6088                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
6089                     unquant_coeff =   (unquant_coeff - 1) | 1;
6090             }
6091             unquant_coeff = (unquant_coeff + 4) >> 3;
6092             unquant_coeff<<= 3 + 3;
6093
6094             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
6095             level+=64;
6096             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
6097             else                    score= distortion + esc_length*lambda;
6098
6099             if(score < best_score){
6100                 best_score= score;
6101                 best_level= level - 64;
6102             }
6103         }
6104         block[0]= best_level;
6105         s->coded_score[n] = best_score - dc*dc;
6106         if(best_level == 0) return -1;
6107         else                return last_non_zero;
6108     }
6109
6110     i= last_i;
6111     assert(last_level);
6112
6113     block[ perm_scantable[last_non_zero] ]= last_level;
6114     i -= last_run + 1;
6115
6116     for(; i>start_i; i -= run_tab[i] + 1){
6117         block[ perm_scantable[i-1] ]= level_tab[i];
6118     }
6119
6120     return last_non_zero;
6121 }
6122
6123 //#define REFINE_STATS 1
6124 static int16_t basis[64][64];
6125
6126 static void build_basis(uint8_t *perm){
6127     int i, j, x, y;
6128     emms_c();
6129     for(i=0; i<8; i++){
6130         for(j=0; j<8; j++){
6131             for(y=0; y<8; y++){
6132                 for(x=0; x<8; x++){
6133                     double s= 0.25*(1<<BASIS_SHIFT);
6134                     int index= 8*i + j;
6135                     int perm_index= perm[index];
6136                     if(i==0) s*= sqrt(0.5);
6137                     if(j==0) s*= sqrt(0.5);
6138                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
6139                 }
6140             }
6141         }
6142     }
6143 }
6144
/**
 * Iteratively refine an already quantized block to lower its rate-distortion
 * cost (quantizer noise shaping). Starting from the current coefficients it
 * repeatedly tries changing one coefficient by +-1 (the intra DC is handled
 * separately) and applies the single change that most reduces
 * distortion + lambda*rate, stopping when no change improves the score.
 * @param s      encoder context
 * @param block  quantized coefficients; modified in place
 * @param weight per-coefficient perceptual weights; normalized in place here
 * @param orig   original (pre-quantization) DCT coefficients
 * @param n      block index (n < 4 selects the luma DC scale, else chroma)
 * @param qscale quantizer scale
 * @return index of the last non-zero coefficient after refinement
 */
static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
                        DCTELEM *block, int16_t *weight, DCTELEM *orig,
                        int n, int qscale){
    int16_t rem[64];
    DECLARE_ALIGNED_16(DCTELEM, d1[64]);
    const int *qmat;
    const uint8_t *scantable= s->intra_scantable.scantable;
    const uint8_t *perm_scantable= s->intra_scantable.permutated;
//    unsigned int threshold1, threshold2;
//    int bias=0;
    int run_tab[65];
    int prev_run=0;
    int prev_level=0;
    int qmul, qadd, start_i, last_non_zero, i, dc;
    uint8_t * length;
    uint8_t * last_length;
    int lambda;
    int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
#ifdef REFINE_STATS
static int count=0;
static int after_last=0;
static int to_zero=0;
static int from_zero=0;
static int raise=0;
static int lower=0;
static int messed_sign=0;
#endif

    /* lazily build the DCT basis table on first use */
    if(basis[0][0] == 0)
        build_basis(s->dsp.idct_permutation);

    qmul= qscale*2;
    qadd= (qscale-1)|1;
    if (s->mb_intra) {
        if (!s->h263_aic) {
            if (n < 4)
                q = s->y_dc_scale;
            else
                q = s->c_dc_scale;
        } else{
            /* For AIC we skip quant/dequant of INTRADC */
            q = 1;
            qadd=0;
        }
        q <<= RECON_SHIFT-3;
        /* note: block[0] is assumed to be positive */
        dc= block[0]*q;
//        block[0] = (block[0] + (q >> 1)) / q;
        start_i = 1;
        qmat = s->q_intra_matrix[qscale];
//        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
//            bias= 1<<(QMAT_SHIFT-1);
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
    } else {
        dc= 0;
        start_i = 0;
        qmat = s->q_inter_matrix[qscale];
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }
    last_non_zero = s->block_last_index[n];

#ifdef REFINE_STATS
{START_TIMER
#endif
    /* rem[] starts as the (negated, RECON_SHIFT-scaled) target signal;
       dequantized basis functions get added to it below so it always holds
       the current reconstruction error */
    dc += (1<<(RECON_SHIFT-1));
    for(i=0; i<64; i++){
        rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME use orig directly instead of copying to rem[]
    }
#ifdef REFINE_STATS
STOP_TIMER("memset rem[]")}
#endif
    /* normalize the perceptual weights into 16..63 and derive lambda from
       their energy */
    sum=0;
    for(i=0; i<64; i++){
        int one= 36;
        int qns=4;
        int w;

        w= FFABS(weight[i]) + qns*one;
        w= 15 + (48*qns*one + w/2)/w; // 16 .. 63

        weight[i] = w;
//        w=weight[i] = (63*qns + (w/2)) / w;

        assert(w>0);
        assert(w<(1<<6));
        sum += w*w;
    }
    lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
#ifdef REFINE_STATS
{START_TIMER
#endif
    /* build the run-length table for the current coefficients and fold the
       dequantized block into rem[], so rem[] holds the reconstruction error */
    run=0;
    rle_index=0;
    for(i=start_i; i<=last_non_zero; i++){
        int j= perm_scantable[i];
        const int level= block[j];
        int coeff;

        if(level){
            if(level<0) coeff= qmul*level - qadd;
            else        coeff= qmul*level + qadd;
            run_tab[rle_index++]=run;
            run=0;

            s->dsp.add_8x8basis(rem, basis[j], coeff);
        }else{
            run++;
        }
    }
#ifdef REFINE_STATS
if(last_non_zero>0){
STOP_TIMER("init rem[]")
}
}

{START_TIMER
#endif
    /* greedy refinement loop: each iteration finds the single +-1 coefficient
       change with the best score and applies it, or stops if none helps */
    for(;;){
        int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
        int best_coeff=0;
        int best_change=0;
        int run2, best_unquant_change=0, analyze_gradient;
#ifdef REFINE_STATS
{START_TIMER
#endif
        analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;

        /* compute the DCT of the weighted error so zero coefficients are only
           raised in the direction that reduces the error */
        if(analyze_gradient){
#ifdef REFINE_STATS
{START_TIMER
#endif
            for(i=0; i<64; i++){
                int w= weight[i];

                d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
            }
#ifdef REFINE_STATS
STOP_TIMER("rem*w*w")}
{START_TIMER
#endif
            s->dsp.fdct(d1);
#ifdef REFINE_STATS
STOP_TIMER("dct")}
#endif
        }

        /* intra blocks: also consider changing the DC coefficient (it is not
           part of the AC run-length coding, so no rate term is needed) */
        if(start_i){
            const int level= block[0];
            int change, old_coeff;

            assert(s->mb_intra);

            old_coeff= q*level;

            for(change=-1; change<=1; change+=2){
                int new_level= level + change;
                int score, new_coeff;

                new_coeff= q*new_level;
                if(new_coeff >= 2048 || new_coeff < 0)
                    continue;

                score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
                if(score<best_score){
                    best_score= score;
                    best_coeff= 0;
                    best_change= change;
                    best_unquant_change= new_coeff - old_coeff;
                }
            }
        }

        run=0;
        rle_index=0;
        run2= run_tab[rle_index++];
        prev_level=0;
        prev_run=0;

        /* try +-1 on every AC coefficient, scoring the VLC rate delta plus
           the weighted distortion delta */
        for(i=start_i; i<64; i++){
            int j= perm_scantable[i];
            const int level= block[j];
            int change, old_coeff;

            if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
                break;

            if(level){
                if(level<0) old_coeff= qmul*level - qadd;
                else        old_coeff= qmul*level + qadd;
                run2= run_tab[rle_index++]; //FIXME ! maybe after last
            }else{
                old_coeff=0;
                run2--;
                assert(run2>=0 || i >= last_non_zero );
            }

            for(change=-1; change<=1; change+=2){
                int new_level= level + change;
                int score, new_coeff, unquant_change;

                score=0;
                if(s->avctx->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
                   continue;

                if(new_level){
                    if(new_level<0) new_coeff= qmul*new_level - qadd;
                    else            new_coeff= qmul*new_level + qadd;
                    if(new_coeff >= 2048 || new_coeff <= -2048)
                        continue;
                    //FIXME check for overflow

                    if(level){
                        /* non-zero -> non-zero: only the level part of the
                           VLC index changes */
                        if(level < 63 && level > -63){
                            if(i < last_non_zero)
                                score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
                                         - length[UNI_AC_ENC_INDEX(run, level+64)];
                            else
                                score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
                                         - last_length[UNI_AC_ENC_INDEX(run, level+64)];
                        }
                    }else{
                        /* zero -> +-1: a run is split in two (or the block is
                           extended past the old last coefficient) */
                        assert(FFABS(new_level)==1);

                        if(analyze_gradient){
                            int g= d1[ scantable[i] ];
                            if(g && (g^new_level) >= 0)
                                continue;
                        }

                        if(i < last_non_zero){
                            int next_i= i + run2 + 1;
                            int next_level= block[ perm_scantable[next_i] ] + 64;

                            if(next_level&(~127))
                                next_level= 0;

                            if(next_i < last_non_zero)
                                score +=   length[UNI_AC_ENC_INDEX(run, 65)]
                                         + length[UNI_AC_ENC_INDEX(run2, next_level)]
                                         - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
                            else
                                score +=  length[UNI_AC_ENC_INDEX(run, 65)]
                                        + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
                                        - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
                        }else{
                            score += last_length[UNI_AC_ENC_INDEX(run, 65)];
                            if(prev_level){
                                score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
                                        - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
                            }
                        }
                    }
                }else{
                    /* +-1 -> zero: two runs merge into one (or the block
                       shrinks) */
                    new_coeff=0;
                    assert(FFABS(level)==1);

                    if(i < last_non_zero){
                        int next_i= i + run2 + 1;
                        int next_level= block[ perm_scantable[next_i] ] + 64;

                        if(next_level&(~127))
                            next_level= 0;

                        if(next_i < last_non_zero)
                            score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
                                     - length[UNI_AC_ENC_INDEX(run2, next_level)]
                                     - length[UNI_AC_ENC_INDEX(run, 65)];
                        else
                            score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
                                     - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
                                     - length[UNI_AC_ENC_INDEX(run, 65)];
                    }else{
                        score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
                        if(prev_level){
                            score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
                                    - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
                        }
                    }
                }

                score *= lambda;

                unquant_change= new_coeff - old_coeff;
                assert((score < 100*lambda && score > -100*lambda) || lambda==0);

                score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
                if(score<best_score){
                    best_score= score;
                    best_coeff= i;
                    best_change= change;
                    best_unquant_change= unquant_change;
                }
            }
            if(level){
                prev_level= level + 64;
                if(prev_level&(~127))
                    prev_level= 0;
                prev_run= run;
                run=0;
            }else{
                run++;
            }
        }
#ifdef REFINE_STATS
STOP_TIMER("iterative step")}
#endif

        /* apply the best single-coefficient change, keeping last_non_zero,
           run_tab[] and rem[] consistent — or stop if nothing improved */
        if(best_change){
            int j= perm_scantable[ best_coeff ];

            block[j] += best_change;

            if(best_coeff > last_non_zero){
                last_non_zero= best_coeff;
                assert(block[j]);
#ifdef REFINE_STATS
after_last++;
#endif
            }else{
#ifdef REFINE_STATS
if(block[j]){
    if(block[j] - best_change){
        if(FFABS(block[j]) > FFABS(block[j] - best_change)){
            raise++;
        }else{
            lower++;
        }
    }else{
        from_zero++;
    }
}else{
    to_zero++;
}
#endif
                for(; last_non_zero>=start_i; last_non_zero--){
                    if(block[perm_scantable[last_non_zero]])
                        break;
                }
            }
#ifdef REFINE_STATS
count++;
if(256*256*256*64 % count == 0){
    printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
}
#endif
            run=0;
            rle_index=0;
            for(i=start_i; i<=last_non_zero; i++){
                int j= perm_scantable[i];
                const int level= block[j];

                 if(level){
                     run_tab[rle_index++]=run;
                     run=0;
                 }else{
                     run++;
                 }
            }

            s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
        }else{
            break;
        }
    }
#ifdef REFINE_STATS
if(last_non_zero>0){
STOP_TIMER("iterative search")
}
}
#endif

    return last_non_zero;
}
6520
6521 static int dct_quantize_c(MpegEncContext *s,
6522                         DCTELEM *block, int n,
6523                         int qscale, int *overflow)
6524 {
6525     int i, j, level, last_non_zero, q, start_i;
6526     const int *qmat;
6527     const uint8_t *scantable= s->intra_scantable.scantable;
6528     int bias;
6529     int max=0;
6530     unsigned int threshold1, threshold2;
6531
6532     s->dsp.fdct (block);
6533
6534     if(s->dct_error_sum)
6535         s->denoise_dct(s, block);
6536
6537     if (s->mb_intra) {
6538         if (!s->h263_aic) {
6539             if (n < 4)
6540                 q = s->y_dc_scale;
6541             else
6542                 q = s->c_dc_scale;
6543             q = q << 3;
6544         } else
6545             /* For AIC we skip quant/dequant of INTRADC */
6546             q = 1 << 3;
6547
6548         /* note: block[0] is assumed to be positive */
6549         block[0] = (block[0] + (q >> 1)) / q;
6550         start_i = 1;
6551         last_non_zero = 0;
6552         qmat = s->q_intra_matrix[qscale];
6553         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6554     } else {
6555         start_i = 0;
6556         last_non_zero = -1;
6557         qmat = s->q_inter_matrix[qscale];
6558         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6559     }
6560     threshold1= (1<<QMAT_SHIFT) - bias - 1;
6561     threshold2= (threshold1<<1);
6562     for(i=63;i>=start_i;i--) {
6563         j = scantable[i];
6564         level = block[j] * qmat[j];
6565
6566         if(((unsigned)(level+threshold1))>threshold2){
6567             last_non_zero = i;
6568             break;
6569         }else{
6570             block[j]=0;
6571         }
6572     }
6573     for(i=start_i; i<=last_non_zero; i++) {
6574         j = scantable[i];
6575         level = block[j] * qmat[j];
6576
6577 //        if(   bias+level >= (1<<QMAT_SHIFT)
6578 //           || bias-level >= (1<<QMAT_SHIFT)){
6579         if(((unsigned)(level+threshold1))>threshold2){
6580             if(level>0){
6581                 level= (bias + level)>>QMAT_SHIFT;
6582                 block[j]= level;
6583             }else{
6584                 level= (bias - level)>>QMAT_SHIFT;
6585                 block[j]= -level;
6586             }
6587             max |=level;
6588         }else{
6589             block[j]=0;
6590         }
6591     }
6592     *overflow= s->max_qcoeff < max; //overflow might have happened
6593
6594     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
6595     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
6596         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
6597
6598     return last_non_zero;
6599 }
6600
6601 #endif //CONFIG_ENCODERS
6602
6603 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
6604                                    DCTELEM *block, int n, int qscale)
6605 {
6606     int i, level, nCoeffs;
6607     const uint16_t *quant_matrix;
6608
6609     nCoeffs= s->block_last_index[n];
6610
6611     if (n < 4)
6612         block[0] = block[0] * s->y_dc_scale;
6613     else
6614         block[0] = block[0] * s->c_dc_scale;
6615     /* XXX: only mpeg1 */
6616     quant_matrix = s->intra_matrix;
6617     for(i=1;i<=nCoeffs;i++) {
6618         int j= s->intra_scantable.permutated[i];
6619         level = block[j];
6620         if (level) {
6621             if (level < 0) {
6622                 level = -level;
6623                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6624                 level = (level - 1) | 1;
6625                 level = -level;
6626             } else {
6627                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6628                 level = (level - 1) | 1;
6629             }
6630             block[j] = level;
6631         }
6632     }
6633 }
6634
6635 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
6636                                    DCTELEM *block, int n, int qscale)
6637 {
6638     int i, level, nCoeffs;
6639     const uint16_t *quant_matrix;
6640
6641     nCoeffs= s->block_last_index[n];
6642
6643     quant_matrix = s->inter_matrix;
6644     for(i=0; i<=nCoeffs; i++) {
6645         int j= s->intra_scantable.permutated[i];
6646         level = block[j];
6647         if (level) {
6648             if (level < 0) {
6649                 level = -level;
6650                 level = (((level << 1) + 1) * qscale *
6651                          ((int) (quant_matrix[j]))) >> 4;
6652                 level = (level - 1) | 1;
6653                 level = -level;
6654             } else {
6655                 level = (((level << 1) + 1) * qscale *
6656                          ((int) (quant_matrix[j]))) >> 4;
6657                 level = (level - 1) | 1;
6658             }
6659             block[j] = level;
6660         }
6661     }
6662 }
6663
6664 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
6665                                    DCTELEM *block, int n, int qscale)
6666 {
6667     int i, level, nCoeffs;
6668     const uint16_t *quant_matrix;
6669
6670     if(s->alternate_scan) nCoeffs= 63;
6671     else nCoeffs= s->block_last_index[n];
6672
6673     if (n < 4)
6674         block[0] = block[0] * s->y_dc_scale;
6675     else
6676         block[0] = block[0] * s->c_dc_scale;
6677     quant_matrix = s->intra_matrix;
6678     for(i=1;i<=nCoeffs;i++) {
6679         int j= s->intra_scantable.permutated[i];
6680         level = block[j];
6681         if (level) {
6682             if (level < 0) {
6683                 level = -level;
6684                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6685                 level = -level;
6686             } else {
6687                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6688             }
6689             block[j] = level;
6690         }
6691     }
6692 }
6693
6694 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
6695                                    DCTELEM *block, int n, int qscale)
6696 {
6697     int i, level, nCoeffs;
6698     const uint16_t *quant_matrix;
6699     int sum=-1;
6700
6701     if(s->alternate_scan) nCoeffs= 63;
6702     else nCoeffs= s->block_last_index[n];
6703
6704     if (n < 4)
6705         block[0] = block[0] * s->y_dc_scale;
6706     else
6707         block[0] = block[0] * s->c_dc_scale;
6708     quant_matrix = s->intra_matrix;
6709     for(i=1;i<=nCoeffs;i++) {
6710         int j= s->intra_scantable.permutated[i];
6711         level = block[j];
6712         if (level) {
6713             if (level < 0) {
6714                 level = -level;
6715                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6716                 level = -level;
6717             } else {
6718                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6719             }
6720             block[j] = level;
6721             sum+=level;
6722         }
6723     }
6724     block[63]^=sum&1;
6725 }
6726
6727 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
6728                                    DCTELEM *block, int n, int qscale)
6729 {
6730     int i, level, nCoeffs;
6731     const uint16_t *quant_matrix;
6732     int sum=-1;
6733
6734     if(s->alternate_scan) nCoeffs= 63;
6735     else nCoeffs= s->block_last_index[n];
6736
6737     quant_matrix = s->inter_matrix;
6738     for(i=0; i<=nCoeffs; i++) {
6739         int j= s->intra_scantable.permutated[i];
6740         level = block[j];
6741         if (level) {
6742             if (level < 0) {
6743                 level = -level;
6744                 level = (((level << 1) + 1) * qscale *
6745                          ((int) (quant_matrix[j]))) >> 4;
6746                 level = -level;
6747             } else {
6748                 level = (((level << 1) + 1) * qscale *
6749                          ((int) (quant_matrix[j]))) >> 4;
6750             }
6751             block[j] = level;
6752             sum+=level;
6753         }
6754     }
6755     block[63]^=sum&1;
6756 }
6757
6758 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
6759                                   DCTELEM *block, int n, int qscale)
6760 {
6761     int i, level, qmul, qadd;
6762     int nCoeffs;
6763
6764     assert(s->block_last_index[n]>=0);
6765
6766     qmul = qscale << 1;
6767
6768     if (!s->h263_aic) {
6769         if (n < 4)
6770             block[0] = block[0] * s->y_dc_scale;
6771         else
6772             block[0] = block[0] * s->c_dc_scale;
6773         qadd = (qscale - 1) | 1;
6774     }else{
6775         qadd = 0;
6776     }
6777     if(s->ac_pred)
6778         nCoeffs=63;
6779     else
6780         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6781
6782     for(i=1; i<=nCoeffs; i++) {
6783         level = block[i];
6784         if (level) {
6785             if (level < 0) {
6786                 level = level * qmul - qadd;
6787             } else {
6788                 level = level * qmul + qadd;
6789             }
6790             block[i] = level;
6791         }
6792     }
6793 }
6794
6795 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
6796                                   DCTELEM *block, int n, int qscale)
6797 {
6798     int i, level, qmul, qadd;
6799     int nCoeffs;
6800
6801     assert(s->block_last_index[n]>=0);
6802
6803     qadd = (qscale - 1) | 1;
6804     qmul = qscale << 1;
6805
6806     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6807
6808     for(i=0; i<=nCoeffs; i++) {
6809         level = block[i];
6810         if (level) {
6811             if (level < 0) {
6812                 level = level * qmul - qadd;
6813             } else {
6814                 level = level * qmul + qadd;
6815             }
6816             block[i] = level;
6817         }
6818     }
6819 }
6820
#ifdef CONFIG_ENCODERS

/* Encoder registrations. All of these codecs share the generic MPEG-video
   encoder entry points (MPV_encode_init / MPV_encode_picture /
   MPV_encode_end) over an MpegEncContext; the codec ID selects the actual
   bitstream syntax. All accept YUV 4:2:0 input only. */

AVCodec h263_encoder = {
    "h263",
    CODEC_TYPE_VIDEO,
    CODEC_ID_H263,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec h263p_encoder = {
    "h263p",
    CODEC_TYPE_VIDEO,
    CODEC_ID_H263P,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec flv_encoder = {
    "flv",
    CODEC_TYPE_VIDEO,
    CODEC_ID_FLV1,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec rv10_encoder = {
    "rv10",
    CODEC_TYPE_VIDEO,
    CODEC_ID_RV10,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec rv20_encoder = {
    "rv20",
    CODEC_TYPE_VIDEO,
    CODEC_ID_RV20,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

/* mpeg4 is the only one of these that may reorder/delay frames (B-frames),
   hence CODEC_CAP_DELAY */
AVCodec mpeg4_encoder = {
    "mpeg4",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MPEG4,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
    .capabilities= CODEC_CAP_DELAY,
};

AVCodec msmpeg4v1_encoder = {
    "msmpeg4v1",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V1,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec msmpeg4v2_encoder = {
    "msmpeg4v2",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V2,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec msmpeg4v3_encoder = {
    "msmpeg4",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V3,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec wmv1_encoder = {
    "wmv1",
    CODEC_TYPE_VIDEO,
    CODEC_ID_WMV1,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

#endif //CONFIG_ENCODERS